1068 files changed, 179540 insertions, 0 deletions
diff --git a/tools/testing/fault-injection/failcmd.sh b/tools/testing/fault-injection/failcmd.sh
new file mode 100644
index 000000000..29a6c63c5
--- /dev/null
+++ b/tools/testing/fault-injection/failcmd.sh
@@ -0,0 +1,220 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# NAME
+#	failcmd.sh - run a command with injecting slab/page allocation failures
+#
+# SYNOPSIS
+#	failcmd.sh --help
+#	failcmd.sh [<options>] command [arguments]
+#
+# DESCRIPTION
+#	Run command with injecting slab/page allocation failures by fault
+#	injection.
+#
+#	NOTE: you need to run this script as root.
+#
+
+usage()
+{
+	cat >&2 <<EOF
+Usage: $0 [options] command [arguments]
+
+OPTIONS
+	-p percent
+	--probability=percent
+		likelihood of failure injection, in percent.
+		Default value is 1
+
+	-t value
+	--times=value
+		specifies how many times failures may happen at most.
+		Default value is 1
+
+	--oom-kill-allocating-task=value
+		set /proc/sys/vm/oom_kill_allocating_task to specified value
+		before running the command.
+		Default value is 1
+
+	-h, --help
+		Display a usage message and exit
+
+	--interval=value, --space=value, --verbose=value, --task-filter=value,
+	--stacktrace-depth=value, --require-start=value, --require-end=value,
+	--reject-start=value, --reject-end=value, --ignore-gfp-wait=value
+		See Documentation/fault-injection/fault-injection.txt for more
+		information
+
+	failslab options:
+	--cache-filter=value
+
+	fail_page_alloc options:
+	--ignore-gfp-highmem=value, --min-order=value
+
+ENVIRONMENT
+	FAILCMD_TYPE
+		The following values for FAILCMD_TYPE are recognized:
+
+		failslab
+			inject slab allocation failures
+		fail_page_alloc
+			inject page allocation failures
+
+		If FAILCMD_TYPE is not defined, then failslab is used.
+EOF
+}
+
+if [ $UID != 0 ]; then
+	echo must be run as root >&2
+	exit 1
+fi
+
+DEBUGFS=`mount -t debugfs | head -1 | awk '{ print $3}'`
+
+if [ ! -d "$DEBUGFS" ]; then
+	echo debugfs is not mounted >&2
+	exit 1
+fi
+
+FAILCMD_TYPE=${FAILCMD_TYPE:-failslab}
+FAULTATTR=$DEBUGFS/$FAILCMD_TYPE
+
+if [ ! -d $FAULTATTR ]; then
+	echo $FAILCMD_TYPE is not available >&2
+	exit 1
+fi
+
+LONGOPTS=probability:,interval:,times:,space:,verbose:,task-filter:
+LONGOPTS=$LONGOPTS,stacktrace-depth:,require-start:,require-end:
+LONGOPTS=$LONGOPTS,reject-start:,reject-end:,oom-kill-allocating-task:,help
+
+if [ $FAILCMD_TYPE = failslab ]; then
+	LONGOPTS=$LONGOPTS,ignore-gfp-wait:,cache-filter:
+elif [ $FAILCMD_TYPE = fail_page_alloc ]; then
+	LONGOPTS=$LONGOPTS,ignore-gfp-wait:,ignore-gfp-highmem:,min-order:
+fi
+
+TEMP=`getopt -o p:i:t:s:v:h --long $LONGOPTS -n 'failcmd.sh' -- "$@"`
+
+if [ $? != 0 ]; then
+	usage
+	exit 1
+fi
+
+eval set -- "$TEMP"
+
+fault_attr_default()
+{
+	echo N > $FAULTATTR/task-filter
+	echo 0 > $FAULTATTR/probability
+	echo 1 > $FAULTATTR/times
+}
+
+fault_attr_default
+
+oom_kill_allocating_task_saved=`cat /proc/sys/vm/oom_kill_allocating_task`
+
+restore_values()
+{
+	fault_attr_default
+	echo $oom_kill_allocating_task_saved \
+		> /proc/sys/vm/oom_kill_allocating_task
+}
+
+#
+# Default options
+#
+declare -i oom_kill_allocating_task=1
+declare task_filter=Y
+declare -i probability=1
+declare -i times=1
+
+while true; do
+	case "$1" in
+	-p|--probability)
+		probability=$2
+		shift 2
+		;;
+	-i|--interval)
+		echo $2 > $FAULTATTR/interval
+		shift 2
+		;;
+	-t|--times)
+		times=$2
+		shift 2
+		;;
+	-s|--space)
+		echo $2 > $FAULTATTR/space
+		shift 2
+		;;
+	-v|--verbose)
+		echo $2 > $FAULTATTR/verbose
+		shift 2
+		;;
+	--task-filter)
+		task_filter=$2
+		shift 2
+		;;
+	--stacktrace-depth)
+		echo $2 > $FAULTATTR/stacktrace-depth
+		shift 2
+		;;
+	--require-start)
+		echo $2 > $FAULTATTR/require-start
+		shift 2
+		;;
+	--require-end)
+		echo $2 > $FAULTATTR/require-end
+		shift 2
+		;;
+	--reject-start)
+		echo $2 > $FAULTATTR/reject-start
+		shift 2
+		;;
+	--reject-end)
+		echo $2 > $FAULTATTR/reject-end
+		shift 2
+		;;
+	--oom-kill-allocating-task)
+		oom_kill_allocating_task=$2
+		shift 2
+		;;
+	--ignore-gfp-wait)
+		echo $2 > $FAULTATTR/ignore-gfp-wait
+		shift 2
+		;;
+	--cache-filter)
+		echo $2 > $FAULTATTR/cache_filter
+		shift 2
+		;;
+	--ignore-gfp-highmem)
+		echo $2 > $FAULTATTR/ignore-gfp-highmem
+		shift 2
+		;;
+	--min-order)
+		echo $2 > $FAULTATTR/min-order
+		shift 2
+		;;
+	-h|--help)
+		usage
+		exit 0
+		shift
+		;;
+	--)
+		shift
+		break
+		;;
+	esac
+done
+
+[ -z "$1" ] && exit 0
+
+echo $oom_kill_allocating_task > /proc/sys/vm/oom_kill_allocating_task
+echo $task_filter > $FAULTATTR/task-filter
+echo $probability > $FAULTATTR/probability
+echo $times > $FAULTATTR/times
+
+trap "restore_values" SIGINT SIGTERM EXIT
+
+cmd="echo 1 > /proc/self/make-it-fail && exec $@"
+bash -c "$cmd"
diff --git a/tools/testing/ktest/compare-ktest-sample.pl b/tools/testing/ktest/compare-ktest-sample.pl
new file mode 100755
index 000000000..ebea21d0a
--- /dev/null
+++ b/tools/testing/ktest/compare-ktest-sample.pl
@@ -0,0 +1,33 @@
+#!/usr/bin/env perl
+# SPDX-License-Identifier: GPL-2.0
+
+open (IN,"ktest.pl");
+while (<IN>) {
+    # hashes are now used
+    if (/\$opt\{"?([A-Z].*?)(\[.*\])?"?\}/ ||
+	/^\s*"?([A-Z].*?)"?\s*=>\s*/ ||
+	/set_test_option\("(.*?)"/) {
+	$opt{$1} = 1;
+    }
+}
+close IN;
+
+open (IN, "sample.conf");
+while (<IN>) {
+    if (/^\s*#?\s*([A-Z]\S*)\s*=/) {
+	$samp{$1} = 1;
+    }
+}
+close IN;
+
+foreach $opt (keys %opt) {
+    if (!defined($samp{$opt})) {
+	print "opt = $opt\n";
+    }
+}
+
+foreach $samp (keys %samp) {
+    if (!defined($opt{$samp})) {
+	print "samp = $samp\n";
+    }
+}
diff --git a/tools/testing/ktest/config-bisect.pl b/tools/testing/ktest/config-bisect.pl
new file mode 100755
index 000000000..b28feea7c
--- /dev/null
+++ b/tools/testing/ktest/config-bisect.pl
@@ -0,0 +1,770 @@
+#!/usr/bin/perl -w
+#
+# Copyright 2015 - Steven Rostedt, Red Hat Inc.
+# Copyright 2017 - Steven Rostedt, VMware, Inc.
+#
+# Licensed under the terms of the GNU GPL License version 2
+#
+
+# usage:
+#  config-bisect.pl [options] good-config bad-config [good|bad]
+#
+
+# Compares a good config to a bad config, then takes half of the diffs
+# and produces a config that is somewhere between the good config and
+# the bad config. That is, the resulting config will start with the
+# good config and will try to make half of the differences of between
+# the good and bad configs match the bad config. It tries because of
+# dependencies between the two configs it may not be able to change
+# exactly half of the configs that are different between the two config
+# files.
+
+# Here's a normal way to use it:
+#
+#  $ cd /path/to/linux/kernel
+#  $ config-bisect.pl /path/to/good/config /path/to/bad/config
+
+# This will now pull in good config (blowing away .config in that directory
+# so do not make that be one of the good or bad configs), and then
+# build the config with "make oldconfig" to make sure it matches the
+# current kernel. It will then store the configs in that result for
+# the good config. It does the same for the bad config as well.
+# The algorithm will run, merging half of the differences between
+# the two configs and building them with "make oldconfig" to make sure
+# the result changes (dependencies may reset changes the tool had made).
+# It then copies the result of its good config to /path/to/good/config.tmp
+# and the bad config to /path/to/bad/config.tmp (just appends ".tmp" to the
+# files passed in). And the ".config" that you should test will be in
+# directory
+
+# After the first run, determine if the result is good or bad then
+# run the same command appending the result
+
+# For good results:
+#  $ config-bisect.pl /path/to/good/config /path/to/bad/config good
+
+# For bad results:
+#  $ config-bisect.pl /path/to/good/config /path/to/bad/config bad
+
+# Do not change the good-config or bad-config, config-bisect.pl will
+# copy the good-config to a temp file with the same name as good-config
+# but with a ".tmp" after it. It will do the same with the bad-config.
+
+# If "good" or "bad" is not stated at the end, it will copy the good and
+# bad configs to the .tmp versions. If a .tmp version already exists, it will
+# warn before writing over them (-r will not warn, and just write over them).
+# If the last config is labeled "good", then it will copy it to the good .tmp
+# version. If the last config is labeled "bad", it will copy it to the bad
+# .tmp version. It will continue this until it can not merge the two any more
+# without the result being equal to either the good or bad .tmp configs.
+
+my $start = 0;
+my $val = "";
+
+my $pwd = `pwd`;
+chomp $pwd;
+my $tree = $pwd;
+my $build;
+
+my $output_config;
+my $reset_bisect;
+
+sub usage {
+    print << "EOF"
+
+usage: config-bisect.pl [-l linux-tree][-b build-dir] good-config bad-config [good|bad]
+  -l [optional] define location of linux-tree (default is current directory)
+  -b [optional] define location to build (O=build-dir) (default is linux-tree)
+  good-config the config that is considered good
+  bad-config the config that does not work
+  "good" add this if the last run produced a good config
+  "bad" add this if the last run produced a bad config
+  If "good" or "bad" is not specified, then it is the start of a new bisect
+
+  Note, each run will create copy of good and bad configs with ".tmp" appended.
+
+EOF
+;
+
+    exit(-1);
+}
+
+sub doprint {
+    print @_;
+}
+
+sub dodie {
+    doprint "CRITICAL FAILURE... ", @_, "\n";
+
+    die @_, "\n";
+}
+
+sub expand_path {
+    my ($file) = @_;
+
+    if ($file =~ m,^/,) {
+	return $file;
+    }
+    return "$pwd/$file";
+}
+
+sub read_prompt {
+    my ($cancel, $prompt) = @_;
+
+    my $ans;
+
+    for (;;) {
+	if ($cancel) {
+	    print "$prompt [y/n/C] ";
+	} else {
+	    print "$prompt [y/N] ";
+	}
+	$ans = <STDIN>;
+	chomp $ans;
+	if ($ans =~ /^\s*$/) {
+	    if ($cancel) {
+		$ans = "c";
+	    } else {
+		$ans = "n";
+	    }
+	}
+	last if ($ans =~ /^y$/i || $ans =~ /^n$/i);
+	if ($cancel) {
+	    last if ($ans =~ /^c$/i);
+	    print "Please answer either 'y', 'n' or 'c'.\n";
+	} else {
+	    print "Please answer either 'y' or 'n'.\n";
+	}
+    }
+    if ($ans =~ /^c/i) {
+	exit;
+    }
+    if ($ans !~ /^y$/i) {
+	return 0;
+    }
+    return 1;
+}
+
+sub read_yn {
+    my ($prompt) = @_;
+
+    return read_prompt 0, $prompt;
+}
+
+sub read_ync {
+    my ($prompt) = @_;
+
+    return read_prompt 1, $prompt;
+}
+
+sub run_command {
+    my ($command, $redirect) = @_;
+    my $start_time;
+    my $end_time;
+    my $dord = 0;
+    my $pid;
+
+    $start_time = time;
+
+    doprint("$command ... ");
+
+    $pid = open(CMD, "$command 2>&1 |") or
+	dodie "unable to exec $command";
+
+    if (defined($redirect)) {
+	open (RD, ">$redirect") or
+	    dodie "failed to write to redirect $redirect";
+	$dord = 1;
+    }
+
+    while (<CMD>) {
+	print RD  if ($dord);
+    }
+
+    waitpid($pid, 0);
+    my $failed = $?;
+
+    close(CMD);
+    close(RD)  if ($dord);
+
+    $end_time = time;
+    my $delta = $end_time - $start_time;
+
+    if ($delta == 1) {
+	doprint "[1 second] ";
+    } else {
+	doprint "[$delta seconds] ";
+    }
+
+    if ($failed) {
+	doprint "FAILED!\n";
+    } else {
+	doprint "SUCCESS\n";
+    }
+
+    return !$failed;
+}
+
+###### CONFIG BISECT ######
+
+# config_ignore holds the configs that were set (or unset) for
+# a good config and we will ignore these configs for the rest
+# of a config bisect. These configs stay as they were.
+my %config_ignore;
+
+# config_set holds what all configs were set as.
+my %config_set;
+
+# config_off holds the set of configs that the bad config had disabled.
+# We need to record them and set them in the .config when running
+# olddefconfig, because olddefconfig keeps the defaults.
+my %config_off;
+
+# config_off_tmp holds a set of configs to turn off for now
+my @config_off_tmp;
+
+# config_list is the set of configs that are being tested
+my %config_list;
+my %null_config;
+
+my %dependency;
+
+my $make;
+
+sub make_oldconfig {
+
+    if (!run_command "$make olddefconfig") {
+	# Perhaps olddefconfig doesn't exist in this version of the kernel
+	# try oldnoconfig
+	doprint "olddefconfig failed, trying make oldnoconfig\n";
+	if (!run_command "$make oldnoconfig") {
+	    doprint "oldnoconfig failed, trying yes '' | make oldconfig\n";
+	    # try a yes '' | oldconfig
+	    run_command "yes '' | $make oldconfig" or
+		dodie "failed make config oldconfig";
+	}
+    }
+}
+
+sub assign_configs {
+    my ($hash, $config) = @_;
+
+    doprint "Reading configs from $config\n";
+
+    open (IN, $config)
+	or dodie "Failed to read $config";
+
+    while (<IN>) {
+	chomp;
+	if (/^((CONFIG\S*)=.*)/) {
+	    ${$hash}{$2} = $1;
+	} elsif (/^(# (CONFIG\S*) is not set)/) {
+	    ${$hash}{$2} = $1;
+	}
+    }
+
+    close(IN);
+}
+
+sub process_config_ignore {
+    my ($config) = @_;
+
+    assign_configs \%config_ignore, $config;
+}
+
+sub get_dependencies {
+    my ($config) = @_;
+
+    my $arr = $dependency{$config};
+    if (!defined($arr)) {
+	return ();
+    }
+
+    my @deps = @{$arr};
+
+    foreach my $dep (@{$arr}) {
+	print "ADD DEP $dep\n";
+	@deps = (@deps, get_dependencies $dep);
+    }
+
+    return @deps;
+}
+
+sub save_config {
+    my ($pc, $file) = @_;
+
+    my %configs = %{$pc};
+
+    doprint "Saving configs into $file\n";
+
+    open(OUT, ">$file") or dodie "Can not write to $file";
+
+    foreach my $config (keys %configs) {
+	print OUT "$configs{$config}\n";
+    }
+    close(OUT);
+}
+
+sub create_config {
+    my ($name, $pc) = @_;
+
+    doprint "Creating old config from $name configs\n";
+
+    save_config $pc, $output_config;
+
+    make_oldconfig;
+}
+
+# compare two config hashes, and return configs with different vals.
+# It returns B's config values, but you can use A to see what A was.
+sub diff_config_vals {
+    my ($pa, $pb) = @_;
+
+    # crappy Perl way to pass in hashes.
+    my %a = %{$pa};
+    my %b = %{$pb};
+
+    my %ret;
+
+    foreach my $item (keys %a) {
+	if (defined($b{$item}) && $b{$item} ne $a{$item}) {
+	    $ret{$item} = $b{$item};
+	}
+    }
+
+    return %ret;
+}
+
+# compare two config hashes and return the configs in B but not A
+sub diff_configs {
+    my ($pa, $pb) = @_;
+
+    my %ret;
+
+    # crappy Perl way to pass in hashes.
+    my %a = %{$pa};
+    my %b = %{$pb};
+
+    foreach my $item (keys %b) {
+	if (!defined($a{$item})) {
+	    $ret{$item} = $b{$item};
+	}
+    }
+
+    return %ret;
+}
+
+# return if two configs are equal or not
+# 0 is equal +1 b has something a does not
+# +1 if a and b have a different item.
+# -1 if a has something b does not
+sub compare_configs {
+    my ($pa, $pb) = @_;
+
+    my %ret;
+
+    # crappy Perl way to pass in hashes.
+    my %a = %{$pa};
+    my %b = %{$pb};
+
+    foreach my $item (keys %b) {
+	if (!defined($a{$item})) {
+	    return 1;
+	}
+	if ($a{$item} ne $b{$item}) {
+	    return 1;
+	}
+    }
+
+    foreach my $item (keys %a) {
+	if (!defined($b{$item})) {
+	    return -1;
+	}
+    }
+
+    return 0;
+}
+
+sub process_failed {
+    my ($config) = @_;
+
+    doprint "\n\n***************************************\n";
+    doprint "Found bad config: $config\n";
+    doprint "***************************************\n\n";
+}
+
+sub process_new_config {
+    my ($tc, $nc, $gc, $bc) = @_;
+
+    my %tmp_config = %{$tc};
+    my %good_configs = %{$gc};
+    my %bad_configs = %{$bc};
+
+    my %new_configs;
+
+    my $runtest = 1;
+    my $ret;
+
+    create_config "tmp_configs", \%tmp_config;
+    assign_configs \%new_configs, $output_config;
+
+    $ret = compare_configs \%new_configs, \%bad_configs;
+    if (!$ret) {
+	doprint "New config equals bad config, try next test\n";
+	$runtest = 0;
+    }
+
+    if ($runtest) {
+	$ret = compare_configs \%new_configs, \%good_configs;
+	if (!$ret) {
+	    doprint "New config equals good config, try next test\n";
+	    $runtest = 0;
+	}
+    }
+
+    %{$nc} = %new_configs;
+
+    return $runtest;
+}
+
+sub convert_config {
+    my ($config) = @_;
+
+    if ($config =~ /^# (.*) is not set/) {
+	$config = "$1=n";
+    }
+
+    $config =~ s/^CONFIG_//;
+    return $config;
+}
+
+sub print_config {
+    my ($sym, $config) = @_;
+
+    $config = convert_config $config;
+    doprint "$sym$config\n";
+}
+
+sub print_config_compare {
+    my ($good_config, $bad_config) = @_;
+
+    $good_config = convert_config $good_config;
+    $bad_config = convert_config $bad_config;
+
+    my $good_value = $good_config;
+    my $bad_value = $bad_config;
+    $good_value =~ s/(.*)=//;
+    my $config = $1;
+
+    $bad_value =~ s/.*=//;
+
+    doprint " $config $good_value -> $bad_value\n";
+}
+
+# Pass in:
+# $phalf: half of the configs names you want to add
+# $oconfigs: The orginial configs to start with
+# $sconfigs: The source to update $oconfigs with (from $phalf)
+# $which: The name of which half that is updating (top / bottom)
+# $type: The name of the source type (good / bad)
+sub make_half {
+    my ($phalf, $oconfigs, $sconfigs, $which, $type) = @_;
+
+    my @half = @{$phalf};
+    my %orig_configs = %{$oconfigs};
+    my %source_configs = %{$sconfigs};
+
+    my %tmp_config = %orig_configs;
+
+    doprint "Settings bisect with $which half of $type configs:\n";
+    foreach my $item (@half) {
+	doprint "Updating $item to $source_configs{$item}\n";
+	$tmp_config{$item} = $source_configs{$item};
+    }
+
+    return %tmp_config;
+}
+
+sub run_config_bisect {
+    my ($pgood, $pbad) = @_;
+
+    my %good_configs = %{$pgood};
+    my %bad_configs = %{$pbad};
+
+    my %diff_configs = diff_config_vals \%good_configs, \%bad_configs;
+    my %b_configs = diff_configs \%good_configs, \%bad_configs;
+    my %g_configs = diff_configs \%bad_configs, \%good_configs;
+
+    # diff_arr is what is in both good and bad but are different (y->n)
+    my @diff_arr = keys %diff_configs;
+    my $len_diff = $#diff_arr + 1;
+
+    # b_arr is what is in bad but not in good (has depends)
+    my @b_arr = keys %b_configs;
+    my $len_b = $#b_arr + 1;
+
+    # g_arr is what is in good but not in bad
+    my @g_arr = keys %g_configs;
+    my $len_g = $#g_arr + 1;
+
+    my $runtest = 0;
+    my %new_configs;
+    my $ret;
+
+    # Look at the configs that are different between good and bad.
+    # This does not include those that depend on other configs
+    #  (configs depending on other configs that are not set would
+    #   not show up even as a "# CONFIG_FOO is not set"
+
+
+    doprint "# of configs to check:             $len_diff\n";
+    doprint "# of configs showing only in good: $len_g\n";
+    doprint "# of configs showing only in bad:  $len_b\n";
+
+    if ($len_diff > 0) {
+	# Now test for different values
+
+	doprint "Configs left to check:\n";
+	doprint "  Good Config\t\t\tBad Config\n";
+	doprint "  -----------\t\t\t----------\n";
+	foreach my $item (@diff_arr) {
+	    doprint "  $good_configs{$item}\t$bad_configs{$item}\n";
+	}
+
+	my $half = int($#diff_arr / 2);
+	my @tophalf = @diff_arr[0 .. $half];
+
+	doprint "Set tmp config to be good config with some bad config values\n";
+
+	my %tmp_config = make_half \@tophalf, \%good_configs,
+	    \%bad_configs, "top", "bad";
+
+	$runtest = process_new_config \%tmp_config, \%new_configs,
+			    \%good_configs, \%bad_configs;
+
+	if (!$runtest) {
+	    doprint "Set tmp config to be bad config with some good config values\n";
+
+	    my %tmp_config = make_half \@tophalf, \%bad_configs,
+		\%good_configs, "top", "good";
+
+	    $runtest = process_new_config \%tmp_config, \%new_configs,
+		\%good_configs, \%bad_configs;
+	}
+    }
+
+    if (!$runtest && $len_diff > 0) {
+	# do the same thing, but this time with bottom half
+
+	my $half = int($#diff_arr / 2);
+	my @bottomhalf = @diff_arr[$half+1 .. $#diff_arr];
+
+	doprint "Set tmp config to be good config with some bad config values\n";
+
+	my %tmp_config = make_half \@bottomhalf, \%good_configs,
+	    \%bad_configs, "bottom", "bad";
+
+	$runtest = process_new_config \%tmp_config, \%new_configs,
+			    \%good_configs, \%bad_configs;
+
+	if (!$runtest) {
+	    doprint "Set tmp config to be bad config with some good config values\n";
+
+	    my %tmp_config = make_half \@bottomhalf, \%bad_configs,
+		\%good_configs, "bottom", "good";
+
+	    $runtest = process_new_config \%tmp_config, \%new_configs,
+		\%good_configs, \%bad_configs;
+	}
+    }
+
+    if ($runtest) {
+	make_oldconfig;
+	doprint "READY TO TEST .config IN $build\n";
+	return 0;
+    }
+
+    doprint "\n%%%%%%%% FAILED TO FIND SINGLE BAD CONFIG %%%%%%%%\n";
+    doprint "Hmm, can't make any more changes without making good == bad?\n";
+    doprint "Difference between good (+) and bad (-)\n";
+
+    foreach my $item (keys %bad_configs) {
+	if (!defined($good_configs{$item})) {
+	    print_config "-", $bad_configs{$item};
+	}
+    }
+
+    foreach my $item (keys %good_configs) {
+	next if (!defined($bad_configs{$item}));
+	if ($good_configs{$item} ne $bad_configs{$item}) {
+	    print_config_compare $good_configs{$item}, $bad_configs{$item};
+	}
+    }
+
+    foreach my $item (keys %good_configs) {
+	if (!defined($bad_configs{$item})) {
+	    print_config "+", $good_configs{$item};
+	}
+    }
+    return -1;
+}
+
+sub config_bisect {
+    my ($good_config, $bad_config) = @_;
+    my $ret;
+
+    my %good_configs;
+    my %bad_configs;
+    my %tmp_configs;
+
+    doprint "Run good configs through make oldconfig\n";
+    assign_configs \%tmp_configs, $good_config;
+    create_config "$good_config", \%tmp_configs;
+    assign_configs \%good_configs, $output_config;
+
+    doprint "Run bad configs through make oldconfig\n";
+    assign_configs \%tmp_configs, $bad_config;
+    create_config "$bad_config", \%tmp_configs;
+    assign_configs \%bad_configs, $output_config;
+
+    save_config \%good_configs, $good_config;
+    save_config \%bad_configs, $bad_config;
+
+    return run_config_bisect \%good_configs, \%bad_configs;
+}
+
+while ($#ARGV >= 0) {
+    if ($ARGV[0] !~ m/^-/) {
+	last;
+    }
+    my $opt = shift @ARGV;
+
+    if ($opt eq "-b") {
+	$val = shift @ARGV;
+	if (!defined($val)) {
+	    die "-b requires value\n";
+	}
+	$build = $val;
+    }
+
+    elsif ($opt eq "-l") {
+	$val = shift @ARGV;
+	if (!defined($val)) {
+	    die "-l requires value\n";
+	}
+	$tree = $val;
+    }
+
+    elsif ($opt eq "-r") {
+	$reset_bisect = 1;
+    }
+
+    elsif ($opt eq "-h") {
+	usage;
+    }
+
+    else {
+	die "Unknow option $opt\n";
+    }
+}
+
+$build = $tree if (!defined($build));
+
+$tree = expand_path $tree;
+$build = expand_path $build;
+
+if ( ! -d $tree ) {
+    die "$tree not a directory\n";
+}
+
+if ( ! -d $build ) {
+    die "$build not a directory\n";
+}
+
+usage if $#ARGV < 1;
+
+if ($#ARGV == 1) {
+    $start = 1;
+} elsif ($#ARGV == 2) {
+    $val = $ARGV[2];
+    if ($val ne "good" && $val ne "bad") {
+	die "Unknown command '$val', bust be either \"good\" or \"bad\"\n";
+    }
+} else {
+    usage;
+}
+
+my $good_start = expand_path $ARGV[0];
+my $bad_start = expand_path $ARGV[1];
+
+my $good = "$good_start.tmp";
+my $bad = "$bad_start.tmp";
+
+$make = "make";
+
+if ($build ne $tree) {
+    $make = "make O=$build"
+}
+
+$output_config = "$build/.config";
+
+if ($start) {
+    if ( ! -f $good_start ) {
+	die "$good_start not found\n";
+    }
+    if ( ! -f $bad_start ) {
+	die "$bad_start not found\n";
+    }
+    if ( -f $good || -f $bad ) {
+	my $p = "";
+
+	if ( -f $good ) {
+	    $p = "$good exists\n";
+	}
+
+	if ( -f $bad ) {
+	    $p = "$p$bad exists\n";
+	}
+
+	if (!defined($reset_bisect)) {
+	    if (!read_yn "${p}Overwrite and start new bisect anyway?") {
+		exit (-1);
+	    }
+	}
+    }
+    run_command "cp $good_start $good" or die "failed to copy to $good\n";
+    run_command "cp $bad_start $bad" or die "faield to copy to $bad\n";
+} else {
+    if ( ! -f $good ) {
+	die "Can not find file $good\n";
+    }
+    if ( ! -f $bad ) {
+	die "Can not find file $bad\n";
+    }
+    if ($val eq "good") {
+	run_command "cp $output_config $good" or die "failed to copy $config to $good\n";
+    } elsif ($val eq "bad") {
+	run_command "cp $output_config $bad" or die "failed to copy $config to $bad\n";
+    }
+}
+
+chdir $tree || die "can't change directory to $tree";
+
+my $ret = config_bisect $good, $bad;
+
+if (!$ret) {
+    exit(0);
+}
+
+if ($ret > 0) {
+    doprint "Cleaning temp files\n";
+    run_command "rm $good";
+    run_command "rm $bad";
+    exit(1);
+} else {
+    doprint "See good and bad configs for details:\n";
+    doprint "good: $good\n";
+    doprint "bad:  $bad\n";
+    doprint "%%%%%%%% FAILED TO FIND SINGLE BAD CONFIG %%%%%%%%\n";
+}
+exit(2);
diff --git a/tools/testing/ktest/examples/README b/tools/testing/ktest/examples/README
new file mode 100644
index 000000000..a12d295a0
--- /dev/null
+++ b/tools/testing/ktest/examples/README
@@ -0,0 +1,32 @@
+This directory contains example configs to use ktest for various tasks.
+The configs still need to be customized for your environment, but it
+is broken up by task which makes it easier to understand how to set up
+ktest.
+
+The configs are based off of real working configs but have been modified
+and commented to show more generic use cases that are more helpful for
+developers.
+
+crosstests.conf - this config shows an example of testing a git repo against
+    lots of different architectures. It only does build tests, but makes
+    it easy to compile test different archs. You can download the arch
+    cross compilers from:
+  http://kernel.org/pub/tools/crosstool/files/bin/x86_64/
+
+test.conf - A generic example of a config. This is based on an actual config
+     used to perform real testing.
+
+kvm.conf - A example of a config that is used to test a virtual guest running
+     on a host.
+
+snowball.conf - An example config that was used to demo ktest.pl against
+     a snowball ARM board.
+
+include/  -  The include directory holds default configs that can be
+    included into other configs. This is a real use example that shows how
+    to reuse configs for various machines or set ups. The files here
+    are included by other config files, where the other config files define
+    options and variables that will make the included config work for the
+    given environment.
+
+
diff --git a/tools/testing/ktest/examples/crosstests.conf b/tools/testing/ktest/examples/crosstests.conf
new file mode 100644
index 000000000..6907f3259
--- /dev/null
+++ b/tools/testing/ktest/examples/crosstests.conf
@@ -0,0 +1,225 @@
+#
+# Example config for cross compiling
+#
+# In this config, it is expected that the tool chains from:
+#
+#   http://kernel.org/pub/tools/crosstool/files/bin/x86_64/
+#
+# running on a x86_64 system have been downloaded and installed into:
+#
+#   /usr/local/
+#
+# such that the compiler binaries are something like:
+#
+#   /usr/local/gcc-4.5.2-nolibc/mips-linux/bin/mips-linux-gcc
+#
+# Some of the archs will use gcc-4.5.1 instead of gcc-4.5.2
+# this config uses variables to differentiate them.
+# 
+# Comments describe some of the options, but full descriptions of
+# options are described in the samples.conf file.
+
+# ${PWD} is defined by ktest.pl to be the directory that the user
+# was in when they executed ktest.pl. It may be better to hardcode the
+# path name here. THIS_DIR is the variable used through out the config file
+# in case you want to change it.
+
+THIS_DIR := ${PWD}
+
+# Update the BUILD_DIR option to the location of your git repo you want to test.
+BUILD_DIR = ${THIS_DIR}/linux.git
+
+# The build will go into this directory. It will be created when you run the test.
+OUTPUT_DIR = ${THIS_DIR}/cross-compile
+
+# The build will be compiled with -j8
+BUILD_OPTIONS = -j8
+
+# The test will not stop when it hits a failure.
+DIE_ON_FAILURE = 0
+
+# If you want to have ktest.pl store the failure somewhere, uncomment this option
+# and change the directory where ktest should store the failures.
+#STORE_FAILURES = ${THIS_DIR}/failures
+
+# The log file is stored in the OUTPUT_DIR called cross.log
+# If you enable this, you need to create the OUTPUT_DIR. It wont be created for you.
+LOG_FILE = ${OUTPUT_DIR}/cross.log
+
+# The log file will be cleared each time you run ktest.
+CLEAR_LOG = 1
+
+# As some archs do not build with the defconfig, they have been marked
+# to be ignored. If you want to test them anyway, change DO_FAILED to 1.
+# If a test that has been marked as DO_FAILED passes, then you should change
+# that test to be DO_DEFAULT
+
+DO_FAILED := 0
+DO_DEFAULT := 1
+
+# By setting both DO_FAILED and DO_DEFAULT to zero, you can pick a single
+# arch that you want to test. (uncomment RUN and chose your arch)
+#RUN := arm
+
+# At the bottom of the config file exists a bisect test. You can update that
+# test and set DO_FAILED and DO_DEFAULT to zero, and uncomment this variable
+# to run the bisect on the arch.
+#RUN := bisect
+
+# By default all tests will be running gcc 4.5.2. Some tests are using 4.5.1
+# and they select that in the test.
+# Note: GCC_VER is declared as on option and not a variable ('=' instead of ':=')
+# This is important. A variable is used only in the config file and if it is set
+# it stays that way for the rest of the config file until it is change again.
+# Here we want GCC_VER to remain persistent and change for each test, as it is used in
+# the MAKE_CMD. By using '=' instead of ':=' we achieve our goal.
+
+GCC_VER = 4.5.2
+MAKE_CMD = PATH=/usr/local/gcc-${GCC_VER}-nolibc/${CROSS}/bin:$PATH CROSS_COMPILE=${CROSS}- make ARCH=${ARCH}
+
+# all tests are only doing builds.
+TEST_TYPE = build
+
+# If you want to add configs on top of the defconfig, you can add those configs into
+# the add-config file and uncomment this option. This is useful if you want to test
+# all cross compiles with PREEMPT set, or TRACING on, etc.
+#ADD_CONFIG = ${THIS_DIR}/add-config
+
+# All tests are using defconfig
+BUILD_TYPE = defconfig
+
+# The test names will have the arch and cross compiler used. This will be shown in
+# the results.
+TEST_NAME = ${ARCH} ${CROSS}
+
+# alpha
+TEST_START IF ${RUN} == alpha || ${DO_DEFAULT}
+# Notice that CROSS and ARCH are also options and not variables (again '=' instead
+# of ':='). This is because TEST_NAME and MAKE_CMD wil use them for each test.
+# Only options are available during runs. Variables are only present in parsing the
+# config file.
+CROSS = alpha-linux
+ARCH = alpha
+
+# arm
+TEST_START IF ${RUN} == arm || ${DO_DEFAULT}
+CROSS = arm-unknown-linux-gnueabi
+ARCH = arm
+
+# ia64
+TEST_START IF ${RUN} == ia64 || ${DO_DEFAULT}
+CROSS = ia64-linux
+ARCH = ia64
+
+# m68k fails with error?
+TEST_START IF ${RUN} == m68k || ${DO_DEFAULT}
+CROSS = m68k-linux
+ARCH = m68k
+
+# mips64
+TEST_START IF ${RUN} == mips || ${RUN} == mips64 || ${DO_DEFAULT}
+CROSS = mips64-linux
+ARCH = mips
+
+# mips32
+TEST_START IF ${RUN} == mips || ${RUN} == mips32 || ${DO_DEFAULT}
+CROSS = mips-linux
+ARCH = mips
+
+# parisc64 failed?
+TEST_START IF ${RUN} == hppa || ${RUN} == hppa64 || ${DO_FAILED}
+CROSS = hppa64-linux
+ARCH = parisc
+
+# parisc
+TEST_START IF ${RUN} == hppa || ${RUN} == hppa32 || ${DO_FAILED}
+CROSS = hppa-linux
+ARCH = parisc
+
+# ppc
+TEST_START IF ${RUN} == ppc || ${RUN} == ppc32 || ${DO_DEFAULT}
+CROSS = powerpc-linux
+ARCH = powerpc
+
+# ppc64
+TEST_START IF ${RUN} == ppc || ${RUN} == ppc64 || ${DO_DEFAULT}
+CROSS = powerpc64-linux
+ARCH = powerpc
+
+# s390
+TEST_START IF ${RUN} == s390 || ${DO_DEFAULT}
+CROSS = s390x-linux
+ARCH = s390
+
+# sh
+TEST_START IF ${RUN} == sh || ${DO_DEFAULT}
+CROSS = sh4-linux
+ARCH = sh
+
+# sparc64
+TEST_START IF ${RUN} == sparc || ${RUN} == sparc64 || ${DO_DEFAULT}
+CROSS = sparc64-linux
+ARCH = sparc64
+
+# sparc
+TEST_START IF ${RUN} == sparc || ${RUN} == sparc32 || ${DO_DEFAULT}
+CROSS = sparc-linux
+ARCH = sparc
+
+# xtensa failed
+TEST_START IF ${RUN} == xtensa || ${DO_FAILED}
+CROSS = xtensa-linux
+ARCH = xtensa
+
+# UML
+TEST_START IF ${RUN} == uml || ${DO_DEFAULT}
+MAKE_CMD = make ARCH=um SUBARCH=x86_64
+ARCH = uml
+CROSS =
+
+TEST_START IF ${RUN} == x86 || ${RUN} == i386 || ${DO_DEFAULT}
+MAKE_CMD = make ARCH=i386
+ARCH = i386
+CROSS = 
+
+TEST_START IF ${RUN} == x86 || ${RUN} == x86_64 || ${DO_DEFAULT}
+MAKE_CMD = make ARCH=x86_64
+ARCH = x86_64
+CROSS = 
+
+#################################
+
+# This is a bisect if needed. You need to give it a MIN_CONFIG that
+# will be the config file it uses. Basically, just copy the created defconfig
+# for the arch someplace and point MIN_CONFIG to it.
+TEST_START IF ${RUN} == bisect
+MIN_CONFIG = ${THIS_DIR}/min-config
+CROSS = s390x-linux
+ARCH = s390
+TEST_TYPE = bisect
+BISECT_TYPE = build
+BISECT_GOOD = v3.1
+BISECT_BAD = v3.2
+CHECKOUT = v3.2
+
+#################################
+
+# These defaults are needed to keep ktest.pl from complaining. They are
+# ignored because the test does not go pass the build. No install or
+# booting of the target images.
+
+DEFAULTS
+MACHINE = crosstest
+SSH_USER = root
+BUILD_TARGET = cross
+TARGET_IMAGE = image
+POWER_CYCLE = cycle
+CONSOLE = console
+LOCALVERSION = version
+GRUB_MENU = grub
+
+REBOOT_ON_ERROR = 0
+POWEROFF_ON_ERROR = 0
+POWEROFF_ON_SUCCESS = 0
+REBOOT_ON_SUCCESS = 0
+
diff --git a/tools/testing/ktest/examples/include/bisect.conf b/tools/testing/ktest/examples/include/bisect.conf
new file mode 100644
index 000000000..009bea65b
--- /dev/null
+++ b/tools/testing/ktest/examples/include/bisect.conf
@@ -0,0 +1,90 @@
+#
+# This example shows the bisect tests (git bisect and config bisect)
+#
+
+
+# The config that includes this file may define a RUN_TEST
+# variable that will tell this config what test to run.
+# (what to set the TEST option to).
+#
+DEFAULTS IF NOT DEFINED RUN_TEST
+# Requires that hackbench is in the PATH
+RUN_TEST := ${SSH} hackbench 50
+
+
+# Set TEST to 'bisect' to do a normal git bisect. You need
+# to modify the options below to make it bisect the exact
+# commits you are interested in.
+#
+TEST_START IF ${TEST} == bisect
+TEST_TYPE = bisect
+# You must set the commit that was considered good (git bisect good)
+BISECT_GOOD = v3.3
+# You must set the commit that was considered bad (git bisect bad)
+BISECT_BAD = HEAD
+# It's best to specify the branch to checkout before starting the bisect.
+CHECKOUT = origin/master
+# This can be build, boot, or test. Here we are doing a bisect
+# that requires to run a test to know if the bisect was good or bad.
+# The test should exit with 0 on good, non-zero for bad. But see
+# the BISECT_RET_* options in samples.conf to override this.
+BISECT_TYPE = test
+TEST = ${RUN_TEST}
+# It is usually a good idea to confirm that the GOOD and the BAD
+# commits are truly good and bad respectively. Having BISECT_CHECK
+# set to 1 will check both that the good commit works and the bad
+# commit fails. If you only want to check one or the other,
+# set BISECT_CHECK to 'good' or to 'bad'.
+BISECT_CHECK = 1
+#BISECT_CHECK = good
+#BISECT_CHECK = bad
+
+# Usually it's a good idea to specify the exact config you
+# want to use throughout the entire bisect. Here we placed
+# it in the directory we called ktest.pl from and named it
+# 'config-bisect'.
+MIN_CONFIG = ${THIS_DIR}/config-bisect
+# By default, if we are doing a BISECT_TYPE = test run but the
+# build or boot fails, ktest.pl will do a 'git bisect skip'.
+# Uncomment the below option to make ktest stop testing on such
+# an error.
+#BISECT_SKIP = 0
+# Now if you had BISECT_SKIP = 0 and the test fails, you can
+# examine what happened and then do 'git bisect log > /tmp/replay'
+# Set BISECT_REPLAY to /tmp/replay and ktest.pl will run the
+# 'git bisect replay /tmp/replay' before continuing the bisect test.
+#BISECT_REPLAY = /tmp/replay
+# If you used BISECT_REPLAY after the bisect test failed, you may
+# not want to continue the bisect on that commit that failed.
+# By setting BISECT_START to a new commit. ktest.pl will checkout
+# that commit after it has performed the 'git bisect replay' but
+# before it continues running the bisect test.
+#BISECT_START = 2545eb6198e7e1ec50daa0cfc64a4cdfecf24ec9
+
+# Now if you don't trust ktest.pl to make the decisions for you, then
+# set BISECT_MANUAL to 1. This will cause ktest.pl not to decide
+# if the commit was good or bad. Instead, it will ask you to tell
+# it if the current commit was good. In the mean time, you could
+# take the result, load it on any machine you want. Run several tests,
+# or whatever you feel like. Then, when you are happy, you can tell
+# ktest if you think it was good or not and ktest.pl will continue
+# the git bisect. You can even change what commit it is currently at.
+#BISECT_MANUAL = 1
+
+
+# One of the unique tests that ktest does is the config bisect.
+# Currently (which hopefully will be fixed soon), the bad config
+# must be a superset of the good config. This is because it only
+# searches for a config that causes the target to fail. If the
+# good config is not a subset of the bad config, or if the target
+# fails because of a lack of a config, then it will not find
+# the config for you.
+TEST_START IF ${TEST} == config-bisect
+TEST_TYPE = config_bisect
+# set to build, boot, test
+CONFIG_BISECT_TYPE = boot
+# Set the config that is considered bad.
+CONFIG_BISECT = ${THIS_DIR}/config-bad
+# This config is optional. By default it uses the
+# MIN_CONFIG as the good config.
+CONFIG_BISECT_GOOD = ${THIS_DIR}/config-good
diff --git a/tools/testing/ktest/examples/include/defaults.conf b/tools/testing/ktest/examples/include/defaults.conf
new file mode 100644
index 000000000..63a1a83f4
--- /dev/null
+++ b/tools/testing/ktest/examples/include/defaults.conf
@@ -0,0 +1,157 @@
+# This file holds defaults for most the tests. It defines the options that
+# are most common to tests that are likely to be shared.
+#
+# Note, after including this file, a config file may override any option
+# with a DEFAULTS OVERRIDE section.
+#
+
+# For those cases that use the same machine to boot a 64 bit
+# and a 32 bit version. The MACHINE is the DNS name to get to the
+# box (usually different if it was 64 bit or 32 bit) but the
+# BOX here is defined as a variable that will be the name of the box
+# itself. It is useful for calling scripts that will power cycle
+# the box, as only one script needs to be created to power cycle
+# even though the box itself has multiple operating systems on it.
+# By default, BOX and MACHINE are the same.
+
+DEFAULTS IF NOT DEFINED BOX
+BOX := ${MACHINE}
+
+
+# Consider each box as 64 bit box, unless the config including this file
+# has defined BITS = 32
+
+DEFAULTS IF NOT DEFINED BITS
+BITS := 64
+
+
+DEFAULTS
+
+# THIS_DIR is used through out the configs and defaults to ${PWD} which
+# is the directory that ktest.pl was called from.
+
+THIS_DIR := ${PWD}
+
+
+# to organize your configs, having each machine save their configs
+# into a separate directly is useful.
+CONFIG_DIR := ${THIS_DIR}/configs/${MACHINE}
+
+# Reset the log before running each test.
+CLEAR_LOG = 1
+
+# As installing kernels usually requires root privilege, default the
+# user on the target as root. It is also required that the target
+# allows ssh to root from the host without asking for a password.
+
+SSH_USER = root
+
+# For accesing the machine, we will ssh to root@machine.
+SSH := ssh ${SSH_USER}@${MACHINE}
+
+# Update this. The default here is ktest will ssh to the target box
+# and run a script called 'run-test' located on that box.
+TEST = ${SSH} run-test
+
+# Point build dir to the git repo you use
+BUILD_DIR = ${THIS_DIR}/linux.git
+
+# Each machine will have its own output build directory.
+OUTPUT_DIR = ${THIS_DIR}/build/${MACHINE}
+
+# Yes this config is focused on x86 (but ktest works for other archs too)
+BUILD_TARGET = arch/x86/boot/bzImage
+TARGET_IMAGE = /boot/vmlinuz-test
+
+# have directory for the scripts to reboot and power cycle the boxes
+SCRIPTS_DIR := ${THIS_DIR}/scripts
+
+# You can have each box/machine have a script to power cycle it.
+# Name your script <box>-cycle.
+POWER_CYCLE = ${SCRIPTS_DIR}/${BOX}-cycle
+
+# This script is used to power off the box.
+POWER_OFF = ${SCRIPTS_DIR}/${BOX}-poweroff
+
+# Keep your test kernels separate from your other kernels.
+LOCALVERSION = -test
+
+# The /boot/grub/menu.lst is searched for the line:
+#  title Test Kernel
+# and ktest will use that kernel to reboot into.
+# For grub2 or other boot loaders, you need to set BOOT_TYPE
+# to 'script' and define other ways to load the kernel.
+# See snowball.conf example.
+#
+GRUB_MENU = Test Kernel
+
+# The kernel build will use this option.
+BUILD_OPTIONS = -j8
+
+# Keeping the log file with the output dir is convenient.
+LOG_FILE = ${OUTPUT_DIR}/${MACHINE}.log
+
+# Each box should have their own minum configuration
+# See min-config.conf
+MIN_CONFIG = ${CONFIG_DIR}/config-min
+
+# For things like randconfigs, there may be configs you find that
+# are already broken, or there may be some configs that you always
+# want set. Uncomment ADD_CONFIG and point it to the make config files
+# that set the configs you want to keep on (or off) in your build.
+# ADD_CONFIG is usually something to add configs to all machines,
+# where as, MIN_CONFIG is specific per machine.
+#ADD_CONFIG = ${THIS_DIR}/config-broken ${THIS_DIR}/config-general
+
+# To speed up reboots for bisects and patchcheck, instead of
+# waiting 60 seconds for the console to be idle, if this line is
+# seen in the console output, ktest will know the good kernel has
+# finished rebooting and it will be able to continue the tests.
+REBOOT_SUCCESS_LINE = ${MACHINE} login:
+
+# The following is different ways to end the test.
+# by setting the variable REBOOT to: none, error, fail or
+# something else, ktest will power cycle or reboot the target box
+# at the end of the tests.
+#
+# REBOOT := none
+#   Don't do anything at the end of the test.
+#
+# REBOOT := error
+#   Reboot the box if ktest detects an error
+#
+# REBOOT := fail
+#   Do not stop on failure, and after all tests are complete
+#   power off the box (for both success and error)
+#   This is good to run over a weekend and you don't want to waste
+#   electricity.
+#
+
+DEFAULTS IF ${REBOOT} == none
+REBOOT_ON_SUCCESS = 0
+REBOOT_ON_ERROR = 0
+POWEROFF_ON_ERROR = 0
+POWEROFF_ON_SUCCESS = 0
+
+DEFAULTS ELSE IF ${REBOOT} == error
+REBOOT_ON_SUCCESS = 0
+REBOOT_ON_ERROR = 1
+POWEROFF_ON_ERROR = 0
+POWEROFF_ON_SUCCESS = 0
+
+DEFAULTS ELSE IF ${REBOOT} == fail
+REBOOT_ON_SUCCESS = 0
+POWEROFF_ON_ERROR = 1
+POWEROFF_ON_SUCCESS = 1
+POWEROFF_AFTER_HALT = 120
+DIE_ON_FAILURE = 0
+
+# Store the failure information into this directory
+# such as the .config, dmesg, and build log.
+STORE_FAILURES = ${THIS_DIR}/failures
+
+DEFAULTS ELSE
+REBOOT_ON_SUCCESS = 1
+REBOOT_ON_ERROR = 1
+POWEROFF_ON_ERROR = 0
+POWEROFF_ON_SUCCESS = 0
diff --git a/tools/testing/ktest/examples/include/min-config.conf b/tools/testing/ktest/examples/include/min-config.conf
new file mode 100644
index 000000000..c703cc46d
--- /dev/null
+++ b/tools/testing/ktest/examples/include/min-config.conf
@@ -0,0 +1,60 @@
+#
+# This file has some examples for creating a MIN_CONFIG.
+# (A .config file that is the minimum for a machine to boot, or
+#  to boot and make a network connection.)
+#
+# A MIN_CONFIG is very useful as it is the minimum configuration
+# needed to boot a given machine. You can debug someone else's
+# .config by only setting the configs in your MIN_CONFIG. The closer
+# your MIN_CONFIG is to the true minimum set of configs needed to
+# boot your machine, the closer the config you test with will be
+# to the users config that had the failure.
+#
+# The make_min_config test allows you to create a MIN_CONFIG that
+# is truly the minimum set of configs needed to boot a box.
+#
+# In this example, the final config will reside in
+# ${CONFIG_DIR}/config-new-min and ${CONFIG_DIR}/config-new-min-net.
+# Just move one to the location you have set for MIN_CONFIG.
+#
+# The first test creates a MIN_CONFIG that will be the minimum
+# configuration to boot ${MACHINE} and be able to ssh to it.
+#
+# The second test creates a MIN_CONFIG that will only boot
+# the target and most likely will not let you ssh to it. (Notice
+# how the second test uses the first test's result to continue with.
+# This is because the second test config is a subset of the first).
+#
+# The ${CONFIG_DIR}/config-skip (and -net) will hold the configs
+# that ktest.pl found would not boot the target without them set.
+# The config-new-min holds configs that ktest.pl could not test
+# directly because another config that was needed to boot the box
+# selected them. Sometimes it is possible that this file will hold
+# the true minimum configuration. You can test to see if this is
+# the case by running the boot test with BOOT_TYPE = allnoconfig and
+# setting setting the MIN_CONFIG to ${CONFIG_DIR}/config-skip. If the
+# machine still boots, then you can use the config-skip as your MIN_CONFIG.
+#
+# These tests can run for several hours (and perhaps days).
+# It's OK to kill the test with a Ctrl^C. By restarting without
+# modifying this config, ktest.pl will notice that the config-new-min(-net)
+# exists, and will use that instead as the starting point.
+# The USE_OUTPUT_MIN_CONFIG is set to 1 to keep ktest.pl from asking
+# you if you want to use the OUTPUT_MIN_CONFIG as the starting point.
+# By using the OUTPUT_MIN_CONFIG as the starting point will allow ktest.pl to
+# start almost where it left off.
+#
+TEST_START IF ${TEST} == min-config
+TEST_TYPE = make_min_config
+OUTPUT_MIN_CONFIG = ${CONFIG_DIR}/config-new-min-net
+IGNORE_CONFIG = ${CONFIG_DIR}/config-skip-net
+MIN_CONFIG_TYPE = test
+TEST = ${SSH} echo hi
+USE_OUTPUT_MIN_CONFIG = 1
+
+TEST_START IF ${TEST} == min-config && ${MULTI}
+TEST_TYPE = make_min_config
+OUTPUT_MIN_CONFIG = ${CONFIG_DIR}/config-new-min
+IGNORE_CONFIG = ${CONFIG_DIR}/config-skip
+MIN_CONFIG = ${CONFIG_DIR}/config-new-min-net
+USE_OUTPUT_MIN_CONFIG = 1
diff --git a/tools/testing/ktest/examples/include/patchcheck.conf b/tools/testing/ktest/examples/include/patchcheck.conf
new file mode 100644
index 000000000..0eb0a5ac7
--- /dev/null
+++ b/tools/testing/ktest/examples/include/patchcheck.conf
@@ -0,0 +1,111 @@
+# patchcheck.conf
+#
+# This contains a test that takes two git commits and will test each
+# commit between the two. The build test will look at what files the
+# commit has touched, and if any of those files produce a warning, then
+# the build will fail.
+
+
+# PATCH_START is the commit to begin with and PATCH_END is the commit
+# to end with (inclusive). This is similar to doing a git rebase -i PATCH_START~1
+# and then testing each commit and doing a git rebase --continue.
+# You can use a SHA1, a git tag, or anything that git will accept for a checkout
+
+PATCH_START := HEAD~3
+PATCH_END := HEAD
+
+# Use the oldconfig if build_type wasn't defined
+DEFAULTS IF NOT DEFINED BUILD_TYPE
+DO_BUILD_TYPE := oldconfig
+
+DEFAULTS ELSE
+DO_BUILD_TYPE := ${BUILD_TYPE}
+
+DEFAULTS
+
+
+# Change PATCH_CHECKOUT to be the branch you want to test. The test will
+# do a git checkout of this branch before starting. Obviously both
+# PATCH_START and PATCH_END must be in this branch (and PATCH_START must
+# be contained by PATCH_END).
+
+PATCH_CHECKOUT := test/branch
+
+# Usually it's a good idea to have a set config to use for testing individual
+# patches.
+PATCH_CONFIG := ${CONFIG_DIR}/config-patchcheck
+
+# Change PATCH_TEST to run some test for each patch. Each commit that is
+# tested, after it is built and installed on the test machine, this command
+# will be executed. Usually what is done is to ssh to the target box and
+# run some test scripts. If you just want to boot test your patches
+# comment PATCH_TEST out.
+PATCH_TEST := ${SSH} "/usr/local/bin/ktest-test-script"
+
+DEFAULTS IF DEFINED PATCH_TEST
+PATCH_TEST_TYPE := test
+
+DEFAULTS ELSE
+PATCH_TEST_TYPE := boot
+
+# If for some reason a file has a warning that one of your patches touch
+# but you do not care about it, set IGNORE_WARNINGS to that commit(s)
+# (space delimited)
+#IGNORE_WARNINGS = 39eaf7ef884dcc44f7ff1bac803ca2a1dcf43544 6edb2a8a385f0cdef51dae37ff23e74d76d8a6ce
+
+# Instead of just checking for warnings to files that are changed
+# it can be advantageous to check for any new warnings. If a
+# header file is changed, it could cause a warning in a file not
+# touched by the commit. To detect these kinds of warnings, you
+# can use the WARNINGS_FILE option.
+#
+# If the variable CREATE_WARNINGS_FILE is set, this config will
+# enable the WARNINGS_FILE during the patchcheck test. Also,
+# before running the patchcheck test, it will create the
+# warnings file.
+#
+DEFAULTS IF DEFINED CREATE_WARNINGS_FILE
+WARNINGS_FILE = ${OUTPUT_DIR}/warnings_file
+
+TEST_START IF DEFINED CREATE_WARNINGS_FILE
+# WARNINGS_FILE is already set by the DEFAULTS above
+TEST_TYPE = make_warnings_file
+# Checkout the commit before the patches to test,
+# and record all the warnings that exist before the patches
+# to test are added
+CHECKOUT = ${PATCHCHECK_START}~1
+# Force a full build
+BUILD_NOCLEAN = 0
+BUILD_TYPE = ${DO_BUILD_TYPE}
+
+# If you are running a multi test, and the test failed on the first
+# test but on, say the 5th patch. If you want to restart on the
+# fifth patch, set PATCH_START1. This will make the first test start
+# from this commit instead of the PATCH_START commit.
+# Note, do not change this option. Just define PATCH_START1 in the
+# top config (the one you pass to ktest.pl), and this will use it,
+# otherwise it will just use PATCH_START if PATCH_START1 is not defined.
+DEFAULTS IF NOT DEFINED PATCH_START1
+PATCH_START1 := ${PATCH_START}
+
+TEST_START IF ${TEST} == patchcheck
+TEST_TYPE = patchcheck
+MIN_CONFIG = ${PATCH_CONFIG}
+TEST = ${PATCH_TEST}
+PATCHCHECK_TYPE = ${PATCH_TEST_TYPE}
+PATCHCHECK_START = ${PATCH_START1}
+PATCHCHECK_END = ${PATCH_END}
+CHECKOUT = ${PATCH_CHECKOUT}
+BUILD_TYPE = ${DO_BUILD_TYPE}
+
+TEST_START IF ${TEST} == patchcheck && ${MULTI}
+TEST_TYPE = patchcheck
+MIN_CONFIG = ${PATCH_CONFIG}
+TEST = ${PATCH_TEST}
+PATCHCHECK_TYPE = ${PATCH_TEST_TYPE}
+PATCHCHECK_START = ${PATCH_START}
+PATCHCHECK_END = ${PATCH_END}
+CHECKOUT = ${PATCH_CHECKOUT}
+# Use multi to test different compilers?
+MAKE_CMD = CC=gcc-4.5.1 make
+BUILD_TYPE = ${DO_BUILD_TYPE}
diff --git a/tools/testing/ktest/examples/include/tests.conf b/tools/testing/ktest/examples/include/tests.conf
new file mode 100644
index 000000000..60cedb1a1
--- /dev/null
+++ b/tools/testing/ktest/examples/include/tests.conf
@@ -0,0 +1,74 @@
+#
+# This is an example of various tests that you can run
+#
+# The variable TEST can be of boot, build, randconfig, or test.
+#
+# Note that TEST is a variable created with ':=' and only exists
+# throughout the config processing (not during the tests itself).
+#
+# The TEST option (defined with '=') is used to tell ktest.pl
+# what test to run after a successful boot. The TEST option is
+# persistent into the test runs.
+#
+
+# The config that includes this file may define a BOOT_TYPE
+# variable that tells this config what type of boot test to run.
+# If it's not defined, the below DEFAULTS will set the default
+# to 'oldconfig'.
+#
+DEFAULTS IF NOT DEFINED BOOT_TYPE
+BOOT_TYPE := oldconfig
+
+# The config that includes this file may define a RUN_TEST
+# variable that will tell this config what test to run.
+# (what to set the TEST option to).
+#
+DEFAULTS IF NOT DEFINED RUN_TEST
+# Requires that hackbench is in the PATH
+RUN_TEST := ${SSH} hackbench 50
+
+
+# If TEST is set to 'boot' then just build a kernel and boot
+# the target.
+TEST_START IF ${TEST} == boot
+TEST_TYPE = boot
+# Notice how we set the BUILD_TYPE option to the BOOT_TYPE variable.
+BUILD_TYPE = ${BOOT_TYPE}
+# Do not do a make mrproper.
+BUILD_NOCLEAN = 1
+
+# If you only want to build the kernel, and perhaps install
+# and test it yourself, then just set TEST to build.
+TEST_START IF ${TEST} == build
+TEST_TYPE = build
+BUILD_TYPE = ${BOOT_TYPE}
+BUILD_NOCLEAN = 1
+
+# Build, install, boot and test with a randconfg 10 times.
+# It is important that you have set MIN_CONFIG in the config
+# that includes this file otherwise it is likely that the
+# randconfig will not have the necessary configs needed to
+# boot your box. This version of the test requires a min
+# config that has enough to make sure the target has network
+# working.
+TEST_START ITERATE 10 IF ${TEST} == randconfig
+MIN_CONFIG = ${CONFIG_DIR}/config-min-net
+TEST_TYPE = test
+BUILD_TYPE = randconfig
+TEST = ${RUN_TEST}
+
+# This is the same as above, but only tests to a boot prompt.
+# The MIN_CONFIG used here does not need to have networking
+# working.
+TEST_START ITERATE 10 IF ${TEST} == randconfig && ${MULTI}
+TEST_TYPE = boot
+BUILD_TYPE = randconfig
+MIN_CONFIG = ${CONFIG_DIR}/config-min
+MAKE_CMD = make
+
+# This builds, installs, boots and tests the target.
+TEST_START IF ${TEST} == test
+TEST_TYPE = test
+BUILD_TYPE = ${BOOT_TYPE}
+TEST = ${RUN_TEST}
+BUILD_NOCLEAN = 1
diff --git a/tools/testing/ktest/examples/kvm.conf b/tools/testing/ktest/examples/kvm.conf
new file mode 100644
index 000000000..fbc134f9a
--- /dev/null
+++ b/tools/testing/ktest/examples/kvm.conf
@@ -0,0 +1,92 @@
+#
+# This config is an example usage of ktest.pl with a kvm guest
+#
+# The guest is called 'Guest' and this would be something that
+# could be run on the host to test a virtual machine target.
+
+MACHINE = Guest
+
+
+# Use virsh to read the serial console of the guest
+CONSOLE =  virsh console ${MACHINE}
+
+# Use SIGKILL to terminate virsh console. We can't kill virsh console
+# by the default signal, SIGINT.
+CLOSE_CONSOLE_SIGNAL = KILL
+
+#*************************************#
+# This part is the same as test.conf  #
+#*************************************#
+
+# The include files will set up the type of test to run. Just set TEST to
+# which test you want to run.
+#
+# TESTS = patchcheck, randconfig, boot, test, config-bisect, bisect, min-config
+#
+# See the include/*.conf files that define these tests
+#
+TEST := patchcheck
+
+# Some tests may have more than one test to run. Define MULTI := 1 to run
+# the extra tests.
+MULTI := 0
+
+# In case you want to differentiate which type of system you are testing
+BITS := 64
+
+# REBOOT = none, error, fail, empty
+#  See include/defaults.conf
+REBOOT := empty
+
+
+# The defaults file will set up various settings that can be used by all
+# machine configs.
+INCLUDE include/defaults.conf
+
+
+#*************************************#
+# Now we are different from test.conf #
+#*************************************#
+
+
+# The example here assumes that Guest is running a Fedora release
+# that uses dracut for its initfs. The POST_INSTALL will be executed
+# after the install of the kernel and modules are complete.
+#
+POST_INSTALL = ${SSH} /sbin/dracut -f /boot/initramfs-test.img $KERNEL_VERSION
+
+# Guests sometimes get stuck on reboot. We wait 3 seconds after running
+# the reboot command and then  do a full power-cycle of the guest.
+# This forces the guest to restart.
+#
+POWERCYCLE_AFTER_REBOOT = 3
+
+# We do the same after the halt command, but this time we wait 20 seconds.
+POWEROFF_AFTER_HALT = 20
+
+
+# As the defaults.conf file has a POWER_CYCLE option already defined,
+# and options can not be defined in the same section more than once
+# (all DEFAULTS sections are considered the same). We use the
+# DEFAULTS OVERRIDE to tell ktest.pl to ignore the previous defined
+# options, for the options set in the OVERRIDE section.
+#
+DEFAULTS OVERRIDE
+
+# Instead of using the default POWER_CYCLE option defined in
+# defaults.conf, we use virsh to cycle it. To do so, we destroy
+# the guest, wait 5 seconds, and then start it up again.
+# Crude, but effective.
+#
+POWER_CYCLE = virsh destroy ${MACHINE}; sleep 5; virsh start ${MACHINE}
+
+
+DEFAULTS
+
+# The following files each handle a different test case.
+# Having them included allows you to set up more than one machine and share
+# the same tests.
+INCLUDE include/patchcheck.conf
+INCLUDE include/tests.conf
+INCLUDE include/bisect.conf
+INCLUDE include/min-config.conf
diff --git a/tools/testing/ktest/examples/snowball.conf b/tools/testing/ktest/examples/snowball.conf
new file mode 100644
index 000000000..a82a3c5bc
--- /dev/null
+++ b/tools/testing/ktest/examples/snowball.conf
@@ -0,0 +1,53 @@
+# This example was used to boot the snowball ARM board.
+# See http://people.redhat.com/srostedt/ktest-embedded-2012/
+
+# PWD is a ktest.pl variable that will result in the process working
+# directory that ktest.pl is executed in.
+
+# THIS_DIR is automatically assigned the PWD of the path that generated
+# the config file. It is best to use this variable when assigning other
+# directory paths within this directory. This allows you to easily
+# move the test cases to other locations or to other machines.
+#
+THIS_DIR := /home/rostedt/work/demo/ktest-embed
+LOG_FILE = ${OUTPUT_DIR}/snowball.log
+CLEAR_LOG = 1
+MAKE_CMD = PATH=/usr/local/gcc-4.5.2-nolibc/arm-unknown-linux-gnueabi/bin:$PATH CROSS_COMPILE=arm-unknown-linux-gnueabi- make ARCH=arm
+ADD_CONFIG = ${THIS_DIR}/addconfig
+
+SCP_TO_TARGET = echo "don't do scp"
+
+TFTPBOOT := /var/lib/tftpboot
+TFTPDEF := ${TFTPBOOT}/snowball-default
+TFTPTEST := ${OUTPUT_DIR}/${BUILD_TARGET}
+
+SWITCH_TO_GOOD = cp ${TFTPDEF} ${TARGET_IMAGE}
+SWITCH_TO_TEST = cp ${TFTPTEST} ${TARGET_IMAGE}
+
+# Define each test with TEST_START
+# The config options below it will override the defaults
+TEST_START SKIP
+TEST_TYPE = boot
+BUILD_TYPE = u8500_defconfig
+BUILD_NOCLEAN = 1
+
+TEST_START
+TEST_TYPE = make_min_config
+OUTPUT_MIN_CONFIG = ${THIS_DIR}/config.newmin
+START_MIN_CONFIG = ${THIS_DIR}/config.orig
+IGNORE_CONFIG = ${THIS_DIR}/config.ignore
+BUILD_NOCLEAN = 1
+
+
+DEFAULTS
+LOCALVERSION = -test
+POWER_CYCLE = echo use the thumb luke; read a
+CONSOLE = cat ${THIS_DIR}/snowball-cat
+REBOOT_TYPE = script
+SSH_USER = root
+BUILD_OPTIONS = -j8 uImage
+BUILD_DIR = ${THIS_DIR}/linux.git
+OUTPUT_DIR = ${THIS_DIR}/snowball-build
+MACHINE = snowball
+TARGET_IMAGE = /var/lib/tftpboot/snowball-image
+BUILD_TARGET = arch/arm/boot/uImage
diff --git a/tools/testing/ktest/examples/test.conf b/tools/testing/ktest/examples/test.conf
new file mode 100644
index 000000000..b725210ef
--- /dev/null
+++ b/tools/testing/ktest/examples/test.conf
@@ -0,0 +1,62 @@
+#
+# Generic config for a machine
+#
+
+# Name your machine (the DNS name, what you ssh to)
+MACHINE = foo
+
+# BOX can be different than foo, if the machine BOX has
+# multiple partitions with different systems installed. For example,
+# you may have a i386 and x86_64 installation on a test box.
+# If this is the case, MACHINE defines the way to connect to the
+# machine, which may be different between which system the machine
+# is booting into. BOX is used for the scripts to reboot and power cycle
+# the machine, where it does not matter which system the machine boots into.
+#
+#BOX := bar
+
+# Define a way to read the console
+CONSOLE = stty -F /dev/ttyS0 115200 parodd; cat /dev/ttyS0
+
+# The include files will set up the type of test to run. Just set TEST to
+# which test you want to run.
+#
+# TESTS = patchcheck, randconfig, boot, test, config-bisect, bisect, min-config
+#
+# See the include/*.conf files that define these tests
+#
+TEST := patchcheck
+
+# Some tests may have more than one test to run. Define MULTI := 1 to run
+# the extra tests.
+MULTI := 0
+
+# In case you want to differentiate which type of system you are testing
+BITS := 64
+
+# REBOOT = none, error, fail, empty
+#  See include/defaults.conf
+REBOOT := empty
+
+# The defaults file will set up various settings that can be used by all
+# machine configs.
+INCLUDE include/defaults.conf
+
+# In case you need to add a patch for a bisect or something
+#PRE_BUILD = patch -p1 < ${THIS_DIR}/fix.patch
+
+# Reset the repo after the build and remove all 'test' modules from the target
+# Notice that DO_POST_BUILD is a variable (defined by ':=') and POST_BUILD
+# is the option (defined by '=')
+
+DO_POST_BUILD := git reset --hard
+POST_BUILD = ${SSH} 'rm -rf /lib/modules/*-test*'; ${DO_POST_BUILD}
+
+# The following files each handle a different test case.
+# Having them included allows you to set up more than one machine and share
+# the same tests.
+INCLUDE include/patchcheck.conf
+INCLUDE include/tests.conf
+INCLUDE include/bisect.conf
+INCLUDE include/min-config.conf
+
diff --git a/tools/testing/ktest/ktest.pl b/tools/testing/ktest/ktest.pl
new file mode 100755
index 000000000..406401f1a
--- /dev/null
+++ b/tools/testing/ktest/ktest.pl
@@ -0,0 +1,4425 @@
+#!/usr/bin/perl -w
+#
+# Copyright 2010 - Steven Rostedt <srostedt@redhat.com>, Red Hat Inc.
+# Licensed under the terms of the GNU GPL License version 2
+#
+
+use strict;
+use IPC::Open2;
+use Fcntl qw(F_GETFL F_SETFL O_NONBLOCK);
+use File::Path qw(mkpath);
+use File::Copy qw(cp);
+use FileHandle;
+use FindBin;
+
+my $VERSION = "0.2";
+
+$| = 1;
+
+my %opt;
+my %repeat_tests;
+my %repeats;
+my %evals;
+
+#default opts
+my %default = (
+    "MAILER"			=> "sendmail",  # default mailer
+    "EMAIL_ON_ERROR"		=> 1,
+    "EMAIL_WHEN_FINISHED"	=> 1,
+    "EMAIL_WHEN_CANCELED"	=> 0,
+    "EMAIL_WHEN_STARTED"	=> 0,
+    "NUM_TESTS"			=> 1,
+    "TEST_TYPE"			=> "build",
+    "BUILD_TYPE"		=> "randconfig",
+    "MAKE_CMD"			=> "make",
+    "CLOSE_CONSOLE_SIGNAL"	=> "INT",
+    "TIMEOUT"			=> 120,
+    "TMP_DIR"			=> "/tmp/ktest/\${MACHINE}",
+    "SLEEP_TIME"		=> 60,	# sleep time between tests
+    "BUILD_NOCLEAN"		=> 0,
+    "REBOOT_ON_ERROR"		=> 0,
+    "POWEROFF_ON_ERROR"		=> 0,
+    "REBOOT_ON_SUCCESS"		=> 1,
+    "POWEROFF_ON_SUCCESS"	=> 0,
+    "BUILD_OPTIONS"		=> "",
+    "BISECT_SLEEP_TIME"		=> 60,   # sleep time between bisects
+    "PATCHCHECK_SLEEP_TIME"	=> 60, # sleep time between patch checks
+    "CLEAR_LOG"			=> 0,
+    "BISECT_MANUAL"		=> 0,
+    "BISECT_SKIP"		=> 1,
+    "BISECT_TRIES"		=> 1,
+    "MIN_CONFIG_TYPE"		=> "boot",
+    "SUCCESS_LINE"		=> "login:",
+    "DETECT_TRIPLE_FAULT"	=> 1,
+    "NO_INSTALL"		=> 0,
+    "BOOTED_TIMEOUT"		=> 1,
+    "DIE_ON_FAILURE"		=> 1,
+    "SSH_EXEC"			=> "ssh \$SSH_USER\@\$MACHINE \$SSH_COMMAND",
+    "SCP_TO_TARGET"		=> "scp \$SRC_FILE \$SSH_USER\@\$MACHINE:\$DST_FILE",
+    "SCP_TO_TARGET_INSTALL"	=> "\${SCP_TO_TARGET}",
+    "REBOOT"			=> "ssh \$SSH_USER\@\$MACHINE reboot",
+    "STOP_AFTER_SUCCESS"	=> 10,
+    "STOP_AFTER_FAILURE"	=> 60,
+    "STOP_TEST_AFTER"		=> 600,
+    "MAX_MONITOR_WAIT"		=> 1800,
+    "GRUB_REBOOT"		=> "grub2-reboot",
+    "SYSLINUX"			=> "extlinux",
+    "SYSLINUX_PATH"		=> "/boot/extlinux",
+    "CONNECT_TIMEOUT"		=> 25,
+
+# required, and we will ask users if they don't have them but we keep the default
+# value something that is common.
+    "REBOOT_TYPE"		=> "grub",
+    "LOCALVERSION"		=> "-test",
+    "SSH_USER"			=> "root",
+    "BUILD_TARGET"	 	=> "arch/x86/boot/bzImage",
+    "TARGET_IMAGE"		=> "/boot/vmlinuz-test",
+
+    "LOG_FILE"			=> undef,
+    "IGNORE_UNUSED"		=> 0,
+);
+
+my $ktest_config = "ktest.conf";
+my $version;
+my $have_version = 0;
+my $machine;
+my $last_machine;
+my $ssh_user;
+my $tmpdir;
+my $builddir;
+my $outputdir;
+my $output_config;
+my $test_type;
+my $build_type;
+my $build_options;
+my $final_post_ktest;
+my $pre_ktest;
+my $post_ktest;
+my $pre_test;
+my $post_test;
+my $pre_build;
+my $post_build;
+my $pre_build_die;
+my $post_build_die;
+my $reboot_type;
+my $reboot_script;
+my $power_cycle;
+my $reboot;
+my $reboot_on_error;
+my $switch_to_good;
+my $switch_to_test;
+my $poweroff_on_error;
+my $reboot_on_success;
+my $die_on_failure;
+my $powercycle_after_reboot;
+my $poweroff_after_halt;
+my $max_monitor_wait;
+my $ssh_exec;
+my $scp_to_target;
+my $scp_to_target_install;
+my $power_off;
+my $grub_menu;
+my $last_grub_menu;
+my $grub_file;
+my $grub_number;
+my $grub_reboot;
+my $syslinux;
+my $syslinux_path;
+my $syslinux_label;
+my $target;
+my $make;
+my $pre_install;
+my $post_install;
+my $no_install;
+my $noclean;
+my $minconfig;
+my $start_minconfig;
+my $start_minconfig_defined;
+my $output_minconfig;
+my $minconfig_type;
+my $use_output_minconfig;
+my $warnings_file;
+my $ignore_config;
+my $ignore_errors;
+my $addconfig;
+my $in_bisect = 0;
+my $bisect_bad_commit = "";
+my $reverse_bisect;
+my $bisect_manual;
+my $bisect_skip;
+my $bisect_tries;
+my $config_bisect_good;
+my $bisect_ret_good;
+my $bisect_ret_bad;
+my $bisect_ret_skip;
+my $bisect_ret_abort;
+my $bisect_ret_default;
+my $in_patchcheck = 0;
+my $run_test;
+my $buildlog;
+my $testlog;
+my $dmesg;
+my $monitor_fp;
+my $monitor_pid;
+my $monitor_cnt = 0;
+my $sleep_time;
+my $bisect_sleep_time;
+my $patchcheck_sleep_time;
+my $ignore_warnings;
+my $store_failures;
+my $store_successes;
+my $test_name;
+my $timeout;
+my $connect_timeout;
+my $config_bisect_exec;
+my $booted_timeout;
+my $detect_triplefault;
+my $console;
+my $close_console_signal;
+my $reboot_success_line;
+my $success_line;
+my $stop_after_success;
+my $stop_after_failure;
+my $stop_test_after;
+my $build_target;
+my $target_image;
+my $checkout;
+my $localversion;
+my $iteration = 0;
+my $successes = 0;
+my $stty_orig;
+my $run_command_status = 0;
+
+my $bisect_good;
+my $bisect_bad;
+my $bisect_type;
+my $bisect_start;
+my $bisect_replay;
+my $bisect_files;
+my $bisect_reverse;
+my $bisect_check;
+
+my $config_bisect;
+my $config_bisect_type;
+my $config_bisect_check;
+
+my $patchcheck_type;
+my $patchcheck_start;
+my $patchcheck_cherry;
+my $patchcheck_end;
+
+my $build_time;
+my $install_time;
+my $reboot_time;
+my $test_time;
+
+my $pwd;
+my $dirname = $FindBin::Bin;
+
+my $mailto;
+my $mailer;
+my $mail_path;
+my $mail_command;
+my $email_on_error;
+my $email_when_finished;
+my $email_when_started;
+my $email_when_canceled;
+
+my $script_start_time = localtime();
+
+# set when a test is something other that just building or install
+# which would require more options.
+my $buildonly = 1;
+
+# tell build not to worry about warnings, even when WARNINGS_FILE is set
+my $warnings_ok = 0;
+
+# set when creating a new config
+my $newconfig = 0;
+
+my %entered_configs;
+my %config_help;
+my %variable;
+
+# force_config is the list of configs that we force enabled (or disabled)
+# in a .config file. The MIN_CONFIG and ADD_CONFIG configs.
+my %force_config;
+
+# do not force reboots on config problems
+my $no_reboot = 1;
+
+# reboot on success
+my $reboot_success = 0;
+
+my %option_map = (
+    "MAILTO"			=> \$mailto,
+    "MAILER"			=> \$mailer,
+    "MAIL_PATH"			=> \$mail_path,
+    "MAIL_COMMAND"		=> \$mail_command,
+    "EMAIL_ON_ERROR"		=> \$email_on_error,
+    "EMAIL_WHEN_FINISHED"	=> \$email_when_finished,
+    "EMAIL_WHEN_STARTED"	=> \$email_when_started,
+    "EMAIL_WHEN_CANCELED"	=> \$email_when_canceled,
+    "MACHINE"			=> \$machine,
+    "SSH_USER"			=> \$ssh_user,
+    "TMP_DIR"			=> \$tmpdir,
+    "OUTPUT_DIR"		=> \$outputdir,
+    "BUILD_DIR"			=> \$builddir,
+    "TEST_TYPE"			=> \$test_type,
+    "PRE_KTEST"			=> \$pre_ktest,
+    "POST_KTEST"		=> \$post_ktest,
+    "PRE_TEST"			=> \$pre_test,
+    "POST_TEST"			=> \$post_test,
+    "BUILD_TYPE"		=> \$build_type,
+    "BUILD_OPTIONS"		=> \$build_options,
+    "PRE_BUILD"			=> \$pre_build,
+    "POST_BUILD"		=> \$post_build,
+    "PRE_BUILD_DIE"		=> \$pre_build_die,
+    "POST_BUILD_DIE"		=> \$post_build_die,
+    "POWER_CYCLE"		=> \$power_cycle,
+    "REBOOT"			=> \$reboot,
+    "BUILD_NOCLEAN"		=> \$noclean,
+    "MIN_CONFIG"		=> \$minconfig,
+    "OUTPUT_MIN_CONFIG"		=> \$output_minconfig,
+    "START_MIN_CONFIG"		=> \$start_minconfig,
+    "MIN_CONFIG_TYPE"		=> \$minconfig_type,
+    "USE_OUTPUT_MIN_CONFIG"	=> \$use_output_minconfig,
+    "WARNINGS_FILE"		=> \$warnings_file,
+    "IGNORE_CONFIG"		=> \$ignore_config,
+    "TEST"			=> \$run_test,
+    "ADD_CONFIG"		=> \$addconfig,
+    "REBOOT_TYPE"		=> \$reboot_type,
+    "GRUB_MENU"			=> \$grub_menu,
+    "GRUB_FILE"			=> \$grub_file,
+    "GRUB_REBOOT"		=> \$grub_reboot,
+    "SYSLINUX"			=> \$syslinux,
+    "SYSLINUX_PATH"		=> \$syslinux_path,
+    "SYSLINUX_LABEL"		=> \$syslinux_label,
+    "PRE_INSTALL"		=> \$pre_install,
+    "POST_INSTALL"		=> \$post_install,
+    "NO_INSTALL"		=> \$no_install,
+    "REBOOT_SCRIPT"		=> \$reboot_script,
+    "REBOOT_ON_ERROR"		=> \$reboot_on_error,
+    "SWITCH_TO_GOOD"		=> \$switch_to_good,
+    "SWITCH_TO_TEST"		=> \$switch_to_test,
+    "POWEROFF_ON_ERROR"		=> \$poweroff_on_error,
+    "REBOOT_ON_SUCCESS"		=> \$reboot_on_success,
+    "DIE_ON_FAILURE"		=> \$die_on_failure,
+    "POWER_OFF"			=> \$power_off,
+    "POWERCYCLE_AFTER_REBOOT"	=> \$powercycle_after_reboot,
+    "POWEROFF_AFTER_HALT"	=> \$poweroff_after_halt,
+    "MAX_MONITOR_WAIT"		=> \$max_monitor_wait,
+    "SLEEP_TIME"		=> \$sleep_time,
+    "BISECT_SLEEP_TIME"		=> \$bisect_sleep_time,
+    "PATCHCHECK_SLEEP_TIME"	=> \$patchcheck_sleep_time,
+    "IGNORE_WARNINGS"		=> \$ignore_warnings,
+    "IGNORE_ERRORS"		=> \$ignore_errors,
+    "BISECT_MANUAL"		=> \$bisect_manual,
+    "BISECT_SKIP"		=> \$bisect_skip,
+    "BISECT_TRIES"		=> \$bisect_tries,
+    "CONFIG_BISECT_GOOD"	=> \$config_bisect_good,
+    "BISECT_RET_GOOD"		=> \$bisect_ret_good,
+    "BISECT_RET_BAD"		=> \$bisect_ret_bad,
+    "BISECT_RET_SKIP"		=> \$bisect_ret_skip,
+    "BISECT_RET_ABORT"		=> \$bisect_ret_abort,
+    "BISECT_RET_DEFAULT"	=> \$bisect_ret_default,
+    "STORE_FAILURES"		=> \$store_failures,
+    "STORE_SUCCESSES"		=> \$store_successes,
+    "TEST_NAME"			=> \$test_name,
+    "TIMEOUT"			=> \$timeout,
+    "CONNECT_TIMEOUT"		=> \$connect_timeout,
+    "CONFIG_BISECT_EXEC"	=> \$config_bisect_exec,
+    "BOOTED_TIMEOUT"		=> \$booted_timeout,
+    "CONSOLE"			=> \$console,
+    "CLOSE_CONSOLE_SIGNAL"	=> \$close_console_signal,
+    "DETECT_TRIPLE_FAULT"	=> \$detect_triplefault,
+    "SUCCESS_LINE"		=> \$success_line,
+    "REBOOT_SUCCESS_LINE"	=> \$reboot_success_line,
+    "STOP_AFTER_SUCCESS"	=> \$stop_after_success,
+    "STOP_AFTER_FAILURE"	=> \$stop_after_failure,
+    "STOP_TEST_AFTER"		=> \$stop_test_after,
+    "BUILD_TARGET"		=> \$build_target,
+    "SSH_EXEC"			=> \$ssh_exec,
+    "SCP_TO_TARGET"		=> \$scp_to_target,
+    "SCP_TO_TARGET_INSTALL"	=> \$scp_to_target_install,
+    "CHECKOUT"			=> \$checkout,
+    "TARGET_IMAGE"		=> \$target_image,
+    "LOCALVERSION"		=> \$localversion,
+
+    "BISECT_GOOD"		=> \$bisect_good,
+    "BISECT_BAD"		=> \$bisect_bad,
+    "BISECT_TYPE"		=> \$bisect_type,
+    "BISECT_START"		=> \$bisect_start,
+    "BISECT_REPLAY"		=> \$bisect_replay,
+    "BISECT_FILES"		=> \$bisect_files,
+    "BISECT_REVERSE"		=> \$bisect_reverse,
+    "BISECT_CHECK"		=> \$bisect_check,
+
+    "CONFIG_BISECT"		=> \$config_bisect,
+    "CONFIG_BISECT_TYPE"	=> \$config_bisect_type,
+    "CONFIG_BISECT_CHECK"	=> \$config_bisect_check,
+
+    "PATCHCHECK_TYPE"		=> \$patchcheck_type,
+    "PATCHCHECK_START"		=> \$patchcheck_start,
+    "PATCHCHECK_CHERRY"		=> \$patchcheck_cherry,
+    "PATCHCHECK_END"		=> \$patchcheck_end,
+);
+
+# Options may be used by other options, record them.
+my %used_options;
+
+# default variables that can be used
+chomp ($variable{"PWD"} = `pwd`);
+$pwd = $variable{"PWD"};
+
+$config_help{"MACHINE"} = << "EOF"
+ The machine hostname that you will test.
+ For build only tests, it is still needed to differentiate log files.
+EOF
+    ;
+$config_help{"SSH_USER"} = << "EOF"
+ The box is expected to have ssh on normal bootup, provide the user
+  (most likely root, since you need privileged operations)
+EOF
+    ;
+$config_help{"BUILD_DIR"} = << "EOF"
+ The directory that contains the Linux source code (full path).
+ You can use \${PWD} that will be the path where ktest.pl is run, or use
+ \${THIS_DIR} which is assigned \${PWD} but may be changed later.
+EOF
+    ;
+$config_help{"OUTPUT_DIR"} = << "EOF"
+ The directory that the objects will be built (full path).
+ (can not be same as BUILD_DIR)
+ You can use \${PWD} that will be the path where ktest.pl is run, or use
+ \${THIS_DIR} which is assigned \${PWD} but may be changed later.
+EOF
+    ;
+$config_help{"BUILD_TARGET"} = << "EOF"
+ The location of the compiled file to copy to the target.
+ (relative to OUTPUT_DIR)
+EOF
+    ;
+$config_help{"BUILD_OPTIONS"} = << "EOF"
+ Options to add to \"make\" when building.
+ i.e.  -j20
+EOF
+    ;
+$config_help{"TARGET_IMAGE"} = << "EOF"
+ The place to put your image on the test machine.
+EOF
+    ;
+$config_help{"POWER_CYCLE"} = << "EOF"
+ A script or command to reboot the box.
+
+ Here is a digital loggers power switch example
+ POWER_CYCLE = wget --no-proxy -O /dev/null -q  --auth-no-challenge 'http://admin:admin\@power/outlet?5=CCL'
+
+ Here is an example to reboot a virtual box on the current host
+ with the name "Guest".
+ POWER_CYCLE = virsh destroy Guest; sleep 5; virsh start Guest
+EOF
+    ;
+$config_help{"CONSOLE"} = << "EOF"
+ The script or command that reads the console
+
+  If you use ttywatch server, something like the following would work.
+CONSOLE = nc -d localhost 3001
+
+ For a virtual machine with guest name "Guest".
+CONSOLE =  virsh console Guest
+EOF
+    ;
+$config_help{"LOCALVERSION"} = << "EOF"
+ Required version ending to differentiate the test
+ from other linux builds on the system.
+EOF
+    ;
+$config_help{"REBOOT_TYPE"} = << "EOF"
+ Way to reboot the box to the test kernel.
+ Only valid options so far are "grub", "grub2", "syslinux", and "script".
+
+ If you specify grub, it will assume grub version 1
+ and will search in /boot/grub/menu.lst for the title \$GRUB_MENU
+ and select that target to reboot to the kernel. If this is not
+ your setup, then specify "script" and have a command or script
+ specified in REBOOT_SCRIPT to boot to the target.
+
+ The entry in /boot/grub/menu.lst must be entered in manually.
+ The test will not modify that file.
+
+ If you specify grub2, then you also need to specify both \$GRUB_MENU
+ and \$GRUB_FILE.
+
+ If you specify syslinux, then you may use SYSLINUX to define the syslinux
+ command (defaults to extlinux), and SYSLINUX_PATH to specify the path to
+ the syslinux install (defaults to /boot/extlinux). But you have to specify
+ SYSLINUX_LABEL to define the label to boot to for the test kernel.
+EOF
+    ;
+$config_help{"GRUB_MENU"} = << "EOF"
+ The grub title name for the test kernel to boot
+ (Only mandatory if REBOOT_TYPE = grub or grub2)
+
+ Note, ktest.pl will not update the grub menu.lst, you need to
+ manually add an option for the test. ktest.pl will search
+ the grub menu.lst for this option to find what kernel to
+ reboot into.
+
+ For example, if in the /boot/grub/menu.lst the test kernel title has:
+ title Test Kernel
+ kernel vmlinuz-test
+ GRUB_MENU = Test Kernel
+
+ For grub2, a search of \$GRUB_FILE is performed for the lines
+ that begin with "menuentry". It will not detect submenus. The
+ menu must be a non-nested menu. Add the quotes used in the menu
+ to guarantee your selection, as the first menuentry with the content
+ of \$GRUB_MENU that is found will be used.
+EOF
+    ;
+$config_help{"GRUB_FILE"} = << "EOF"
+ If grub2 is used, the full path for the grub.cfg file is placed
+ here. Use something like /boot/grub2/grub.cfg to search.
+EOF
+    ;
+$config_help{"SYSLINUX_LABEL"} = << "EOF"
+ If syslinux is used, the label that boots the target kernel must
+ be specified with SYSLINUX_LABEL.
+EOF
+    ;
+$config_help{"REBOOT_SCRIPT"} = << "EOF"
+ A script to reboot the target into the test kernel
+ (Only mandatory if REBOOT_TYPE = script)
+EOF
+    ;
+
+sub _logit {
+    if (defined($opt{"LOG_FILE"})) {
+	open(OUT, ">> $opt{LOG_FILE}") or die "Can't write to $opt{LOG_FILE}";
+	print OUT @_;
+	close(OUT);
+    }
+}
+
+sub logit {
+    if (defined($opt{"LOG_FILE"})) {
+	_logit @_;
+    } else {
+	print @_;
+    }
+}
+
+sub doprint {
+    print @_;
+    _logit @_;
+}
+
+sub read_prompt {
+    my ($cancel, $prompt) = @_;
+
+    my $ans;
+
+    for (;;) {
+	if ($cancel) {
+	    print "$prompt [y/n/C] ";
+	} else {
+	    print "$prompt [Y/n] ";
+	}
+	$ans = <STDIN>;
+	chomp $ans;
+	if ($ans =~ /^\s*$/) {
+	    if ($cancel) {
+		$ans = "c";
+	    } else {
+		$ans = "y";
+	    }
+	}
+	last if ($ans =~ /^y$/i || $ans =~ /^n$/i);
+	if ($cancel) {
+	    last if ($ans =~ /^c$/i);
+	    print "Please answer either 'y', 'n' or 'c'.\n";
+	} else {
+	    print "Please answer either 'y' or 'n'.\n";
+	}
+    }
+    if ($ans =~ /^c/i) {
+	exit;
+    }
+    if ($ans !~ /^y$/i) {
+	return 0;
+    }
+    return 1;
+}
+
+sub read_yn {
+    my ($prompt) = @_;
+
+    return read_prompt 0, $prompt;
+}
+
+sub read_ync {
+    my ($prompt) = @_;
+
+    return read_prompt 1, $prompt;
+}
+
+sub get_mandatory_config {
+    my ($config) = @_;
+    my $ans;
+
+    return if (defined($opt{$config}));
+
+    if (defined($config_help{$config})) {
+	print "\n";
+	print $config_help{$config};
+    }
+
+    for (;;) {
+	print "$config = ";
+	if (defined($default{$config}) && length($default{$config})) {
+	    print "\[$default{$config}\] ";
+	}
+	$ans = <STDIN>;
+	$ans =~ s/^\s*(.*\S)\s*$/$1/;
+	if ($ans =~ /^\s*$/) {
+	    if ($default{$config}) {
+		$ans = $default{$config};
+	    } else {
+		print "Your answer can not be blank\n";
+		next;
+	    }
+	}
+	$entered_configs{$config} = ${ans};
+	last;
+    }
+}
+
+sub show_time {
+    my ($time) = @_;
+
+    my $hours = 0;
+    my $minutes = 0;
+
+    if ($time > 3600) {
+	$hours = int($time / 3600);
+	$time -= $hours * 3600;
+    }
+    if ($time > 60) {
+	$minutes = int($time / 60);
+	$time -= $minutes * 60;
+    }
+
+    if ($hours > 0) {
+	doprint "$hours hour";
+	doprint "s" if ($hours > 1);
+	doprint " ";
+    }
+
+    if ($minutes > 0) {
+	doprint "$minutes minute";
+	doprint "s" if ($minutes > 1);
+	doprint " ";
+    }
+
+    doprint "$time second";
+    doprint "s" if ($time != 1);
+}
+
+sub print_times {
+    doprint "\n";
+    if ($build_time) {
+	doprint "Build time:   ";
+	show_time($build_time);
+	doprint "\n";
+    }
+    if ($install_time) {
+	doprint "Install time: ";
+	show_time($install_time);
+	doprint "\n";
+    }
+    if ($reboot_time) {
+	doprint "Reboot time:  ";
+	show_time($reboot_time);
+	doprint "\n";
+    }
+    if ($test_time) {
+	doprint "Test time:    ";
+	show_time($test_time);
+	doprint "\n";
+    }
+    # reset for iterations like bisect
+    $build_time = 0;
+    $install_time = 0;
+    $reboot_time = 0;
+    $test_time = 0;
+}
+
+sub get_mandatory_configs {
+    get_mandatory_config("MACHINE");
+    get_mandatory_config("BUILD_DIR");
+    get_mandatory_config("OUTPUT_DIR");
+
+    if ($newconfig) {
+	get_mandatory_config("BUILD_OPTIONS");
+    }
+
+    # options required for other than just building a kernel
+    if (!$buildonly) {
+	get_mandatory_config("POWER_CYCLE");
+	get_mandatory_config("CONSOLE");
+    }
+
+    # options required for install and more
+    if ($buildonly != 1) {
+	get_mandatory_config("SSH_USER");
+	get_mandatory_config("BUILD_TARGET");
+	get_mandatory_config("TARGET_IMAGE");
+    }
+
+    get_mandatory_config("LOCALVERSION");
+
+    return if ($buildonly);
+
+    my $rtype = $opt{"REBOOT_TYPE"};
+
+    if (!defined($rtype)) {
+	if (!defined($opt{"GRUB_MENU"})) {
+	    get_mandatory_config("REBOOT_TYPE");
+	    $rtype = $entered_configs{"REBOOT_TYPE"};
+	} else {
+	    $rtype = "grub";
+	}
+    }
+
+    if ($rtype eq "grub") {
+	get_mandatory_config("GRUB_MENU");
+    }
+
+    if ($rtype eq "grub2") {
+	get_mandatory_config("GRUB_MENU");
+	get_mandatory_config("GRUB_FILE");
+    }
+
+    if ($rtype eq "syslinux") {
+	get_mandatory_config("SYSLINUX_LABEL");
+    }
+}
+
+sub process_variables {
+    my ($value, $remove_undef) = @_;
+    my $retval = "";
+
+    # We want to check for '\', and it is just easier
+    # to check the previous characet of '$' and not need
+    # to worry if '$' is the first character. By adding
+    # a space to $value, we can just check [^\\]\$ and
+    # it will still work.
+    $value = " $value";
+
+    while ($value =~ /(.*?[^\\])\$\{(.*?)\}(.*)/) {
+	my $begin = $1;
+	my $var = $2;
+	my $end = $3;
+	# append beginning of value to retval
+	$retval = "$retval$begin";
+	if (defined($variable{$var})) {
+	    $retval = "$retval$variable{$var}";
+	} elsif (defined($remove_undef) && $remove_undef) {
+	    # for if statements, any variable that is not defined,
+	    # we simple convert to 0
+	    $retval = "${retval}0";
+	} else {
+	    # put back the origin piece.
+	    $retval = "$retval\$\{$var\}";
+	    # This could be an option that is used later, save
+	    # it so we don't warn if this option is not one of
+	    # ktests options.
+	    $used_options{$var} = 1;
+	}
+	$value = $end;
+    }
+    $retval = "$retval$value";
+
+    # remove the space added in the beginning
+    $retval =~ s/ //;
+
+    return "$retval"
+}
+
+sub set_value {
+    my ($lvalue, $rvalue, $override, $overrides, $name) = @_;
+
+    my $prvalue = process_variables($rvalue);
+
+    if ($lvalue =~ /^(TEST|BISECT|CONFIG_BISECT)_TYPE(\[.*\])?$/ &&
+	$prvalue !~ /^(config_|)bisect$/ &&
+	$prvalue !~ /^build$/ &&
+	$buildonly) {
+
+	# Note if a test is something other than build, then we
+	# will need other mandatory options.
+	if ($prvalue ne "install") {
+	    $buildonly = 0;
+	} else {
+	    # install still limits some mandatory options.
+	    $buildonly = 2;
+	}
+    }
+
+    if (defined($opt{$lvalue})) {
+	if (!$override || defined(${$overrides}{$lvalue})) {
+	    my $extra = "";
+	    if ($override) {
+		$extra = "In the same override section!\n";
+	    }
+	    die "$name: $.: Option $lvalue defined more than once!\n$extra";
+	}
+	${$overrides}{$lvalue} = $prvalue;
+    }
+
+    $opt{$lvalue} = $prvalue;
+}
+
+sub set_eval {
+    my ($lvalue, $rvalue, $name) = @_;
+
+    my $prvalue = process_variables($rvalue);
+    my $arr;
+
+    if (defined($evals{$lvalue})) {
+	$arr = $evals{$lvalue};
+    } else {
+	$arr = [];
+	$evals{$lvalue} = $arr;
+    }
+
+    push @{$arr}, $rvalue;
+}
+
+sub set_variable {
+    my ($lvalue, $rvalue) = @_;
+
+    if ($rvalue =~ /^\s*$/) {
+	delete $variable{$lvalue};
+    } else {
+	$rvalue = process_variables($rvalue);
+	$variable{$lvalue} = $rvalue;
+    }
+}
+
+sub process_compare {
+    my ($lval, $cmp, $rval) = @_;
+
+    # remove whitespace
+
+    $lval =~ s/^\s*//;
+    $lval =~ s/\s*$//;
+
+    $rval =~ s/^\s*//;
+    $rval =~ s/\s*$//;
+
+    if ($cmp eq "==") {
+	return $lval eq $rval;
+    } elsif ($cmp eq "!=") {
+	return $lval ne $rval;
+    } elsif ($cmp eq "=~") {
+	return $lval =~ m/$rval/;
+    } elsif ($cmp eq "!~") {
+	return $lval !~ m/$rval/;
+    }
+
+    my $statement = "$lval $cmp $rval";
+    my $ret = eval $statement;
+
+    # $@ stores error of eval
+    if ($@) {
+	return -1;
+    }
+
+    return $ret;
+}
+
+sub value_defined {
+    my ($val) = @_;
+
+    return defined($variable{$2}) ||
+	defined($opt{$2});
+}
+
+my $d = 0;
+sub process_expression {
+    my ($name, $val) = @_;
+
+    my $c = $d++;
+
+    while ($val =~ s/\(([^\(]*?)\)/\&\&\&\&VAL\&\&\&\&/) {
+	my $express = $1;
+
+	if (process_expression($name, $express)) {
+	    $val =~ s/\&\&\&\&VAL\&\&\&\&/ 1 /;
+	} else {
+	    $val =~ s/\&\&\&\&VAL\&\&\&\&/ 0 /;
+	}
+    }
+
+    $d--;
+    my $OR = "\\|\\|";
+    my $AND = "\\&\\&";
+
+    while ($val =~ s/^(.*?)($OR|$AND)//) {
+	my $express = $1;
+	my $op = $2;
+
+	if (process_expression($name, $express)) {
+	    if ($op eq "||") {
+		return 1;
+	    }
+	} else {
+	    if ($op eq "&&") {
+		return 0;
+	    }
+	}
+    }
+
+    if ($val =~ /(.*)(==|\!=|>=|<=|>|<|=~|\!~)(.*)/) {
+	my $ret = process_compare($1, $2, $3);
+	if ($ret < 0) {
+	    die "$name: $.: Unable to process comparison\n";
+	}
+	return $ret;
+    }
+
+    if ($val =~ /^\s*(NOT\s*)?DEFINED\s+(\S+)\s*$/) {
+	if (defined $1) {
+	    return !value_defined($2);
+	} else {
+	    return value_defined($2);
+	}
+    }
+
+    if ($val =~ /^\s*0\s*$/) {
+	return 0;
+    } elsif ($val =~ /^\s*\d+\s*$/) {
+	return 1;
+    }
+
+    die ("$name: $.: Undefined content $val in if statement\n");
+}
+
+sub process_if {
+    my ($name, $value) = @_;
+
+    # Convert variables and replace undefined ones with 0
+    my $val = process_variables($value, 1);
+    my $ret = process_expression $name, $val;
+
+    return $ret;
+}
+
+sub __read_config {
+    my ($config, $current_test_num) = @_;
+
+    my $in;
+    open($in, $config) || die "can't read file $config";
+
+    my $name = $config;
+    $name =~ s,.*/(.*),$1,;
+
+    my $test_num = $$current_test_num;
+    my $default = 1;
+    my $repeat = 1;
+    my $num_tests_set = 0;
+    my $skip = 0;
+    my $rest;
+    my $line;
+    my $test_case = 0;
+    my $if = 0;
+    my $if_set = 0;
+    my $override = 0;
+
+    my %overrides;
+
+    while (<$in>) {
+
+	# ignore blank lines and comments
+	next if (/^\s*$/ || /\s*\#/);
+
+	if (/^\s*(TEST_START|DEFAULTS)\b(.*)/) {
+
+	    my $type = $1;
+	    $rest = $2;
+	    $line = $2;
+
+	    my $old_test_num;
+	    my $old_repeat;
+	    $override = 0;
+
+	    if ($type eq "TEST_START") {
+
+		if ($num_tests_set) {
+		    die "$name: $.: Can not specify both NUM_TESTS and TEST_START\n";
+		}
+
+		$old_test_num = $test_num;
+		$old_repeat = $repeat;
+
+		$test_num += $repeat;
+		$default = 0;
+		$repeat = 1;
+	    } else {
+		$default = 1;
+	    }
+
+	    # If SKIP is anywhere in the line, the command will be skipped
+	    if ($rest =~ s/\s+SKIP\b//) {
+		$skip = 1;
+	    } else {
+		$test_case = 1;
+		$skip = 0;
+	    }
+
+	    if ($rest =~ s/\sELSE\b//) {
+		if (!$if) {
+		    die "$name: $.: ELSE found with out matching IF section\n$_";
+		}
+		$if = 0;
+
+		if ($if_set) {
+		    $skip = 1;
+		} else {
+		    $skip = 0;
+		}
+	    }
+
+	    if ($rest =~ s/\sIF\s+(.*)//) {
+		if (process_if($name, $1)) {
+		    $if_set = 1;
+		} else {
+		    $skip = 1;
+		}
+		$if = 1;
+	    } else {
+		$if = 0;
+		$if_set = 0;
+	    }
+
+	    if (!$skip) {
+		if ($type eq "TEST_START") {
+		    if ($rest =~ s/\s+ITERATE\s+(\d+)//) {
+			$repeat = $1;
+			$repeat_tests{"$test_num"} = $repeat;
+		    }
+		} elsif ($rest =~ s/\sOVERRIDE\b//) {
+		    # DEFAULT only
+		    $override = 1;
+		    # Clear previous overrides
+		    %overrides = ();
+		}
+	    }
+
+	    if (!$skip && $rest !~ /^\s*$/) {
+		die "$name: $.: Gargbage found after $type\n$_";
+	    }
+
+	    if ($skip && $type eq "TEST_START") {
+		$test_num = $old_test_num;
+		$repeat = $old_repeat;
+	    }
+
+	} elsif (/^\s*ELSE\b(.*)$/) {
+	    if (!$if) {
+		die "$name: $.: ELSE found with out matching IF section\n$_";
+	    }
+	    $rest = $1;
+	    if ($if_set) {
+		$skip = 1;
+		$rest = "";
+	    } else {
+		$skip = 0;
+
+		if ($rest =~ /\sIF\s+(.*)/) {
+		    # May be a ELSE IF section.
+		    if (process_if($name, $1)) {
+			$if_set = 1;
+		    } else {
+			$skip = 1;
+		    }
+		    $rest = "";
+		} else {
+		    $if = 0;
+		}
+	    }
+
+	    if ($rest !~ /^\s*$/) {
+		die "$name: $.: Gargbage found after DEFAULTS\n$_";
+	    }
+
+	} elsif (/^\s*INCLUDE\s+(\S+)/) {
+
+	    next if ($skip);
+
+	    if (!$default) {
+		die "$name: $.: INCLUDE can only be done in default sections\n$_";
+	    }
+
+	    my $file = process_variables($1);
+
+	    if ($file !~ m,^/,) {
+		# check the path of the config file first
+		if ($config =~ m,(.*)/,) {
+		    if (-f "$1/$file") {
+			$file = "$1/$file";
+		    }
+		}
+	    }
+		
+	    if ( ! -r $file ) {
+		die "$name: $.: Can't read file $file\n$_";
+	    }
+
+	    if (__read_config($file, \$test_num)) {
+		$test_case = 1;
+	    }
+
+	} elsif (/^\s*([A-Z_\[\]\d]+)\s*=~\s*(.*?)\s*$/) {
+
+	    next if ($skip);
+
+	    my $lvalue = $1;
+	    my $rvalue = $2;
+
+	    if ($default || $lvalue =~ /\[\d+\]$/) {
+		set_eval($lvalue, $rvalue, $name);
+	    } else {
+		my $val = "$lvalue\[$test_num\]";
+		set_eval($val, $rvalue, $name);
+	    }
+
+	} elsif (/^\s*([A-Z_\[\]\d]+)\s*=\s*(.*?)\s*$/) {
+
+	    next if ($skip);
+
+	    my $lvalue = $1;
+	    my $rvalue = $2;
+
+	    if (!$default &&
+		($lvalue eq "NUM_TESTS" ||
+		 $lvalue eq "LOG_FILE" ||
+		 $lvalue eq "CLEAR_LOG")) {
+		die "$name: $.: $lvalue must be set in DEFAULTS section\n";
+	    }
+
+	    if ($lvalue eq "NUM_TESTS") {
+		if ($test_num) {
+		    die "$name: $.: Can not specify both NUM_TESTS and TEST_START\n";
+		}
+		if (!$default) {
+		    die "$name: $.: NUM_TESTS must be set in default section\n";
+		}
+		$num_tests_set = 1;
+	    }
+
+	    if ($default || $lvalue =~ /\[\d+\]$/) {
+		set_value($lvalue, $rvalue, $override, \%overrides, $name);
+	    } else {
+		my $val = "$lvalue\[$test_num\]";
+		set_value($val, $rvalue, $override, \%overrides, $name);
+
+		if ($repeat > 1) {
+		    $repeats{$val} = $repeat;
+		}
+	    }
+	} elsif (/^\s*([A-Z_\[\]\d]+)\s*:=\s*(.*?)\s*$/) {
+	    next if ($skip);
+
+	    my $lvalue = $1;
+	    my $rvalue = $2;
+
+	    # process config variables.
+	    # Config variables are only active while reading the
+	    # config and can be defined anywhere. They also ignore
+	    # TEST_START and DEFAULTS, but are skipped if they are in
+	    # on of these sections that have SKIP defined.
+	    # The save variable can be
+	    # defined multiple times and the new one simply overrides
+	    # the prevous one.
+	    set_variable($lvalue, $rvalue);
+
+	} else {
+	    die "$name: $.: Garbage found in config\n$_";
+	}
+    }
+
+    if ($test_num) {
+	$test_num += $repeat - 1;
+	$opt{"NUM_TESTS"} = $test_num;
+    }
+
+    close($in);
+
+    $$current_test_num = $test_num;
+
+    return $test_case;
+}
+
+sub get_test_case {
+	print "What test case would you like to run?\n";
+	print " (build, install or boot)\n";
+	print " Other tests are available but require editing ktest.conf\n";
+	print " (see tools/testing/ktest/sample.conf)\n";
+	my $ans = <STDIN>;
+	chomp $ans;
+	$default{"TEST_TYPE"} = $ans;
+}
+
+sub read_config {
+    my ($config) = @_;
+
+    my $test_case;
+    my $test_num = 0;
+
+    $test_case = __read_config $config, \$test_num;
+
+    # make sure we have all mandatory configs
+    get_mandatory_configs;
+
+    # was a test specified?
+    if (!$test_case) {
+	print "No test case specified.\n";
+	get_test_case;
+    }
+
+    # set any defaults
+
+    foreach my $default (keys %default) {
+	if (!defined($opt{$default})) {
+	    $opt{$default} = $default{$default};
+	}
+    }
+
+    if ($opt{"IGNORE_UNUSED"} == 1) {
+	return;
+    }
+
+    my %not_used;
+
+    # check if there are any stragglers (typos?)
+    foreach my $option (keys %opt) {
+	my $op = $option;
+	# remove per test labels.
+	$op =~ s/\[.*\]//;
+	if (!exists($option_map{$op}) &&
+	    !exists($default{$op}) &&
+	    !exists($used_options{$op})) {
+	    $not_used{$op} = 1;
+	}
+    }
+
+    if (%not_used) {
+	my $s = "s are";
+	$s = " is" if (keys %not_used == 1);
+	print "The following option$s not used; could be a typo:\n";
+	foreach my $option (keys %not_used) {
+	    print "$option\n";
+	}
+	print "Set IGRNORE_UNUSED = 1 to have ktest ignore unused variables\n";
+	if (!read_yn "Do you want to continue?") {
+	    exit -1;
+	}
+    }
+}
+
+sub __eval_option {
+    my ($name, $option, $i) = @_;
+
+    # Add space to evaluate the character before $
+    $option = " $option";
+    my $retval = "";
+    my $repeated = 0;
+    my $parent = 0;
+
+    foreach my $test (keys %repeat_tests) {
+	if ($i >= $test &&
+	    $i < $test + $repeat_tests{$test}) {
+
+	    $repeated = 1;
+	    $parent = $test;
+	    last;
+	}
+    }
+
+    while ($option =~ /(.*?[^\\])\$\{(.*?)\}(.*)/) {
+	my $start = $1;
+	my $var = $2;
+	my $end = $3;
+
+	# Append beginning of line
+	$retval = "$retval$start";
+
+	# If the iteration option OPT[$i] exists, then use that.
+	# otherwise see if the default OPT (without [$i]) exists.
+
+	my $o = "$var\[$i\]";
+	my $parento = "$var\[$parent\]";
+
+	# If a variable contains itself, use the default var
+	if (($var eq $name) && defined($opt{$var})) {
+	    $o = $opt{$var};
+	    $retval = "$retval$o";
+	} elsif (defined($opt{$o})) {
+	    $o = $opt{$o};
+	    $retval = "$retval$o";
+	} elsif ($repeated && defined($opt{$parento})) {
+	    $o = $opt{$parento};
+	    $retval = "$retval$o";
+	} elsif (defined($opt{$var})) {
+	    $o = $opt{$var};
+	    $retval = "$retval$o";
+	} elsif ($var eq "KERNEL_VERSION" && defined($make)) {
+	    # special option KERNEL_VERSION uses kernel version
+	    get_version();
+	    $retval = "$retval$version";
+	} else {
+	    $retval = "$retval\$\{$var\}";
+	}
+
+	$option = $end;
+    }
+
+    $retval = "$retval$option";
+
+    $retval =~ s/^ //;
+
+    return $retval;
+}
+
+sub process_evals {
+    my ($name, $option, $i) = @_;
+
+    my $option_name = "$name\[$i\]";
+    my $ev;
+
+    my $old_option = $option;
+
+    if (defined($evals{$option_name})) {
+	$ev = $evals{$option_name};
+    } elsif (defined($evals{$name})) {
+	$ev = $evals{$name};
+    } else {
+	return $option;
+    }
+
+    for my $e (@{$ev}) {
+	eval "\$option =~ $e";
+    }
+
+    if ($option ne $old_option) {
+	doprint("$name changed from '$old_option' to '$option'\n");
+    }
+
+    return $option;
+}
+
+sub eval_option {
+    my ($name, $option, $i) = @_;
+
+    my $prev = "";
+
+    # Since an option can evaluate to another option,
+    # keep iterating until we do not evaluate any more
+    # options.
+    my $r = 0;
+    while ($prev ne $option) {
+	# Check for recursive evaluations.
+	# 100 deep should be more than enough.
+	if ($r++ > 100) {
+	    die "Over 100 evaluations accurred with $option\n" .
+		"Check for recursive variables\n";
+	}
+	$prev = $option;
+	$option = __eval_option($name, $option, $i);
+    }
+
+    $option = process_evals($name, $option, $i);
+
+    return $option;
+}
+
+sub run_command;
+sub start_monitor;
+sub end_monitor;
+sub wait_for_monitor;
+
+sub reboot {
+    my ($time) = @_;
+    my $powercycle = 0;
+
+    # test if the machine can be connected to within a few seconds
+    my $stat = run_ssh("echo check machine status", $connect_timeout);
+    if (!$stat) {
+	doprint("power cycle\n");
+	$powercycle = 1;
+    }
+
+    if ($powercycle) {
+	run_command "$power_cycle";
+
+	start_monitor;
+	# flush out current monitor
+	# May contain the reboot success line
+	wait_for_monitor 1;
+
+    } else {
+	# Make sure everything has been written to disk
+	run_ssh("sync", 10);
+
+	if (defined($time)) {
+	    start_monitor;
+	    # flush out current monitor
+	    # May contain the reboot success line
+	    wait_for_monitor 1;
+	}
+
+	# try to reboot normally
+	if (run_command $reboot) {
+	    if (defined($powercycle_after_reboot)) {
+		sleep $powercycle_after_reboot;
+		run_command "$power_cycle";
+	    }
+	} else {
+	    # nope? power cycle it.
+	    run_command "$power_cycle";
+	}
+    }
+
+    if (defined($time)) {
+
+	# We only want to get to the new kernel, don't fail
+	# if we stumble over a call trace.
+	my $save_ignore_errors = $ignore_errors;
+	$ignore_errors = 1;
+
+	# Look for the good kernel to boot
+	if (wait_for_monitor($time, "Linux version")) {
+	    # reboot got stuck?
+	    doprint "Reboot did not finish. Forcing power cycle\n";
+	    run_command "$power_cycle";
+	}
+
+	$ignore_errors = $save_ignore_errors;
+
+	# Still need to wait for the reboot to finish
+	wait_for_monitor($time, $reboot_success_line);
+
+	end_monitor;
+    }
+}
+
+sub reboot_to_good {
+    my ($time) = @_;
+
+    if (defined($switch_to_good)) {
+	run_command $switch_to_good;
+    }
+
+    reboot $time;
+}
+
+sub do_not_reboot {
+    my $i = $iteration;
+
+    return $test_type eq "build" || $no_reboot ||
+	($test_type eq "patchcheck" && $opt{"PATCHCHECK_TYPE[$i]"} eq "build") ||
+	($test_type eq "bisect" && $opt{"BISECT_TYPE[$i]"} eq "build") ||
+	($test_type eq "config_bisect" && $opt{"CONFIG_BISECT_TYPE[$i]"} eq "build");
+}
+
+my $in_die = 0;
+
+sub dodie {
+
+    # avoid recusion
+    return if ($in_die);
+    $in_die = 1;
+
+    doprint "CRITICAL FAILURE... ", @_, "\n";
+
+    my $i = $iteration;
+
+    if ($reboot_on_error && !do_not_reboot) {
+
+	doprint "REBOOTING\n";
+	reboot_to_good;
+
+    } elsif ($poweroff_on_error && defined($power_off)) {
+	doprint "POWERING OFF\n";
+	`$power_off`;
+    }
+
+    if (defined($opt{"LOG_FILE"})) {
+	print " See $opt{LOG_FILE} for more info.\n";
+    }
+
+    if ($email_on_error) {
+        send_email("KTEST: critical failure for your [$test_type] test",
+                "Your test started at $script_start_time has failed with:\n@_\n");
+    }
+
+    if ($monitor_cnt) {
+	    # restore terminal settings
+	    system("stty $stty_orig");
+    }
+
+    if (defined($post_test)) {
+	run_command $post_test;
+    }
+
+    die @_, "\n";
+}
+
+sub create_pty {
+    my ($ptm, $pts) = @_;
+    my $tmp;
+    my $TIOCSPTLCK = 0x40045431;
+    my $TIOCGPTN = 0x80045430;
+
+    sysopen($ptm, "/dev/ptmx", O_RDWR | O_NONBLOCK) or
+	dodie "Cant open /dev/ptmx";
+
+    # unlockpt()
+    $tmp = pack("i", 0);
+    ioctl($ptm, $TIOCSPTLCK, $tmp) or
+	dodie "ioctl TIOCSPTLCK for /dev/ptmx failed";
+
+    # ptsname()
+    ioctl($ptm, $TIOCGPTN, $tmp) or
+	dodie "ioctl TIOCGPTN for /dev/ptmx failed";
+    $tmp = unpack("i", $tmp);
+
+    sysopen($pts, "/dev/pts/$tmp", O_RDWR | O_NONBLOCK) or
+	dodie "Can't open /dev/pts/$tmp";
+}
+
+sub exec_console {
+    my ($ptm, $pts) = @_;
+
+    close($ptm);
+
+    close(\*STDIN);
+    close(\*STDOUT);
+    close(\*STDERR);
+
+    open(\*STDIN, '<&', $pts);
+    open(\*STDOUT, '>&', $pts);
+    open(\*STDERR, '>&', $pts);
+
+    close($pts);
+
+    exec $console or
+	dodie "Can't open console $console";
+}
+
+sub open_console {
+    my ($ptm) = @_;
+    my $pts = \*PTSFD;
+    my $pid;
+
+    # save terminal settings
+    $stty_orig = `stty -g`;
+
+    # place terminal in cbreak mode so that stdin can be read one character at
+    # a time without having to wait for a newline
+    system("stty -icanon -echo -icrnl");
+
+    create_pty($ptm, $pts);
+
+    $pid = fork;
+
+    if (!$pid) {
+	# child
+	exec_console($ptm, $pts)
+    }
+
+    # parent
+    close($pts);
+
+    return $pid;
+
+    open(PTSFD, "Stop perl from warning about single use of PTSFD");
+}
+
+sub close_console {
+    my ($fp, $pid) = @_;
+
+    doprint "kill child process $pid\n";
+    kill $close_console_signal, $pid;
+
+    doprint "wait for child process $pid to exit\n";
+    waitpid($pid, 0);
+
+    print "closing!\n";
+    close($fp);
+
+    # restore terminal settings
+    system("stty $stty_orig");
+}
+
+sub start_monitor {
+    if ($monitor_cnt++) {
+	return;
+    }
+    $monitor_fp = \*MONFD;
+    $monitor_pid = open_console $monitor_fp;
+
+    return;
+
+    open(MONFD, "Stop perl from warning about single use of MONFD");
+}
+
+sub end_monitor {
+    return if (!defined $console);
+    if (--$monitor_cnt) {
+	return;
+    }
+    close_console($monitor_fp, $monitor_pid);
+}
+
+sub wait_for_monitor {
+    my ($time, $stop) = @_;
+    my $full_line = "";
+    my $line;
+    my $booted = 0;
+    my $start_time = time;
+    my $skip_call_trace = 0;
+    my $bug = 0;
+    my $bug_ignored = 0;
+    my $now;
+
+    doprint "** Wait for monitor to settle down **\n";
+
+    # read the monitor and wait for the system to calm down
+    while (!$booted) {
+	$line = wait_for_input($monitor_fp, $time);
+	last if (!defined($line));
+	print "$line";
+	$full_line .= $line;
+
+	if (defined($stop) && $full_line =~ /$stop/) {
+	    doprint "wait for monitor detected $stop\n";
+	    $booted = 1;
+	}
+
+	if ($full_line =~ /\[ backtrace testing \]/) {
+	    $skip_call_trace = 1;
+	}
+
+	if ($full_line =~ /call trace:/i) {
+	    if (!$bug && !$skip_call_trace) {
+		if ($ignore_errors) {
+		    $bug_ignored = 1;
+		} else {
+		    $bug = 1;
+		}
+	    }
+	}
+
+	if ($full_line =~ /\[ end of backtrace testing \]/) {
+	    $skip_call_trace = 0;
+	}
+
+	if ($full_line =~ /Kernel panic -/) {
+	    $bug = 1;
+	}
+
+	if ($line =~ /\n/) {
+	    $full_line = "";
+	}
+	$now = time;
+	if ($now - $start_time >= $max_monitor_wait) {
+	    doprint "Exiting monitor flush due to hitting MAX_MONITOR_WAIT\n";
+	    return 1;
+	}
+    }
+    print "** Monitor flushed **\n";
+
+    # if stop is defined but wasn't hit, return error
+    # used by reboot (which wants to see a reboot)
+    if (defined($stop) && !$booted) {
+	$bug = 1;
+    }
+    return $bug;
+}
+
+sub save_logs {
+	my ($result, $basedir) = @_;
+	my @t = localtime;
+	my $date = sprintf "%04d%02d%02d%02d%02d%02d",
+		1900+$t[5],$t[4],$t[3],$t[2],$t[1],$t[0];
+
+	my $type = $build_type;
+	if ($type =~ /useconfig/) {
+	    $type = "useconfig";
+	}
+
+	my $dir = "$machine-$test_type-$type-$result-$date";
+
+	$dir = "$basedir/$dir";
+
+	if (!-d $dir) {
+	    mkpath($dir) or
+		dodie "can't create $dir";
+	}
+
+	my %files = (
+		"config" => $output_config,
+		"buildlog" => $buildlog,
+		"dmesg" => $dmesg,
+		"testlog" => $testlog,
+	);
+
+	while (my ($name, $source) = each(%files)) {
+		if (-f "$source") {
+			cp "$source", "$dir/$name" or
+				dodie "failed to copy $source";
+		}
+	}
+
+	doprint "*** Saved info to $dir ***\n";
+}
+
+sub fail {
+
+	if ($die_on_failure) {
+		dodie @_;
+	}
+
+	doprint "FAILED\n";
+
+	my $i = $iteration;
+
+	# no need to reboot for just building.
+	if (!do_not_reboot) {
+	    doprint "REBOOTING\n";
+	    reboot_to_good $sleep_time;
+	}
+
+	my $name = "";
+
+	if (defined($test_name)) {
+	    $name = " ($test_name)";
+	}
+
+	print_times;
+
+	doprint "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\n";
+	doprint "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\n";
+	doprint "KTEST RESULT: TEST $i$name Failed: ", @_, "\n";
+	doprint "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\n";
+	doprint "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\n";
+
+	if (defined($store_failures)) {
+	    save_logs "fail", $store_failures;
+        }
+
+	if (defined($post_test)) {
+		run_command $post_test;
+	}
+
+	return 1;
+}
+
+sub run_command {
+    my ($command, $redirect, $timeout) = @_;
+    my $start_time;
+    my $end_time;
+    my $dolog = 0;
+    my $dord = 0;
+    my $dostdout = 0;
+    my $pid;
+
+    $command =~ s/\$SSH_USER/$ssh_user/g;
+    $command =~ s/\$MACHINE/$machine/g;
+
+    doprint("$command ... ");
+    $start_time = time;
+
+    $pid = open(CMD, "$command 2>&1 |") or
+	(fail "unable to exec $command" and return 0);
+
+    if (defined($opt{"LOG_FILE"})) {
+	open(LOG, ">>$opt{LOG_FILE}") or
+	    dodie "failed to write to log";
+	$dolog = 1;
+    }
+
+    if (defined($redirect)) {
+	if ($redirect eq 1) {
+	    $dostdout = 1;
+	    # Have the output of the command on its own line
+	    doprint "\n";
+	} else {
+	    open (RD, ">$redirect") or
+		dodie "failed to write to redirect $redirect";
+	    $dord = 1;
+	}
+    }
+
+    my $hit_timeout = 0;
+
+    while (1) {
+	my $fp = \*CMD;
+	if (defined($timeout)) {
+	    doprint "timeout = $timeout\n";
+	}
+	my $line = wait_for_input($fp, $timeout);
+	if (!defined($line)) {
+	    my $now = time;
+	    if (defined($timeout) && (($now - $start_time) >= $timeout)) {
+		doprint "Hit timeout of $timeout, killing process\n";
+		$hit_timeout = 1;
+		kill 9, $pid;
+	    }
+	    last;
+	}
+	print LOG $line if ($dolog);
+	print RD $line if ($dord);
+	print $line if ($dostdout);
+    }
+
+    waitpid($pid, 0);
+    # shift 8 for real exit status
+    $run_command_status = $? >> 8;
+
+    close(CMD);
+    close(LOG) if ($dolog);
+    close(RD)  if ($dord);
+
+    $end_time = time;
+    my $delta = $end_time - $start_time;
+
+    if ($delta == 1) {
+	doprint "[1 second] ";
+    } else {
+	doprint "[$delta seconds] ";
+    }
+
+    if ($hit_timeout) {
+	$run_command_status = 1;
+    }
+
+    if ($run_command_status) {
+	doprint "FAILED!\n";
+    } else {
+	doprint "SUCCESS\n";
+    }
+
+    return !$run_command_status;
+}
+
+sub run_ssh {
+    my ($cmd, $timeout) = @_;
+    my $cp_exec = $ssh_exec;
+
+    $cp_exec =~ s/\$SSH_COMMAND/$cmd/g;
+    return run_command "$cp_exec", undef , $timeout;
+}
+
+sub run_scp {
+    my ($src, $dst, $cp_scp) = @_;
+
+    $cp_scp =~ s/\$SRC_FILE/$src/g;
+    $cp_scp =~ s/\$DST_FILE/$dst/g;
+
+    return run_command "$cp_scp";
+}
+
+sub run_scp_install {
+    my ($src, $dst) = @_;
+
+    my $cp_scp = $scp_to_target_install;
+
+    return run_scp($src, $dst, $cp_scp);
+}
+
+sub run_scp_mod {
+    my ($src, $dst) = @_;
+
+    my $cp_scp = $scp_to_target;
+
+    return run_scp($src, $dst, $cp_scp);
+}
+
+sub get_grub2_index {
+
+    return if (defined($grub_number) && defined($last_grub_menu) &&
+	       $last_grub_menu eq $grub_menu && defined($last_machine) &&
+	       $last_machine eq $machine);
+
+    doprint "Find grub2 menu ... ";
+    $grub_number = -1;
+
+    my $ssh_grub = $ssh_exec;
+    $ssh_grub =~ s,\$SSH_COMMAND,cat $grub_file,g;
+
+    open(IN, "$ssh_grub |")
+	or dodie "unable to get $grub_file";
+
+    my $found = 0;
+
+    while (<IN>) {
+	if (/^menuentry.*$grub_menu/) {
+	    $grub_number++;
+	    $found = 1;
+	    last;
+	} elsif (/^menuentry\s|^submenu\s/) {
+	    $grub_number++;
+	}
+    }
+    close(IN);
+
+    dodie "Could not find '$grub_menu' in $grub_file on $machine"
+	if (!$found);
+    doprint "$grub_number\n";
+    $last_grub_menu = $grub_menu;
+    $last_machine = $machine;
+}
+
+sub get_grub_index {
+
+    if ($reboot_type eq "grub2") {
+	get_grub2_index;
+	return;
+    }
+
+    if ($reboot_type ne "grub") {
+	return;
+    }
+    return if (defined($grub_number) && defined($last_grub_menu) &&
+	       $last_grub_menu eq $grub_menu && defined($last_machine) &&
+	       $last_machine eq $machine);
+
+    doprint "Find grub menu ... ";
+    $grub_number = -1;
+
+    my $ssh_grub = $ssh_exec;
+    $ssh_grub =~ s,\$SSH_COMMAND,cat /boot/grub/menu.lst,g;
+
+    open(IN, "$ssh_grub |")
+	or dodie "unable to get menu.lst";
+
+    my $found = 0;
+
+    while (<IN>) {
+	if (/^\s*title\s+$grub_menu\s*$/) {
+	    $grub_number++;
+	    $found = 1;
+	    last;
+	} elsif (/^\s*title\s/) {
+	    $grub_number++;
+	}
+    }
+    close(IN);
+
+    dodie "Could not find '$grub_menu' in /boot/grub/menu on $machine"
+	if (!$found);
+    doprint "$grub_number\n";
+    $last_grub_menu = $grub_menu;
+    $last_machine = $machine;
+}
+
+sub wait_for_input
+{
+    my ($fp, $time) = @_;
+    my $start_time;
+    my $rin;
+    my $rout;
+    my $nr;
+    my $buf;
+    my $line;
+    my $ch;
+
+    if (!defined($time)) {
+	$time = $timeout;
+    }
+
+    $rin = '';
+    vec($rin, fileno($fp), 1) = 1;
+    vec($rin, fileno(\*STDIN), 1) = 1;
+
+    $start_time = time;
+
+    while (1) {
+	$nr = select($rout=$rin, undef, undef, $time);
+
+	last if ($nr <= 0);
+
+	# copy data from stdin to the console
+	if (vec($rout, fileno(\*STDIN), 1) == 1) {
+	    $nr = sysread(\*STDIN, $buf, 1000);
+	    syswrite($fp, $buf, $nr) if ($nr > 0);
+	}
+
+	# The timeout is based on time waiting for the fp data
+	if (vec($rout, fileno($fp), 1) != 1) {
+	    last if (defined($time) && (time - $start_time > $time));
+	    next;
+	}
+
+	$line = "";
+
+	# try to read one char at a time
+	while (sysread $fp, $ch, 1) {
+	    $line .= $ch;
+	    last if ($ch eq "\n");
+	}
+
+	last if (!length($line));
+
+	return $line;
+    }
+    return undef;
+}
+
+sub reboot_to {
+    if (defined($switch_to_test)) {
+	run_command $switch_to_test;
+    }
+
+    if ($reboot_type eq "grub") {
+	run_ssh "'(echo \"savedefault --default=$grub_number --once\" | grub --batch)'";
+    } elsif ($reboot_type eq "grub2") {
+	run_ssh "$grub_reboot $grub_number";
+    } elsif ($reboot_type eq "syslinux") {
+	run_ssh "$syslinux --once \\\"$syslinux_label\\\" $syslinux_path";
+    } elsif (defined $reboot_script) {
+	run_command "$reboot_script";
+    }
+    reboot;
+}
+
+sub get_sha1 {
+    my ($commit) = @_;
+
+    doprint "git rev-list --max-count=1 $commit ... ";
+    my $sha1 = `git rev-list --max-count=1 $commit`;
+    my $ret = $?;
+
+    logit $sha1;
+
+    if ($ret) {
+	doprint "FAILED\n";
+	dodie "Failed to get git $commit";
+    }
+
+    print "SUCCESS\n";
+
+    chomp $sha1;
+
+    return $sha1;
+}
+
+sub monitor {
+    my $booted = 0;
+    my $bug = 0;
+    my $bug_ignored = 0;
+    my $skip_call_trace = 0;
+    my $loops;
+
+    my $start_time = time;
+
+    wait_for_monitor 5;
+
+    my $line;
+    my $full_line = "";
+
+    open(DMESG, "> $dmesg") or
+	dodie "unable to write to $dmesg";
+
+    reboot_to;
+
+    my $success_start;
+    my $failure_start;
+    my $monitor_start = time;
+    my $done = 0;
+    my $version_found = 0;
+
+    while (!$done) {
+
+	if ($bug && defined($stop_after_failure) &&
+	    $stop_after_failure >= 0) {
+	    my $time = $stop_after_failure - (time - $failure_start);
+	    $line = wait_for_input($monitor_fp, $time);
+	    if (!defined($line)) {
+		doprint "bug timed out after $booted_timeout seconds\n";
+		doprint "Test forced to stop after $stop_after_failure seconds after failure\n";
+		last;
+	    }
+	} elsif ($booted) {
+	    $line = wait_for_input($monitor_fp, $booted_timeout);
+	    if (!defined($line)) {
+		my $s = $booted_timeout == 1 ? "" : "s";
+		doprint "Successful boot found: break after $booted_timeout second$s\n";
+		last;
+	    }
+	} else {
+	    $line = wait_for_input($monitor_fp);
+	    if (!defined($line)) {
+		my $s = $timeout == 1 ? "" : "s";
+		doprint "Timed out after $timeout second$s\n";
+		last;
+	    }
+	}
+
+	doprint $line;
+	print DMESG $line;
+
+	# we are not guaranteed to get a full line
+	$full_line .= $line;
+
+	if ($full_line =~ /$success_line/) {
+	    $booted = 1;
+	    $success_start = time;
+	}
+
+	if ($booted && defined($stop_after_success) &&
+	    $stop_after_success >= 0) {
+	    my $now = time;
+	    if ($now - $success_start >= $stop_after_success) {
+		doprint "Test forced to stop after $stop_after_success seconds after success\n";
+		last;
+	    }
+	}
+
+	if ($full_line =~ /\[ backtrace testing \]/) {
+	    $skip_call_trace = 1;
+	}
+
+	if ($full_line =~ /call trace:/i) {
+	    if (!$bug && !$skip_call_trace) {
+		if ($ignore_errors) {
+		    $bug_ignored = 1;
+		} else {
+		    $bug = 1;
+		    $failure_start = time;
+		}
+	    }
+	}
+
+	if ($bug && defined($stop_after_failure) &&
+	    $stop_after_failure >= 0) {
+	    my $now = time;
+	    if ($now - $failure_start >= $stop_after_failure) {
+		doprint "Test forced to stop after $stop_after_failure seconds after failure\n";
+		last;
+	    }
+	}
+
+	if ($full_line =~ /\[ end of backtrace testing \]/) {
+	    $skip_call_trace = 0;
+	}
+
+	if ($full_line =~ /Kernel panic -/) {
+	    $failure_start = time;
+	    $bug = 1;
+	}
+
+	# Detect triple faults by testing the banner
+	if ($full_line =~ /\bLinux version (\S+).*\n/) {
+	    if ($1 eq $version) {
+		$version_found = 1;
+	    } elsif ($version_found && $detect_triplefault) {
+		# We already booted into the kernel we are testing,
+		# but now we booted into another kernel?
+		# Consider this a triple fault.
+		doprint "Already booted in Linux kernel $version, but now\n";
+		doprint "we booted into Linux kernel $1.\n";
+		doprint "Assuming that this is a triple fault.\n";
+		doprint "To disable this: set DETECT_TRIPLE_FAULT to 0\n";
+		last;
+	    }
+	}
+
+	if ($line =~ /\n/) {
+	    $full_line = "";
+	}
+
+	if ($stop_test_after > 0 && !$booted && !$bug) {
+	    if (time - $monitor_start > $stop_test_after) {
+		doprint "STOP_TEST_AFTER ($stop_test_after seconds) timed out\n";
+		$done = 1;
+	    }
+	}
+    }
+
+    my $end_time = time;
+    $reboot_time = $end_time - $start_time;
+
+    close(DMESG);
+
+    if ($bug) {
+	return 0 if ($in_bisect);
+	fail "failed - got a bug report" and return 0;
+    }
+
+    if (!$booted) {
+	return 0 if ($in_bisect);
+	fail "failed - never got a boot prompt." and return 0;
+    }
+
+    if ($bug_ignored) {
+	doprint "WARNING: Call Trace detected but ignored due to IGNORE_ERRORS=1\n";
+    }
+
+    return 1;
+}
+
+sub eval_kernel_version {
+    my ($option) = @_;
+
+    $option =~ s/\$KERNEL_VERSION/$version/g;
+
+    return $option;
+}
+
+sub do_post_install {
+
+    return if (!defined($post_install));
+
+    my $cp_post_install = eval_kernel_version $post_install;
+    run_command "$cp_post_install" or
+	dodie "Failed to run post install";
+}
+
+# Sometimes the reboot fails, and will hang. We try to ssh to the box
+# and if we fail, we force another reboot, that should powercycle it.
+sub test_booted {
+    if (!run_ssh "echo testing connection") {
+	reboot $sleep_time;
+    }
+}
+
+sub install {
+
+    return if ($no_install);
+
+    my $start_time = time;
+
+    if (defined($pre_install)) {
+	my $cp_pre_install = eval_kernel_version $pre_install;
+	run_command "$cp_pre_install" or
+	    dodie "Failed to run pre install";
+    }
+
+    my $cp_target = eval_kernel_version $target_image;
+
+    test_booted;
+
+    run_scp_install "$outputdir/$build_target", "$cp_target" or
+	dodie "failed to copy image";
+
+    my $install_mods = 0;
+
+    # should we process modules?
+    $install_mods = 0;
+    open(IN, "$output_config") or dodie("Can't read config file");
+    while (<IN>) {
+	if (/CONFIG_MODULES(=y)?/) {
+	    if (defined($1)) {
+		$install_mods = 1;
+		last;
+	    }
+	}
+    }
+    close(IN);
+
+    if (!$install_mods) {
+	do_post_install;
+	doprint "No modules needed\n";
+	my $end_time = time;
+	$install_time = $end_time - $start_time;
+	return;
+    }
+
+    run_command "$make INSTALL_MOD_STRIP=1 INSTALL_MOD_PATH=$tmpdir modules_install" or
+	dodie "Failed to install modules";
+
+    my $modlib = "/lib/modules/$version";
+    my $modtar = "ktest-mods.tar.bz2";
+
+    run_ssh "rm -rf $modlib" or
+	dodie "failed to remove old mods: $modlib";
+
+    # would be nice if scp -r did not follow symbolic links
+    run_command "cd $tmpdir && tar -cjf $modtar lib/modules/$version" or
+	dodie "making tarball";
+
+    run_scp_mod "$tmpdir/$modtar", "/tmp" or
+	dodie "failed to copy modules";
+
+    unlink "$tmpdir/$modtar";
+
+    run_ssh "'(cd / && tar xjf /tmp/$modtar)'" or
+	dodie "failed to tar modules";
+
+    run_ssh "rm -f /tmp/$modtar";
+
+    do_post_install;
+
+    my $end_time = time;
+    $install_time = $end_time - $start_time;
+}
+
+sub get_version {
+    # get the release name
+    return if ($have_version);
+    doprint "$make kernelrelease ... ";
+    $version = `$make -s kernelrelease | tail -1`;
+    chomp($version);
+    doprint "$version\n";
+    $have_version = 1;
+}
+
+sub start_monitor_and_install {
+    # Make sure the stable kernel has finished booting
+
+    # Install bisects, don't need console
+    if (defined $console) {
+	start_monitor;
+	wait_for_monitor 5;
+	end_monitor;
+    }
+
+    get_grub_index;
+    get_version;
+    install;
+
+    start_monitor if (defined $console);
+    return monitor;
+}
+
+my $check_build_re = ".*:.*(warning|error|Error):.*";
+my $utf8_quote = "\\x{e2}\\x{80}(\\x{98}|\\x{99})";
+
+sub process_warning_line {
+    my ($line) = @_;
+
+    chomp $line;
+
+    # for distcc heterogeneous systems, some compilers
+    # do things differently causing warning lines
+    # to be slightly different. This makes an attempt
+    # to fixe those issues.
+
+    # chop off the index into the line
+    # using distcc, some compilers give different indexes
+    # depending on white space
+    $line =~ s/^(\s*\S+:\d+:)\d+/$1/;
+
+    # Some compilers use UTF-8 extended for quotes and some don't.
+    $line =~ s/$utf8_quote/'/g;
+
+    return $line;
+}
+
+# Read buildlog and check against warnings file for any
+# new warnings.
+#
+# Returns 1 if OK
+#         0 otherwise
+sub check_buildlog {
+    return 1 if (!defined $warnings_file);
+
+    my %warnings_list;
+
+    # Failed builds should not reboot the target
+    my $save_no_reboot = $no_reboot;
+    $no_reboot = 1;
+
+    if (-f $warnings_file) {
+	open(IN, $warnings_file) or
+	    dodie "Error opening $warnings_file";
+
+	while (<IN>) {
+	    if (/$check_build_re/) {
+		my $warning = process_warning_line $_;
+		
+		$warnings_list{$warning} = 1;
+	    }
+	}
+	close(IN);
+    }
+
+    # If warnings file didn't exist, and WARNINGS_FILE exist,
+    # then we fail on any warning!
+
+    open(IN, $buildlog) or dodie "Can't open $buildlog";
+    while (<IN>) {
+	if (/$check_build_re/) {
+	    my $warning = process_warning_line $_;
+
+	    if (!defined $warnings_list{$warning}) {
+		fail "New warning found (not in $warnings_file)\n$_\n";
+		$no_reboot = $save_no_reboot;
+		return 0;
+	    }
+	}
+    }
+    $no_reboot = $save_no_reboot;
+    close(IN);
+}
+
+sub check_patch_buildlog {
+    my ($patch) = @_;
+
+    my @files = `git show $patch | diffstat -l`;
+
+    foreach my $file (@files) {
+	chomp $file;
+    }
+
+    open(IN, "git show $patch |") or
+	dodie "failed to show $patch";
+    while (<IN>) {
+	if (m,^--- a/(.*),) {
+	    chomp $1;
+	    $files[$#files] = $1;
+	}
+    }
+    close(IN);
+
+    open(IN, $buildlog) or dodie "Can't open $buildlog";
+    while (<IN>) {
+	if (/^\s*(.*?):.*(warning|error)/) {
+	    my $err = $1;
+	    foreach my $file (@files) {
+		my $fullpath = "$builddir/$file";
+		if ($file eq $err || $fullpath eq $err) {
+		    fail "$file built with warnings" and return 0;
+		}
+	    }
+	}
+    }
+    close(IN);
+
+    return 1;
+}
+
+sub apply_min_config {
+    my $outconfig = "$output_config.new";
+
+    # Read the config file and remove anything that
+    # is in the force_config hash (from minconfig and others)
+    # then add the force config back.
+
+    doprint "Applying minimum configurations into $output_config.new\n";
+
+    open (OUT, ">$outconfig") or
+	dodie "Can't create $outconfig";
+
+    if (-f $output_config) {
+	open (IN, $output_config) or
+	    dodie "Failed to open $output_config";
+	while (<IN>) {
+	    if (/^(# )?(CONFIG_[^\s=]*)/) {
+		next if (defined($force_config{$2}));
+	    }
+	    print OUT;
+	}
+	close IN;
+    }
+    foreach my $config (keys %force_config) {
+	print OUT "$force_config{$config}\n";
+    }
+    close OUT;
+
+    run_command "mv $outconfig $output_config";
+}
+
+sub make_oldconfig {
+
+    my @force_list = keys %force_config;
+
+    if ($#force_list >= 0) {
+	apply_min_config;
+    }
+
+    if (!run_command "$make olddefconfig") {
+	# Perhaps olddefconfig doesn't exist in this version of the kernel
+	# try oldnoconfig
+	doprint "olddefconfig failed, trying make oldnoconfig\n";
+	if (!run_command "$make oldnoconfig") {
+	    doprint "oldnoconfig failed, trying yes '' | make oldconfig\n";
+	    # try a yes '' | oldconfig
+	    run_command "yes '' | $make oldconfig" or
+		dodie "failed make config oldconfig";
+	}
+    }
+}
+
+# read a config file and use this to force new configs.
+sub load_force_config {
+    my ($config) = @_;
+
+    doprint "Loading force configs from $config\n";
+    open(IN, $config) or
+	dodie "failed to read $config";
+    while (<IN>) {
+	chomp;
+	if (/^(CONFIG[^\s=]*)(\s*=.*)/) {
+	    $force_config{$1} = $_;
+	} elsif (/^# (CONFIG_\S*) is not set/) {
+	    $force_config{$1} = $_;
+	}
+    }
+    close IN;
+}
+
+sub build {
+    my ($type) = @_;
+
+    unlink $buildlog;
+
+    my $start_time = time;
+
+    # Failed builds should not reboot the target
+    my $save_no_reboot = $no_reboot;
+    $no_reboot = 1;
+
+    # Calculate a new version from here.
+    $have_version = 0;
+
+    if (defined($pre_build)) {
+	my $ret = run_command $pre_build;
+	if (!$ret && defined($pre_build_die) &&
+	    $pre_build_die) {
+	    dodie "failed to pre_build\n";
+	}
+    }
+
+    if ($type =~ /^useconfig:(.*)/) {
+	run_command "cp $1 $output_config" or
+	    dodie "could not copy $1 to .config";
+
+	$type = "oldconfig";
+    }
+
+    # old config can ask questions
+    if ($type eq "oldconfig") {
+	$type = "olddefconfig";
+
+	# allow for empty configs
+	run_command "touch $output_config";
+
+	if (!$noclean) {
+	    run_command "mv $output_config $outputdir/config_temp" or
+		dodie "moving .config";
+
+	    run_command "$make mrproper" or dodie "make mrproper";
+
+	    run_command "mv $outputdir/config_temp $output_config" or
+		dodie "moving config_temp";
+	}
+
+    } elsif (!$noclean) {
+	unlink "$output_config";
+	run_command "$make mrproper" or
+	    dodie "make mrproper";
+    }
+
+    # add something to distinguish this build
+    open(OUT, "> $outputdir/localversion") or dodie("Can't make localversion file");
+    print OUT "$localversion\n";
+    close(OUT);
+
+    if (defined($minconfig)) {
+	load_force_config($minconfig);
+    }
+
+    if ($type ne "olddefconfig") {
+	run_command "$make $type" or
+	    dodie "failed make config";
+    }
+    # Run old config regardless, to enforce min configurations
+    make_oldconfig;
+
+    my $build_ret = run_command "$make $build_options", $buildlog;
+
+    if (defined($post_build)) {
+	# Because a post build may change the kernel version
+	# do it now.
+	get_version;
+	my $ret = run_command $post_build;
+	if (!$ret && defined($post_build_die) &&
+	    $post_build_die) {
+	    dodie "failed to post_build\n";
+	}
+    }
+
+    if (!$build_ret) {
+	# bisect may need this to pass
+	if ($in_bisect) {
+	    $no_reboot = $save_no_reboot;
+	    return 0;
+	}
+	fail "failed build" and return 0;
+    }
+
+    $no_reboot = $save_no_reboot;
+
+    my $end_time = time;
+    $build_time = $end_time - $start_time;
+
+    return 1;
+}
+
+sub halt {
+    if (!run_ssh "halt" or defined($power_off)) {
+	if (defined($poweroff_after_halt)) {
+	    sleep $poweroff_after_halt;
+	    run_command "$power_off";
+	}
+    } else {
+	# nope? the zap it!
+	run_command "$power_off";
+    }
+}
+
+sub success {
+    my ($i) = @_;
+
+    $successes++;
+
+    my $name = "";
+
+    if (defined($test_name)) {
+	$name = " ($test_name)";
+    }
+
+    print_times;
+
+    doprint "\n\n*******************************************\n";
+    doprint     "*******************************************\n";
+    doprint     "KTEST RESULT: TEST $i$name SUCCESS!!!!         **\n";
+    doprint     "*******************************************\n";
+    doprint     "*******************************************\n";
+
+    if (defined($store_successes)) {
+        save_logs "success", $store_successes;
+    }
+
+    if ($i != $opt{"NUM_TESTS"} && !do_not_reboot) {
+	doprint "Reboot and wait $sleep_time seconds\n";
+	reboot_to_good $sleep_time;
+    }
+
+    if (defined($post_test)) {
+	run_command $post_test;
+    }
+}
+
+sub answer_bisect {
+    for (;;) {
+	doprint "Pass, fail, or skip? [p/f/s]";
+	my $ans = <STDIN>;
+	chomp $ans;
+	if ($ans eq "p" || $ans eq "P") {
+	    return 1;
+	} elsif ($ans eq "f" || $ans eq "F") {
+	    return 0;
+	} elsif ($ans eq "s" || $ans eq "S") {
+	    return -1;
+	} else {
+	    print "Please answer 'p', 'f', or 's'\n";
+	}
+    }
+}
+
+sub child_run_test {
+
+    # child should have no power
+    $reboot_on_error = 0;
+    $poweroff_on_error = 0;
+    $die_on_failure = 1;
+
+    run_command $run_test, $testlog;
+
+    exit $run_command_status;
+}
+
+my $child_done;
+
+sub child_finished {
+    $child_done = 1;
+}
+
+sub do_run_test {
+    my $child_pid;
+    my $child_exit;
+    my $line;
+    my $full_line;
+    my $bug = 0;
+    my $bug_ignored = 0;
+
+    my $start_time = time;
+
+    wait_for_monitor 1;
+
+    doprint "run test $run_test\n";
+
+    $child_done = 0;
+
+    $SIG{CHLD} = qw(child_finished);
+
+    $child_pid = fork;
+
+    child_run_test if (!$child_pid);
+
+    $full_line = "";
+
+    do {
+	$line = wait_for_input($monitor_fp, 1);
+	if (defined($line)) {
+
+	    # we are not guaranteed to get a full line
+	    $full_line .= $line;
+	    doprint $line;
+
+	    if ($full_line =~ /call trace:/i) {
+		if ($ignore_errors) {
+		    $bug_ignored = 1;
+		} else {
+		    $bug = 1;
+		}
+	    }
+
+	    if ($full_line =~ /Kernel panic -/) {
+		$bug = 1;
+	    }
+
+	    if ($line =~ /\n/) {
+		$full_line = "";
+	    }
+	}
+    } while (!$child_done && !$bug);
+
+    if (!$bug && $bug_ignored) {
+	doprint "WARNING: Call Trace detected but ignored due to IGNORE_ERRORS=1\n";
+    }
+
+    if ($bug) {
+	my $failure_start = time;
+	my $now;
+	do {
+	    $line = wait_for_input($monitor_fp, 1);
+	    if (defined($line)) {
+		doprint $line;
+	    }
+	    $now = time;
+	    if ($now - $failure_start >= $stop_after_failure) {
+		last;
+	    }
+	} while (defined($line));
+
+	doprint "Detected kernel crash!\n";
+	# kill the child with extreme prejudice
+	kill 9, $child_pid;
+    }
+
+    waitpid $child_pid, 0;
+    $child_exit = $? >> 8;
+
+    my $end_time = time;
+    $test_time = $end_time - $start_time;
+
+    if (!$bug && $in_bisect) {
+	if (defined($bisect_ret_good)) {
+	    if ($child_exit == $bisect_ret_good) {
+		return 1;
+	    }
+	}
+	if (defined($bisect_ret_skip)) {
+	    if ($child_exit == $bisect_ret_skip) {
+		return -1;
+	    }
+	}
+	if (defined($bisect_ret_abort)) {
+	    if ($child_exit == $bisect_ret_abort) {
+		fail "test abort" and return -2;
+	    }
+	}
+	if (defined($bisect_ret_bad)) {
+	    if ($child_exit == $bisect_ret_skip) {
+		return 0;
+	    }
+	}
+	if (defined($bisect_ret_default)) {
+	    if ($bisect_ret_default eq "good") {
+		return 1;
+	    } elsif ($bisect_ret_default eq "bad") {
+		return 0;
+	    } elsif ($bisect_ret_default eq "skip") {
+		return -1;
+	    } elsif ($bisect_ret_default eq "abort") {
+		return -2;
+	    } else {
+		fail "unknown default action: $bisect_ret_default"
+		    and return -2;
+	    }
+	}
+    }
+
+    if ($bug || $child_exit) {
+	return 0 if $in_bisect;
+	fail "test failed" and return 0;
+    }
+    return 1;
+}
+
+sub run_git_bisect {
+    my ($command) = @_;
+
+    doprint "$command ... ";
+
+    my $output = `$command 2>&1`;
+    my $ret = $?;
+
+    logit $output;
+
+    if ($ret) {
+	doprint "FAILED\n";
+	dodie "Failed to git bisect";
+    }
+
+    doprint "SUCCESS\n";
+    if ($output =~ m/^(Bisecting: .*\(roughly \d+ steps?\))\s+\[([[:xdigit:]]+)\]/) {
+	doprint "$1 [$2]\n";
+    } elsif ($output =~ m/^([[:xdigit:]]+) is the first bad commit/) {
+	$bisect_bad_commit = $1;
+	doprint "Found bad commit... $1\n";
+	return 0;
+    } else {
+	# we already logged it, just print it now.
+	print $output;
+    }
+
+    return 1;
+}
+
+sub bisect_reboot {
+    doprint "Reboot and sleep $bisect_sleep_time seconds\n";
+    reboot_to_good $bisect_sleep_time;
+}
+
+# returns 1 on success, 0 on failure, -1 on skip
+sub run_bisect_test {
+    my ($type, $buildtype) = @_;
+
+    my $failed = 0;
+    my $result;
+    my $output;
+    my $ret;
+
+    $in_bisect = 1;
+
+    build $buildtype or $failed = 1;
+
+    if ($type ne "build") {
+	if ($failed && $bisect_skip) {
+	    $in_bisect = 0;
+	    return -1;
+	}
+	dodie "Failed on build" if $failed;
+
+	# Now boot the box
+	start_monitor_and_install or $failed = 1;
+
+	if ($type ne "boot") {
+	    if ($failed && $bisect_skip) {
+		end_monitor;
+		bisect_reboot;
+		$in_bisect = 0;
+		return -1;
+	    }
+	    dodie "Failed on boot" if $failed;
+
+	    do_run_test or $failed = 1;
+	}
+	end_monitor;
+    }
+
+    if ($failed) {
+	$result = 0;
+    } else {
+	$result = 1;
+    }
+
+    # reboot the box to a kernel we can ssh to
+    if ($type ne "build") {
+	bisect_reboot;
+    }
+    $in_bisect = 0;
+
+    return $result;
+}
+
+sub run_bisect {
+    my ($type) = @_;
+    my $buildtype = "oldconfig";
+
+    # We should have a minconfig to use?
+    if (defined($minconfig)) {
+	$buildtype = "useconfig:$minconfig";
+    }
+
+    # If the user sets bisect_tries to less than 1, then no tries
+    # is a success.
+    my $ret = 1;
+
+    # Still let the user manually decide that though.
+    if ($bisect_tries < 1 && $bisect_manual) {
+	$ret = answer_bisect;
+    }
+
+    for (my $i = 0; $i < $bisect_tries; $i++) {
+	if ($bisect_tries > 1) {
+	    my $t = $i + 1;
+	    doprint("Running bisect trial $t of $bisect_tries:\n");
+	}
+	$ret = run_bisect_test $type, $buildtype;
+
+	if ($bisect_manual) {
+	    $ret = answer_bisect;
+	}
+
+	last if (!$ret);
+    }
+
+    # Are we looking for where it worked, not failed?
+    if ($reverse_bisect && $ret >= 0) {
+	$ret = !$ret;
+    }
+
+    if ($ret > 0) {
+	return "good";
+    } elsif ($ret == 0) {
+	return  "bad";
+    } elsif ($bisect_skip) {
+	doprint "HIT A BAD COMMIT ... SKIPPING\n";
+	return "skip";
+    }
+}
+
+sub update_bisect_replay {
+    my $tmp_log = "$tmpdir/ktest_bisect_log";
+    run_command "git bisect log > $tmp_log" or
+	dodie "can't create bisect log";
+    return $tmp_log;
+}
+
+sub bisect {
+    my ($i) = @_;
+
+    my $result;
+
+    dodie "BISECT_GOOD[$i] not defined\n"	if (!defined($bisect_good));
+    dodie "BISECT_BAD[$i] not defined\n"	if (!defined($bisect_bad));
+    dodie "BISECT_TYPE[$i] not defined\n"	if (!defined($bisect_type));
+
+    my $good = $bisect_good;
+    my $bad = $bisect_bad;
+    my $type = $bisect_type;
+    my $start = $bisect_start;
+    my $replay = $bisect_replay;
+    my $start_files = $bisect_files;
+
+    if (defined($start_files)) {
+	$start_files = " -- " . $start_files;
+    } else {
+	$start_files = "";
+    }
+
+    # convert to true sha1's
+    $good = get_sha1($good);
+    $bad = get_sha1($bad);
+
+    if (defined($bisect_reverse) && $bisect_reverse == 1) {
+	doprint "Performing a reverse bisect (bad is good, good is bad!)\n";
+	$reverse_bisect = 1;
+    } else {
+	$reverse_bisect = 0;
+    }
+
+    # Can't have a test without having a test to run
+    if ($type eq "test" && !defined($run_test)) {
+	$type = "boot";
+    }
+
+    # Check if a bisect was running
+    my $bisect_start_file = "$builddir/.git/BISECT_START";
+
+    my $check = $bisect_check;
+    my $do_check = defined($check) && $check ne "0";
+
+    if ( -f $bisect_start_file ) {
+	print "Bisect in progress found\n";
+	if ($do_check) {
+	    print " If you say yes, then no checks of good or bad will be done\n";
+	}
+	if (defined($replay)) {
+	    print "** BISECT_REPLAY is defined in config file **";
+	    print " Ignore config option and perform new git bisect log?\n";
+	    if (read_ync " (yes, no, or cancel) ") {
+		$replay = update_bisect_replay;
+		$do_check = 0;
+	    }
+	} elsif (read_yn "read git log and continue?") {
+	    $replay = update_bisect_replay;
+	    $do_check = 0;
+	}
+    }
+
+    if ($do_check) {
+
+	# get current HEAD
+	my $head = get_sha1("HEAD");
+
+	if ($check ne "good") {
+	    doprint "TESTING BISECT BAD [$bad]\n";
+	    run_command "git checkout $bad" or
+		dodie "Failed to checkout $bad";
+
+	    $result = run_bisect $type;
+
+	    if ($result ne "bad") {
+		fail "Tested BISECT_BAD [$bad] and it succeeded" and return 0;
+	    }
+	}
+
+	if ($check ne "bad") {
+	    doprint "TESTING BISECT GOOD [$good]\n";
+	    run_command "git checkout $good" or
+		dodie "Failed to checkout $good";
+
+	    $result = run_bisect $type;
+
+	    if ($result ne "good") {
+		fail "Tested BISECT_GOOD [$good] and it failed" and return 0;
+	    }
+	}
+
+	# checkout where we started
+	run_command "git checkout $head" or
+	    dodie "Failed to checkout $head";
+    }
+
+    run_command "git bisect start$start_files" or
+	dodie "could not start bisect";
+
+    if (defined($replay)) {
+	run_command "git bisect replay $replay" or
+	    dodie "failed to run replay";
+    } else {
+
+	run_command "git bisect good $good" or
+	    dodie "could not set bisect good to $good";
+
+	run_git_bisect "git bisect bad $bad" or
+	    dodie "could not set bisect bad to $bad";
+
+    }
+
+    if (defined($start)) {
+	run_command "git checkout $start" or
+	    dodie "failed to checkout $start";
+    }
+
+    my $test;
+    do {
+	$result = run_bisect $type;
+	$test = run_git_bisect "git bisect $result";
+	print_times;
+    } while ($test);
+
+    run_command "git bisect log" or
+	dodie "could not capture git bisect log";
+
+    run_command "git bisect reset" or
+	dodie "could not reset git bisect";
+
+    doprint "Bad commit was [$bisect_bad_commit]\n";
+
+    success $i;
+}
+
+# config_ignore holds the configs that were set (or unset) for
+# a good config and we will ignore these configs for the rest
+# of a config bisect. These configs stay as they were.
+my %config_ignore;
+
+# config_set holds what all configs were set as.
+my %config_set;
+
+# config_off holds the set of configs that the bad config had disabled.
+# We need to record them and set them in the .config when running
+# olddefconfig, because olddefconfig keeps the defaults.
+my %config_off;
+
+# config_off_tmp holds a set of configs to turn off for now
+my @config_off_tmp;
+
+# config_list is the set of configs that are being tested
+my %config_list;
+my %null_config;
+
+my %dependency;
+
+sub assign_configs {
+    my ($hash, $config) = @_;
+
+    doprint "Reading configs from $config\n";
+
+    open (IN, $config)
+	or dodie "Failed to read $config";
+
+    while (<IN>) {
+	chomp;
+	if (/^((CONFIG\S*)=.*)/) {
+	    ${$hash}{$2} = $1;
+	} elsif (/^(# (CONFIG\S*) is not set)/) {
+	    ${$hash}{$2} = $1;
+	}
+    }
+
+    close(IN);
+}
+
+sub process_config_ignore {
+    my ($config) = @_;
+
+    assign_configs \%config_ignore, $config;
+}
+
+sub get_dependencies {
+    my ($config) = @_;
+
+    my $arr = $dependency{$config};
+    if (!defined($arr)) {
+	return ();
+    }
+
+    my @deps = @{$arr};
+
+    foreach my $dep (@{$arr}) {
+	print "ADD DEP $dep\n";
+	@deps = (@deps, get_dependencies $dep);
+    }
+
+    return @deps;
+}
+
+sub save_config {
+    my ($pc, $file) = @_;
+
+    my %configs = %{$pc};
+
+    doprint "Saving configs into $file\n";
+
+    open(OUT, ">$file") or dodie "Can not write to $file";
+
+    foreach my $config (keys %configs) {
+	print OUT "$configs{$config}\n";
+    }
+    close(OUT);
+}
+
+sub create_config {
+    my ($name, $pc) = @_;
+
+    doprint "Creating old config from $name configs\n";
+
+    save_config $pc, $output_config;
+
+    make_oldconfig;
+}
+
+sub run_config_bisect_test {
+    my ($type) = @_;
+
+    my $ret = run_bisect_test $type, "oldconfig";
+
+    if ($bisect_manual) {
+	$ret = answer_bisect;
+    }
+
+    return $ret;
+}
+
+sub config_bisect_end {
+    my ($good, $bad) = @_;
+    my $diffexec = "diff -u";
+
+    if (-f "$builddir/scripts/diffconfig") {
+	$diffexec = "$builddir/scripts/diffconfig";
+    }
+    doprint "\n\n***************************************\n";
+    doprint "No more config bisecting possible.\n";
+    run_command "$diffexec $good $bad", 1;
+    doprint "***************************************\n\n";
+}
+
+sub run_config_bisect {
+    my ($good, $bad, $last_result) = @_;
+    my $reset = "";
+    my $cmd;
+    my $ret;
+
+    if (!length($last_result)) {
+	$reset = "-r";
+    }
+    run_command "$config_bisect_exec $reset -b $outputdir $good $bad $last_result", 1;
+
+    # config-bisect returns:
+    #   0 if there is more to bisect
+    #   1 for finding a good config
+    #   2 if it can not find any more configs
+    #  -1 (255) on error
+    if ($run_command_status) {
+	return $run_command_status;
+    }
+
+    $ret = run_config_bisect_test $config_bisect_type;
+    if ($ret) {
+        doprint "NEW GOOD CONFIG\n";
+	# Return 3 for good config
+	return 3;
+    } else {
+        doprint "NEW BAD CONFIG\n";
+	# Return 4 for bad config
+	return 4;
+    }
+}
+
+sub config_bisect {
+    my ($i) = @_;
+
+    my $good_config;
+    my $bad_config;
+
+    my $type = $config_bisect_type;
+    my $ret;
+
+    $bad_config = $config_bisect;
+
+    if (defined($config_bisect_good)) {
+	$good_config = $config_bisect_good;
+    } elsif (defined($minconfig)) {
+	$good_config = $minconfig;
+    } else {
+	doprint "No config specified, checking if defconfig works";
+	$ret = run_bisect_test $type, "defconfig";
+	if (!$ret) {
+	    fail "Have no good config to compare with, please set CONFIG_BISECT_GOOD";
+	    return 1;
+	}
+	$good_config = $output_config;
+    }
+
+    if (!defined($config_bisect_exec)) {
+	# First check the location that ktest.pl ran
+	my @locations = ( "$pwd/config-bisect.pl",
+			  "$dirname/config-bisect.pl",
+			  "$builddir/tools/testing/ktest/config-bisect.pl",
+			  undef );
+	foreach my $loc (@locations) {
+	    doprint "loc = $loc\n";
+	    $config_bisect_exec = $loc;
+	    last if (defined($config_bisect_exec && -x $config_bisect_exec));
+	}
+	if (!defined($config_bisect_exec)) {
+	    fail "Could not find an executable config-bisect.pl\n",
+		"  Set CONFIG_BISECT_EXEC to point to config-bisect.pl";
+	    return 1;
+	}
+    }
+
+    # we don't want min configs to cause issues here.
+    doprint "Disabling 'MIN_CONFIG' for this test\n";
+    undef $minconfig;
+
+    my %good_configs;
+    my %bad_configs;
+    my %tmp_configs;
+
+    if (-f "$tmpdir/good_config.tmp" || -f "$tmpdir/bad_config.tmp") {
+	if (read_yn "Interrupted config-bisect. Continue (n - will start new)?") {
+	    if (-f "$tmpdir/good_config.tmp") {
+		$good_config = "$tmpdir/good_config.tmp";
+	    } else {
+		$good_config = "$tmpdir/good_config";
+	    }
+	    if (-f "$tmpdir/bad_config.tmp") {
+		$bad_config = "$tmpdir/bad_config.tmp";
+	    } else {
+		$bad_config = "$tmpdir/bad_config";
+	    }
+	}
+    }
+    doprint "Run good configs through make oldconfig\n";
+    assign_configs \%tmp_configs, $good_config;
+    create_config "$good_config", \%tmp_configs;
+    $good_config = "$tmpdir/good_config";
+    system("cp $output_config $good_config") == 0 or dodie "cp good config";
+
+    doprint "Run bad configs through make oldconfig\n";
+    assign_configs \%tmp_configs, $bad_config;
+    create_config "$bad_config", \%tmp_configs;
+    $bad_config = "$tmpdir/bad_config";
+    system("cp $output_config $bad_config") == 0 or dodie "cp bad config";
+
+    if (defined($config_bisect_check) && $config_bisect_check ne "0") {
+	if ($config_bisect_check ne "good") {
+	    doprint "Testing bad config\n";
+
+	    $ret = run_bisect_test $type, "useconfig:$bad_config";
+	    if ($ret) {
+		fail "Bad config succeeded when expected to fail!";
+		return 0;
+	    }
+	}
+	if ($config_bisect_check ne "bad") {
+	    doprint "Testing good config\n";
+
+	    $ret = run_bisect_test $type, "useconfig:$good_config";
+	    if (!$ret) {
+		fail "Good config failed when expected to succeed!";
+		return 0;
+	    }
+	}
+    }
+
+    my $last_run = "";
+
+    do {
+	$ret = run_config_bisect $good_config, $bad_config, $last_run;
+	if ($ret == 3) {
+	    $last_run = "good";
+	} elsif ($ret == 4) {
+	    $last_run = "bad";
+	}
+	print_times;
+    } while ($ret == 3 || $ret == 4);
+
+    if ($ret == 2) {
+        config_bisect_end "$good_config.tmp", "$bad_config.tmp";
+    }
+
+    return $ret if ($ret < 0);
+
+    success $i;
+}
+
+sub patchcheck_reboot {
+    doprint "Reboot and sleep $patchcheck_sleep_time seconds\n";
+    reboot_to_good $patchcheck_sleep_time;
+}
+
+sub patchcheck {
+    my ($i) = @_;
+
+    dodie "PATCHCHECK_START[$i] not defined\n"
+	if (!defined($patchcheck_start));
+    dodie "PATCHCHECK_TYPE[$i] not defined\n"
+	if (!defined($patchcheck_type));
+
+    my $start = $patchcheck_start;
+
+    my $cherry = $patchcheck_cherry;
+    if (!defined($cherry)) {
+	$cherry = 0;
+    }
+
+    my $end = "HEAD";
+    if (defined($patchcheck_end)) {
+	$end = $patchcheck_end;
+    } elsif ($cherry) {
+	dodie "PATCHCHECK_END must be defined with PATCHCHECK_CHERRY\n";
+    }
+
+    # Get the true sha1's since we can use things like HEAD~3
+    $start = get_sha1($start);
+    $end = get_sha1($end);
+
+    my $type = $patchcheck_type;
+
+    # Can't have a test without having a test to run
+    if ($type eq "test" && !defined($run_test)) {
+	$type = "boot";
+    }
+
+    if ($cherry) {
+	open (IN, "git cherry -v $start $end|") or
+	    dodie "could not get git list";
+    } else {
+	open (IN, "git log --pretty=oneline $end|") or
+	    dodie "could not get git list";
+    }
+
+    my @list;
+
+    while (<IN>) {
+	chomp;
+	# git cherry adds a '+' we want to remove
+	s/^\+ //;
+	$list[$#list+1] = $_;
+	last if (/^$start/);
+    }
+    close(IN);
+
+    if (!$cherry) {
+	if ($list[$#list] !~ /^$start/) {
+	    fail "SHA1 $start not found";
+	}
+
+	# go backwards in the list
+	@list = reverse @list;
+    }
+
+    doprint("Going to test the following commits:\n");
+    foreach my $l (@list) {
+	doprint "$l\n";
+    }
+
+    my $save_clean = $noclean;
+    my %ignored_warnings;
+
+    if (defined($ignore_warnings)) {
+	foreach my $sha1 (split /\s+/, $ignore_warnings) {
+	    $ignored_warnings{$sha1} = 1;
+	}
+    }
+
+    $in_patchcheck = 1;
+    foreach my $item (@list) {
+	my $sha1 = $item;
+	$sha1 =~ s/^([[:xdigit:]]+).*/$1/;
+
+	doprint "\nProcessing commit \"$item\"\n\n";
+
+	run_command "git checkout $sha1" or
+	    dodie "Failed to checkout $sha1";
+
+	# only clean on the first and last patch
+	if ($item eq $list[0] ||
+	    $item eq $list[$#list]) {
+	    $noclean = $save_clean;
+	} else {
+	    $noclean = 1;
+	}
+
+	if (defined($minconfig)) {
+	    build "useconfig:$minconfig" or return 0;
+	} else {
+	    # ?? no config to use?
+	    build "oldconfig" or return 0;
+	}
+
+	# No need to do per patch checking if warnings file exists
+	if (!defined($warnings_file) && !defined($ignored_warnings{$sha1})) {
+	    check_patch_buildlog $sha1 or return 0;
+	}
+
+	check_buildlog or return 0;
+
+	next if ($type eq "build");
+
+	my $failed = 0;
+
+	start_monitor_and_install or $failed = 1;
+
+	if (!$failed && $type ne "boot"){
+	    do_run_test or $failed = 1;
+	}
+	end_monitor;
+	if ($failed) {
+	    print_times;
+	    return 0;
+	}
+	patchcheck_reboot;
+	print_times;
+    }
+    $in_patchcheck = 0;
+    success $i;
+
+    return 1;
+}
+
+my %depends;
+my %depcount;
+my $iflevel = 0;
+my @ifdeps;
+
+# prevent recursion
+my %read_kconfigs;
+
+sub add_dep {
+    # $config depends on $dep
+    my ($config, $dep) = @_;
+
+    if (defined($depends{$config})) {
+	$depends{$config} .= " " . $dep;
+    } else {
+	$depends{$config} = $dep;
+    }
+
+    # record the number of configs depending on $dep
+    if (defined $depcount{$dep}) {
+	$depcount{$dep}++;
+    } else {
+	$depcount{$dep} = 1;
+    } 
+}
+
+# taken from streamline_config.pl
+sub read_kconfig {
+    my ($kconfig) = @_;
+
+    my $state = "NONE";
+    my $config;
+    my @kconfigs;
+
+    my $cont = 0;
+    my $line;
+
+
+    if (! -f $kconfig) {
+	doprint "file $kconfig does not exist, skipping\n";
+	return;
+    }
+
+    open(KIN, "$kconfig")
+	or dodie "Can't open $kconfig";
+    while (<KIN>) {
+	chomp;
+
+	# Make sure that lines ending with \ continue
+	if ($cont) {
+	    $_ = $line . " " . $_;
+	}
+
+	if (s/\\$//) {
+	    $cont = 1;
+	    $line = $_;
+	    next;
+	}
+
+	$cont = 0;
+
+	# collect any Kconfig sources
+	if (/^source\s*"(.*)"/) {
+	    $kconfigs[$#kconfigs+1] = $1;
+	}
+
+	# configs found
+	if (/^\s*(menu)?config\s+(\S+)\s*$/) {
+	    $state = "NEW";
+	    $config = $2;
+
+	    for (my $i = 0; $i < $iflevel; $i++) {
+		add_dep $config, $ifdeps[$i];
+	    }
+
+	# collect the depends for the config
+	} elsif ($state eq "NEW" && /^\s*depends\s+on\s+(.*)$/) {
+
+	    add_dep $config, $1;
+
+	# Get the configs that select this config
+	} elsif ($state eq "NEW" && /^\s*select\s+(\S+)/) {
+
+	    # selected by depends on config
+	    add_dep $1, $config;
+
+	# Check for if statements
+	} elsif (/^if\s+(.*\S)\s*$/) {
+	    my $deps = $1;
+	    # remove beginning and ending non text
+	    $deps =~ s/^[^a-zA-Z0-9_]*//;
+	    $deps =~ s/[^a-zA-Z0-9_]*$//;
+
+	    my @deps = split /[^a-zA-Z0-9_]+/, $deps;
+
+	    $ifdeps[$iflevel++] = join ':', @deps;
+
+	} elsif (/^endif/) {
+
+	    $iflevel-- if ($iflevel);
+
+	# stop on "help"
+	} elsif (/^\s*help\s*$/) {
+	    $state = "NONE";
+	}
+    }
+    close(KIN);
+
+    # read in any configs that were found.
+    foreach $kconfig (@kconfigs) {
+	if (!defined($read_kconfigs{$kconfig})) {
+	    $read_kconfigs{$kconfig} = 1;
+	    read_kconfig("$builddir/$kconfig");
+	}
+    }
+}
+
+sub read_depends {
+    # find out which arch this is by the kconfig file
+    open (IN, $output_config)
+	or dodie "Failed to read $output_config";
+    my $arch;
+    while (<IN>) {
+	if (m,Linux/(\S+)\s+\S+\s+Kernel Configuration,) {
+	    $arch = $1;
+	    last;
+	}
+    }
+    close IN;
+
+    if (!defined($arch)) {
+	doprint "Could not find arch from config file\n";
+	doprint "no dependencies used\n";
+	return;
+    }
+
+    # arch is really the subarch, we need to know
+    # what directory to look at.
+    if ($arch eq "i386" || $arch eq "x86_64") {
+	$arch = "x86";
+    }
+
+    my $kconfig = "$builddir/arch/$arch/Kconfig";
+
+    if (! -f $kconfig && $arch =~ /\d$/) {
+	my $orig = $arch;
+ 	# some subarchs have numbers, truncate them
+	$arch =~ s/\d*$//;
+	$kconfig = "$builddir/arch/$arch/Kconfig";
+	if (! -f $kconfig) {
+	    doprint "No idea what arch dir $orig is for\n";
+	    doprint "no dependencies used\n";
+	    return;
+	}
+    }
+
+    read_kconfig($kconfig);
+}
+
+sub make_new_config {
+    my @configs = @_;
+
+    open (OUT, ">$output_config")
+	or dodie "Failed to write $output_config";
+
+    foreach my $config (@configs) {
+	print OUT "$config\n";
+    }
+    close OUT;
+}
+
+sub chomp_config {
+    my ($config) = @_;
+
+    $config =~ s/CONFIG_//;
+
+    return $config;
+}
+
+sub get_depends {
+    my ($dep) = @_;
+
+    my $kconfig = chomp_config $dep;
+
+    $dep = $depends{"$kconfig"};
+
+    # the dep string we have saves the dependencies as they
+    # were found, including expressions like ! && ||. We
+    # want to split this out into just an array of configs.
+
+    my $valid = "A-Za-z_0-9";
+
+    my @configs;
+
+    while ($dep =~ /[$valid]/) {
+
+	if ($dep =~ /^[^$valid]*([$valid]+)/) {
+	    my $conf = "CONFIG_" . $1;
+
+	    $configs[$#configs + 1] = $conf;
+
+	    $dep =~ s/^[^$valid]*[$valid]+//;
+	} else {
+	    dodie "this should never happen";
+	}
+    }
+
+    return @configs;
+}
+
+my %min_configs;
+my %keep_configs;
+my %save_configs;
+my %processed_configs;
+my %nochange_config;
+
+sub test_this_config {
+    my ($config) = @_;
+
+    my $found;
+
+    # if we already processed this config, skip it
+    if (defined($processed_configs{$config})) {
+	return undef;
+    }
+    $processed_configs{$config} = 1;
+
+    # if this config failed during this round, skip it
+    if (defined($nochange_config{$config})) {
+	return undef;
+    }
+
+    my $kconfig = chomp_config $config;
+
+    # Test dependencies first
+    if (defined($depends{"$kconfig"})) {
+	my @parents = get_depends $config;
+	foreach my $parent (@parents) {
+	    # if the parent is in the min config, check it first
+	    next if (!defined($min_configs{$parent}));
+	    $found = test_this_config($parent);
+	    if (defined($found)) {
+		return $found;
+	    }
+	}
+    }
+
+    # Remove this config from the list of configs
+    # do a make olddefconfig and then read the resulting
+    # .config to make sure it is missing the config that
+    # we had before
+    my %configs = %min_configs;
+    delete $configs{$config};
+    make_new_config ((values %configs), (values %keep_configs));
+    make_oldconfig;
+    undef %configs;
+    assign_configs \%configs, $output_config;
+
+    if (!defined($configs{$config}) || $configs{$config} =~ /^#/) {
+	return $config;
+    }
+
+    doprint "disabling config $config did not change .config\n";
+
+    $nochange_config{$config} = 1;
+
+    return undef;
+}
+
+sub make_min_config {
+    my ($i) = @_;
+
+    my $type = $minconfig_type;
+    if ($type ne "boot" && $type ne "test") {
+	fail "Invalid MIN_CONFIG_TYPE '$minconfig_type'\n" .
+	    " make_min_config works only with 'boot' and 'test'\n" and return;
+    }
+
+    if (!defined($output_minconfig)) {
+	fail "OUTPUT_MIN_CONFIG not defined" and return;
+    }
+
+    # If output_minconfig exists, and the start_minconfig
+    # came from min_config, than ask if we should use
+    # that instead.
+    if (-f $output_minconfig && !$start_minconfig_defined) {
+	print "$output_minconfig exists\n";
+	if (!defined($use_output_minconfig)) {
+	    if (read_yn " Use it as minconfig?") {
+		$start_minconfig = $output_minconfig;
+	    }
+	} elsif ($use_output_minconfig > 0) {
+	    doprint "Using $output_minconfig as MIN_CONFIG\n";
+	    $start_minconfig = $output_minconfig;
+	} else {
+	    doprint "Set to still use MIN_CONFIG as starting point\n";
+	}
+    }
+
+    if (!defined($start_minconfig)) {
+	fail "START_MIN_CONFIG or MIN_CONFIG not defined" and return;
+    }
+
+    my $temp_config = "$tmpdir/temp_config";
+
+    # First things first. We build an allnoconfig to find
+    # out what the defaults are that we can't touch.
+    # Some are selections, but we really can't handle selections.
+
+    my $save_minconfig = $minconfig;
+    undef $minconfig;
+
+    run_command "$make allnoconfig" or return 0;
+
+    read_depends;
+
+    process_config_ignore $output_config;
+
+    undef %save_configs;
+    undef %min_configs;
+
+    if (defined($ignore_config)) {
+	# make sure the file exists
+	`touch $ignore_config`;
+	assign_configs \%save_configs, $ignore_config;
+    }
+
+    %keep_configs = %save_configs;
+
+    doprint "Load initial configs from $start_minconfig\n";
+
+    # Look at the current min configs, and save off all the
+    # ones that were set via the allnoconfig
+    assign_configs \%min_configs, $start_minconfig;
+
+    my @config_keys = keys %min_configs;
+
+    # All configs need a depcount
+    foreach my $config (@config_keys) {
+	my $kconfig = chomp_config $config;
+	if (!defined $depcount{$kconfig}) {
+		$depcount{$kconfig} = 0;
+	}
+    }
+
+    # Remove anything that was set by the make allnoconfig
+    # we shouldn't need them as they get set for us anyway.
+    foreach my $config (@config_keys) {
+	# Remove anything in the ignore_config
+	if (defined($keep_configs{$config})) {
+	    my $file = $ignore_config;
+	    $file =~ s,.*/(.*?)$,$1,;
+	    doprint "$config set by $file ... ignored\n";
+	    delete $min_configs{$config};
+	    next;
+	}
+	# But make sure the settings are the same. If a min config
+	# sets a selection, we do not want to get rid of it if
+	# it is not the same as what we have. Just move it into
+	# the keep configs.
+	if (defined($config_ignore{$config})) {
+	    if ($config_ignore{$config} ne $min_configs{$config}) {
+		doprint "$config is in allnoconfig as '$config_ignore{$config}'";
+		doprint " but it is '$min_configs{$config}' in minconfig .. keeping\n";
+		$keep_configs{$config} = $min_configs{$config};
+	    } else {
+		doprint "$config set by allnoconfig ... ignored\n";
+	    }
+	    delete $min_configs{$config};
+	}
+    }
+
+    my $done = 0;
+    my $take_two = 0;
+
+    while (!$done) {
+
+	my $config;
+	my $found;
+
+	# Now disable each config one by one and do a make oldconfig
+	# till we find a config that changes our list.
+
+	my @test_configs = keys %min_configs;
+
+	# Sort keys by who is most dependent on
+	@test_configs = sort  { $depcount{chomp_config($b)} <=> $depcount{chomp_config($a)} }
+			  @test_configs ;
+
+	# Put configs that did not modify the config at the end.
+	my $reset = 1;
+	for (my $i = 0; $i < $#test_configs; $i++) {
+	    if (!defined($nochange_config{$test_configs[0]})) {
+		$reset = 0;
+		last;
+	    }
+	    # This config didn't change the .config last time.
+	    # Place it at the end
+	    my $config = shift @test_configs;
+	    push @test_configs, $config;
+	}
+
+	# if every test config has failed to modify the .config file
+	# in the past, then reset and start over.
+	if ($reset) {
+	    undef %nochange_config;
+	}
+
+	undef %processed_configs;
+
+	foreach my $config (@test_configs) {
+
+	    $found = test_this_config $config;
+
+	    last if (defined($found));
+
+	    # oh well, try another config
+	}
+
+	if (!defined($found)) {
+	    # we could have failed due to the nochange_config hash
+	    # reset and try again
+	    if (!$take_two) {
+		undef %nochange_config;
+		$take_two = 1;
+		next;
+	    }
+	    doprint "No more configs found that we can disable\n";
+	    $done = 1;
+	    last;
+	}
+	$take_two = 0;
+
+	$config = $found;
+
+	doprint "Test with $config disabled\n";
+
+	# set in_bisect to keep build and monitor from dieing
+	$in_bisect = 1;
+
+	my $failed = 0;
+	build "oldconfig" or $failed = 1;
+	if (!$failed) {
+		start_monitor_and_install or $failed = 1;
+
+		if ($type eq "test" && !$failed) {
+		    do_run_test or $failed = 1;
+		}
+
+		end_monitor;
+	}
+
+	$in_bisect = 0;
+
+	if ($failed) {
+	    doprint "$min_configs{$config} is needed to boot the box... keeping\n";
+	    # this config is needed, add it to the ignore list.
+	    $keep_configs{$config} = $min_configs{$config};
+	    $save_configs{$config} = $min_configs{$config};
+	    delete $min_configs{$config};
+
+	    # update new ignore configs
+	    if (defined($ignore_config)) {
+		open (OUT, ">$temp_config")
+		    or dodie "Can't write to $temp_config";
+		foreach my $config (keys %save_configs) {
+		    print OUT "$save_configs{$config}\n";
+		}
+		close OUT;
+		run_command "mv $temp_config $ignore_config" or
+		    dodie "failed to copy update to $ignore_config";
+	    }
+
+	} else {
+	    # We booted without this config, remove it from the minconfigs.
+	    doprint "$config is not needed, disabling\n";
+
+	    delete $min_configs{$config};
+
+	    # Also disable anything that is not enabled in this config
+	    my %configs;
+	    assign_configs \%configs, $output_config;
+	    my @config_keys = keys %min_configs;
+	    foreach my $config (@config_keys) {
+		if (!defined($configs{$config})) {
+		    doprint "$config is not set, disabling\n";
+		    delete $min_configs{$config};
+		}
+	    }
+
+	    # Save off all the current mandatory configs
+	    open (OUT, ">$temp_config")
+		or dodie "Can't write to $temp_config";
+	    foreach my $config (keys %keep_configs) {
+		print OUT "$keep_configs{$config}\n";
+	    }
+	    foreach my $config (keys %min_configs) {
+		print OUT "$min_configs{$config}\n";
+	    }
+	    close OUT;
+
+	    run_command "mv $temp_config $output_minconfig" or
+		dodie "failed to copy update to $output_minconfig";
+	}
+
+	doprint "Reboot and wait $sleep_time seconds\n";
+	reboot_to_good $sleep_time;
+    }
+
+    success $i;
+    return 1;
+}
+
+sub make_warnings_file {
+    my ($i) = @_;
+
+    if (!defined($warnings_file)) {
+	dodie "Must define WARNINGS_FILE for make_warnings_file test";
+    }
+
+    if ($build_type eq "nobuild") {
+	dodie "BUILD_TYPE can not be 'nobuild' for make_warnings_file test";
+    }
+
+    build $build_type or dodie "Failed to build";
+
+    open(OUT, ">$warnings_file") or dodie "Can't create $warnings_file";
+
+    open(IN, $buildlog) or dodie "Can't open $buildlog";
+    while (<IN>) {
+
+	# Some compilers use UTF-8 extended for quotes
+	# for distcc heterogeneous systems, this causes issues
+	s/$utf8_quote/'/g;
+
+	if (/$check_build_re/) {
+	    print OUT;
+	}
+    }
+    close(IN);
+
+    close(OUT);
+
+    success $i;
+}
+
+$#ARGV < 1 or die "ktest.pl version: $VERSION\n   usage: ktest.pl [config-file]\n";
+
+if ($#ARGV == 0) {
+    $ktest_config = $ARGV[0];
+    if (! -f $ktest_config) {
+	print "$ktest_config does not exist.\n";
+	if (!read_yn "Create it?") {
+	    exit 0;
+	}
+    }
+}
+
+if (! -f $ktest_config) {
+    $newconfig = 1;
+    get_test_case;
+    open(OUT, ">$ktest_config") or die "Can not create $ktest_config";
+    print OUT << "EOF"
+# Generated by ktest.pl
+#
+
+# PWD is a ktest.pl variable that will result in the process working
+# directory that ktest.pl is executed in.
+
+# THIS_DIR is automatically assigned the PWD of the path that generated
+# the config file. It is best to use this variable when assigning other
+# directory paths within this directory. This allows you to easily
+# move the test cases to other locations or to other machines.
+#
+THIS_DIR := $variable{"PWD"}
+
+# Define each test with TEST_START
+# The config options below it will override the defaults
+TEST_START
+TEST_TYPE = $default{"TEST_TYPE"}
+
+DEFAULTS
+EOF
+;
+    close(OUT);
+}
+read_config $ktest_config;
+
+if (defined($opt{"LOG_FILE"})) {
+    $opt{"LOG_FILE"} = eval_option("LOG_FILE", $opt{"LOG_FILE"}, -1);
+}
+
+# Append any configs entered in manually to the config file.
+my @new_configs = keys %entered_configs;
+if ($#new_configs >= 0) {
+    print "\nAppending entered in configs to $ktest_config\n";
+    open(OUT, ">>$ktest_config") or die "Can not append to $ktest_config";
+    foreach my $config (@new_configs) {
+	print OUT "$config = $entered_configs{$config}\n";
+	$opt{$config} = process_variables($entered_configs{$config});
+    }
+}
+
+if ($opt{"CLEAR_LOG"} && defined($opt{"LOG_FILE"})) {
+    unlink $opt{"LOG_FILE"};
+}
+
+doprint "\n\nSTARTING AUTOMATED TESTS\n\n";
+
+for (my $i = 0, my $repeat = 1; $i <= $opt{"NUM_TESTS"}; $i += $repeat) {
+
+    if (!$i) {
+	doprint "DEFAULT OPTIONS:\n";
+    } else {
+	doprint "\nTEST $i OPTIONS";
+	if (defined($repeat_tests{$i})) {
+	    $repeat = $repeat_tests{$i};
+	    doprint " ITERATE $repeat";
+	}
+	doprint "\n";
+    }
+
+    foreach my $option (sort keys %opt) {
+
+	if ($option =~ /\[(\d+)\]$/) {
+	    next if ($i != $1);
+	} else {
+	    next if ($i);
+	}
+
+	doprint "$option = $opt{$option}\n";
+    }
+}
+
+sub option_defined {
+    my ($option) = @_;
+
+    if (defined($opt{$option}) && $opt{$option} !~ /^\s*$/) {
+	return 1;
+    }
+
+    return 0;
+}
+
+sub __set_test_option {
+    my ($name, $i) = @_;
+
+    my $option = "$name\[$i\]";
+
+    if (option_defined($option)) {
+	return $opt{$option};
+    }
+
+    foreach my $test (keys %repeat_tests) {
+	if ($i >= $test &&
+	    $i < $test + $repeat_tests{$test}) {
+	    $option = "$name\[$test\]";
+	    if (option_defined($option)) {
+		return $opt{$option};
+	    }
+	}
+    }
+
+    if (option_defined($name)) {
+	return $opt{$name};
+    }
+
+    return undef;
+}
+
+sub set_test_option {
+    my ($name, $i) = @_;
+
+    my $option = __set_test_option($name, $i);
+    return $option if (!defined($option));
+
+    return eval_option($name, $option, $i);
+}
+
+sub find_mailer {
+    my ($mailer) = @_;
+
+    my @paths = split /:/, $ENV{PATH};
+
+    # sendmail is usually in /usr/sbin
+    $paths[$#paths + 1] = "/usr/sbin";
+
+    foreach my $path (@paths) {
+	if (-x "$path/$mailer") {
+	    return $path;
+	}
+    }
+
+    return undef;
+}
+
+sub do_send_mail {
+    my ($subject, $message) = @_;
+
+    if (!defined($mail_path)) {
+	# find the mailer
+	$mail_path = find_mailer $mailer;
+	if (!defined($mail_path)) {
+	    die "\nCan not find $mailer in PATH\n";
+	}
+    }
+
+    if (!defined($mail_command)) {
+	if ($mailer eq "mail" || $mailer eq "mailx") {
+	    $mail_command = "\$MAIL_PATH/\$MAILER -s \'\$SUBJECT\' \$MAILTO <<< \'\$MESSAGE\'";
+	} elsif ($mailer eq "sendmail" ) {
+	    $mail_command =  "echo \'Subject: \$SUBJECT\n\n\$MESSAGE\' | \$MAIL_PATH/\$MAILER -t \$MAILTO";
+	} else {
+	    die "\nYour mailer: $mailer is not supported.\n";
+	}
+    }
+
+    $mail_command =~ s/\$MAILER/$mailer/g;
+    $mail_command =~ s/\$MAIL_PATH/$mail_path/g;
+    $mail_command =~ s/\$MAILTO/$mailto/g;
+    $mail_command =~ s/\$SUBJECT/$subject/g;
+    $mail_command =~ s/\$MESSAGE/$message/g;
+
+    my $ret = run_command $mail_command;
+    if (!$ret && defined($file)) {
+	# try again without the file
+	$message .= "\n\n*** FAILED TO SEND LOG ***\n\n";
+	do_send_email($subject, $message);
+    }
+}
+
+sub send_email {
+
+    if (defined($mailto)) {
+	if (!defined($mailer)) {
+	    doprint "No email sent: email or mailer not specified in config.\n";
+	    return;
+	}
+	do_send_mail @_;
+    }
+}
+
+sub cancel_test {
+    if ($email_when_canceled) {
+        send_email("KTEST: Your [$test_type] test was cancelled",
+                "Your test started at $script_start_time was cancelled: sig int");
+    }
+    die "\nCaught Sig Int, test interrupted: $!\n"
+}
+
+$SIG{INT} = qw(cancel_test);
+
+# First we need to do is the builds
+for (my $i = 1; $i <= $opt{"NUM_TESTS"}; $i++) {
+
+    # Do not reboot on failing test options
+    $no_reboot = 1;
+    $reboot_success = 0;
+
+    $have_version = 0;
+
+    $iteration = $i;
+
+    $build_time = 0;
+    $install_time = 0;
+    $reboot_time = 0;
+    $test_time = 0;
+
+    undef %force_config;
+
+    my $makecmd = set_test_option("MAKE_CMD", $i);
+
+    $outputdir = set_test_option("OUTPUT_DIR", $i);
+    $builddir = set_test_option("BUILD_DIR", $i);
+
+    chdir $builddir || dodie "can't change directory to $builddir";
+
+    if (!-d $outputdir) {
+	mkpath($outputdir) or
+	    dodie "can't create $outputdir";
+    }
+
+    $make = "$makecmd O=$outputdir";
+
+    # Load all the options into their mapped variable names
+    foreach my $opt (keys %option_map) {
+	${$option_map{$opt}} = set_test_option($opt, $i);
+    }
+
+    $start_minconfig_defined = 1;
+
+    # The first test may override the PRE_KTEST option
+    if ($i == 1) {
+        if (defined($pre_ktest)) {
+            doprint "\n";
+            run_command $pre_ktest;
+        }
+        if ($email_when_started) {
+            send_email("KTEST: Your [$test_type] test was started",
+                "Your test was started on $script_start_time");
+        }
+    }
+
+    # Any test can override the POST_KTEST option
+    # The last test takes precedence.
+    if (defined($post_ktest)) {
+	$final_post_ktest = $post_ktest;
+    }
+
+    if (!defined($start_minconfig)) {
+	$start_minconfig_defined = 0;
+	$start_minconfig = $minconfig;
+    }
+
+    if (!-d $tmpdir) {
+	mkpath($tmpdir) or
+	    dodie "can't create $tmpdir";
+    }
+
+    $ENV{"SSH_USER"} = $ssh_user;
+    $ENV{"MACHINE"} = $machine;
+
+    $buildlog = "$tmpdir/buildlog-$machine";
+    $testlog = "$tmpdir/testlog-$machine";
+    $dmesg = "$tmpdir/dmesg-$machine";
+    $output_config = "$outputdir/.config";
+
+    if (!$buildonly) {
+	$target = "$ssh_user\@$machine";
+	if ($reboot_type eq "grub") {
+	    dodie "GRUB_MENU not defined" if (!defined($grub_menu));
+	} elsif ($reboot_type eq "grub2") {
+	    dodie "GRUB_MENU not defined" if (!defined($grub_menu));
+	    dodie "GRUB_FILE not defined" if (!defined($grub_file));
+	} elsif ($reboot_type eq "syslinux") {
+	    dodie "SYSLINUX_LABEL not defined" if (!defined($syslinux_label));
+	}
+    }
+
+    my $run_type = $build_type;
+    if ($test_type eq "patchcheck") {
+	$run_type = $patchcheck_type;
+    } elsif ($test_type eq "bisect") {
+	$run_type = $bisect_type;
+    } elsif ($test_type eq "config_bisect") {
+	$run_type = $config_bisect_type;
+    } elsif ($test_type eq "make_min_config") {
+	$run_type = "";
+    } elsif ($test_type eq "make_warnings_file") {
+	$run_type = "";
+    }
+
+    # mistake in config file?
+    if (!defined($run_type)) {
+	$run_type = "ERROR";
+    }
+
+    my $installme = "";
+    $installme = " no_install" if ($no_install);
+
+    my $name = "";
+
+    if (defined($test_name)) {
+	$name = " ($test_name)";
+    }
+
+    doprint "\n\n";
+    doprint "RUNNING TEST $i of $opt{NUM_TESTS}$name with option $test_type $run_type$installme\n\n";
+
+    if (defined($pre_test)) {
+	run_command $pre_test;
+    }
+
+    unlink $dmesg;
+    unlink $buildlog;
+    unlink $testlog;
+
+    if (defined($addconfig)) {
+	my $min = $minconfig;
+	if (!defined($minconfig)) {
+	    $min = "";
+	}
+	run_command "cat $addconfig $min > $tmpdir/add_config" or
+	    dodie "Failed to create temp config";
+	$minconfig = "$tmpdir/add_config";
+    }
+
+    if (defined($checkout)) {
+	run_command "git checkout $checkout" or
+	    dodie "failed to checkout $checkout";
+    }
+
+    $no_reboot = 0;
+
+    # A test may opt to not reboot the box
+    if ($reboot_on_success) {
+	$reboot_success = 1;
+    }
+
+    if ($test_type eq "bisect") {
+	bisect $i;
+	next;
+    } elsif ($test_type eq "config_bisect") {
+	config_bisect $i;
+	next;
+    } elsif ($test_type eq "patchcheck") {
+	patchcheck $i;
+	next;
+    } elsif ($test_type eq "make_min_config") {
+	make_min_config $i;
+	next;
+    } elsif ($test_type eq "make_warnings_file") {
+	$no_reboot = 1;
+	make_warnings_file $i;
+	next;
+    }
+
+    if ($build_type ne "nobuild") {
+	build $build_type or next;
+	check_buildlog or next;
+    }
+
+    if ($test_type eq "install") {
+	get_version;
+	install;
+	success $i;
+	next;
+    }
+
+    if ($test_type ne "build") {
+	my $failed = 0;
+	start_monitor_and_install or $failed = 1;
+
+	if (!$failed && $test_type ne "boot" && defined($run_test)) {
+	    do_run_test or $failed = 1;
+	}
+	end_monitor;
+	if ($failed) {
+	    print_times;
+	    next;
+	}
+    }
+
+    print_times;
+
+    success $i;
+}
+
+if (defined($final_post_ktest)) {
+    run_command $final_post_ktest;
+}
+
+if ($opt{"POWEROFF_ON_SUCCESS"}) {
+    halt;
+} elsif ($opt{"REBOOT_ON_SUCCESS"} && !do_not_reboot && $reboot_success) {
+    reboot_to_good;
+} elsif (defined($switch_to_good)) {
+    # still need to get to the good kernel
+    run_command $switch_to_good;
+}
+
+
+doprint "\n    $successes of $opt{NUM_TESTS} tests were successful\n\n";
+
+if ($email_when_finished) {
+    send_email("KTEST: Your [$test_type] test has finished!",
+            "$successes of $opt{NUM_TESTS} tests started at $script_start_time were successful!");
+}
+exit 0;
diff --git a/tools/testing/ktest/sample.conf b/tools/testing/ktest/sample.conf
new file mode 100644
index 000000000..6ca6ca0ce
--- /dev/null
+++ b/tools/testing/ktest/sample.conf
@@ -0,0 +1,1348 @@
+#
+# Config file for ktest.pl
+#
+# Place your customized version of this, in the working directory that
+# ktest.pl is run from. By default, ktest.pl will look for a file
+# called "ktest.conf", but you can name it anything you like and specify
+# the name of your config file as the first argument of ktest.pl.
+#
+# Note, all paths must be absolute
+#
+
+# Options set in the beginning of the file are considered to be
+# default options. These options can be overriden by test specific
+# options, with the following exceptions:
+#
+#  LOG_FILE
+#  CLEAR_LOG
+#  POWEROFF_ON_SUCCESS
+#  REBOOT_ON_SUCCESS
+#
+# Test specific options are set after the label:
+#
+# TEST_START
+#
+# The options after a TEST_START label are specific to that test.
+# Each TEST_START label will set up a new test. If you want to
+# perform a test more than once, you can add the ITERATE label
+# to it followed by the number of times you want that test
+# to iterate. If the ITERATE is left off, the test will only
+# be performed once.
+#
+# TEST_START ITERATE 10
+#
+# You can skip a test by adding SKIP (before or after the ITERATE
+# and number)
+#
+# TEST_START SKIP
+#
+# TEST_START SKIP ITERATE 10
+#
+# TEST_START ITERATE 10 SKIP
+#
+# The SKIP label causes the options and the test itself to be ignored.
+# This is useful to set up several different tests in one config file, and
+# only enabling the ones you want to use for a current test run.
+#
+# You can add default options anywhere in the file as well
+# with the DEFAULTS tag. This allows you to have default options
+# after the test options to keep the test options at the top
+# of the file. You can even place the DEFAULTS tag between
+# test cases (but not in the middle of a single test case)
+#
+# TEST_START
+# MIN_CONFIG = /home/test/config-test1
+#
+# DEFAULTS
+# MIN_CONFIG = /home/test/config-default
+#
+# TEST_START ITERATE 10
+#
+# The above will run the first test with MIN_CONFIG set to
+# /home/test/config-test-1. Then 10 tests will be executed
+# with MIN_CONFIG with /home/test/config-default.
+#
+# You can also disable defaults with the SKIP option
+#
+# DEFAULTS SKIP
+# MIN_CONFIG = /home/test/config-use-sometimes
+#
+# DEFAULTS
+# MIN_CONFIG = /home/test/config-most-times
+#
+# The above will ignore the first MIN_CONFIG. If you want to
+# use the first MIN_CONFIG, remove the SKIP from the first
+# DEFAULTS tag and add it to the second. Be careful, options
+# may only be declared once per test or default. If you have
+# the same option name under the same test or as default
+# ktest will fail to execute, and no tests will run.
+#
+# DEFAULTS OVERRIDE
+#
+# Options defined in the DEFAULTS section can not be duplicated
+# even if they are defined in two different DEFAULT sections.
+# This is done to catch mistakes where an option is added but
+# the previous option was forgotten about and not commented.
+#
+# The OVERRIDE keyword can be added to a section to allow this
+# section to override other DEFAULT sections values that have
+# been defined previously. It will only override options that
+# have been defined before its use. Options defined later
+# in a non override section will still error. The same option
+# can not be defined in the same section even if that section
+# is marked OVERRIDE.
+#
+#
+#
+# Both TEST_START and DEFAULTS sections can also have the IF keyword
+# The value after the IF must evaluate into a 0 or non 0 positive
+# integer, and can use the config variables (explained below).
+#
+# DEFAULTS IF ${IS_X86_32}
+#
+# The above will process the DEFAULTS section if the config
+# variable IS_X86_32 evaluates to a non zero positive integer
+# otherwise if it evaluates to zero, it will act the same
+# as if the SKIP keyword was used.
+#
+# The ELSE keyword can be used directly after a section with
+# a IF statement.
+#
+# TEST_START IF ${RUN_NET_TESTS}
+# BUILD_TYPE = useconfig:${CONFIG_DIR}/config-network
+#
+# ELSE
+#
+# BUILD_TYPE = useconfig:${CONFIG_DIR}/config-normal
+#
+#
+# The ELSE keyword can also contain an IF statement to allow multiple
+# if then else sections. But all the sections must be either
+# DEFAULT or TEST_START, they can not be a mixture.
+#
+# TEST_START IF ${RUN_NET_TESTS}
+# BUILD_TYPE = useconfig:${CONFIG_DIR}/config-network
+#
+# ELSE IF ${RUN_DISK_TESTS}
+# BUILD_TYPE = useconfig:${CONFIG_DIR}/config-tests
+#
+# ELSE IF ${RUN_CPU_TESTS}
+# BUILD_TYPE = useconfig:${CONFIG_DIR}/config-cpu
+#
+# ELSE
+# BUILD_TYPE = useconfig:${CONFIG_DIR}/config-network
+#
+# The if statement may also have comparisons that will and for
+# == and !=, strings may be used for both sides.
+#
+# BOX_TYPE := x86_32
+#
+# DEFAULTS IF ${BOX_TYPE} == x86_32
+# BUILD_TYPE = useconfig:${CONFIG_DIR}/config-32
+# ELSE
+# BUILD_TYPE = useconfig:${CONFIG_DIR}/config-64
+#
+# The DEFINED keyword can be used by the IF statements too.
+# It returns true if the given config variable or option has been defined
+# or false otherwise.
+#
+# 
+# DEFAULTS IF DEFINED USE_CC
+# CC := ${USE_CC}
+# ELSE
+# CC := gcc
+#
+#
+# As well as NOT DEFINED.
+#
+# DEFAULTS IF NOT DEFINED MAKE_CMD
+# MAKE_CMD := make ARCH=x86
+#
+#
+# And/or ops (&&,||) may also be used to make complex conditionals.
+#
+# TEST_START IF (DEFINED ALL_TESTS || ${MYTEST} == boottest) && ${MACHINE} == gandalf
+#
+# Notice the use of parentheses. Without any parentheses the above would be
+# processed the same as:
+#
+# TEST_START IF DEFINED ALL_TESTS || (${MYTEST} == boottest && ${MACHINE} == gandalf)
+#
+#
+#
+# INCLUDE file
+#
+# The INCLUDE keyword may be used in DEFAULT sections. This will
+# read another config file and process that file as well. The included
+# file can include other files, add new test cases or default
+# statements. Config variables will be passed to these files and changes
+# to config variables will be seen by top level config files. Including
+# a file is processed just like the contents of the file was cut and pasted
+# into the top level file, except, that include files that end with
+# TEST_START sections will have that section ended at the end of
+# the include file. That is, an included file is included followed
+# by another DEFAULT keyword.
+#
+# Unlike other files referenced in this config, the file path does not need
+# to be absolute. If the file does not start with '/', then the directory
+# that the current config file was located in is used. If no config by the
+# given name is found there, then the current directory is searched.
+#
+# INCLUDE myfile
+# DEFAULT
+#
+# is the same as:
+#
+# INCLUDE myfile
+#
+# Note, if the include file does not contain a full path, the file is
+# searched first by the location of the original include file, and then
+# by the location that ktest.pl was executed in.
+#
+
+#### Config variables ####
+#
+# This config file can also contain "config variables".
+# These are assigned with ":=" instead of the ktest option
+# assigment "=".
+#
+# The difference between ktest options and config variables
+# is that config variables can be used multiple times,
+# where each instance will override the previous instance.
+# And that they only live at time of processing this config.
+#
+# The advantage to config variables are that they can be used
+# by any option or any other config variables to define thing
+# that you may use over and over again in the options.
+#
+# For example:
+#
+# USER      := root
+# TARGET    := mybox
+# TEST_CASE := ssh ${USER}@${TARGET} /path/to/my/test
+#
+# TEST_START
+# MIN_CONFIG = config1
+# TEST = ${TEST_CASE}
+#
+# TEST_START
+# MIN_CONFIG = config2
+# TEST = ${TEST_CASE}
+#
+# TEST_CASE := ssh ${USER}@${TARGET} /path/to/my/test2
+#
+# TEST_START
+# MIN_CONFIG = config1
+# TEST = ${TEST_CASE}
+#
+# TEST_START
+# MIN_CONFIG = config2
+# TEST = ${TEST_CASE}
+#
+# TEST_DIR := /home/me/test
+#
+# BUILD_DIR = ${TEST_DIR}/linux.git
+# OUTPUT_DIR = ${TEST_DIR}/test
+#
+# Note, the config variables are evaluated immediately, thus
+# updating TARGET after TEST_CASE has been assigned does nothing
+# to TEST_CASE.
+#
+# As shown in the example, to evaluate a config variable, you
+# use the ${X} convention. Simple $X will not work.
+#
+# If the config variable does not exist, the ${X} will not
+# be evaluated. Thus:
+#
+# MAKE_CMD = PATH=/mypath:${PATH} make
+#
+# If PATH is not a config variable, then the ${PATH} in
+# the MAKE_CMD option will be evaluated by the shell when
+# the MAKE_CMD option is passed into shell processing.
+
+#### Using options in other options ####
+#
+# Options that are defined in the config file may also be used
+# by other options. All options are evaulated at time of
+# use (except that config variables are evaluated at config
+# processing time).
+#
+# If an ktest option is used within another option, instead of
+# typing it again in that option you can simply use the option
+# just like you can config variables.
+#
+# MACHINE = mybox
+#
+# TEST = ssh root@${MACHINE} /path/to/test
+#
+# The option will be used per test case. Thus:
+#
+# TEST_TYPE = test
+# TEST = ssh root@{MACHINE}
+#
+# TEST_START
+# MACHINE = box1
+#
+# TEST_START
+# MACHINE = box2
+#
+# For both test cases, MACHINE will be evaluated at the time
+# of the test case. The first test will run ssh root@box1
+# and the second will run ssh root@box2.
+
+#### Mandatory Default Options ####
+
+# These options must be in the default section, although most
+# may be overridden by test options.
+
+# The machine hostname that you will test
+#MACHINE = target
+
+# The box is expected to have ssh on normal bootup, provide the user
+#  (most likely root, since you need privileged operations)
+#SSH_USER = root
+
+# The directory that contains the Linux source code
+#BUILD_DIR = /home/test/linux.git
+
+# The directory that the objects will be built
+# (can not be same as BUILD_DIR)
+#OUTPUT_DIR = /home/test/build/target
+
+# The location of the compiled file to copy to the target
+# (relative to OUTPUT_DIR)
+#BUILD_TARGET = arch/x86/boot/bzImage
+
+# The place to put your image on the test machine
+#TARGET_IMAGE = /boot/vmlinuz-test
+
+# A script or command to reboot the box
+#
+# Here is a digital loggers power switch example
+#POWER_CYCLE = wget --no-proxy -O /dev/null -q  --auth-no-challenge 'http://admin:admin@power/outlet?5=CCL'
+#
+# Here is an example to reboot a virtual box on the current host
+# with the name "Guest".
+#POWER_CYCLE = virsh destroy Guest; sleep 5; virsh start Guest
+
+# The script or command that reads the console
+#
+#  If you use ttywatch server, something like the following would work.
+#CONSOLE = nc -d localhost 3001
+#
+# For a virtual machine with guest name "Guest".
+#CONSOLE =  virsh console Guest
+
+# Signal to send to kill console.
+# ktest.pl will create a child process to monitor the console.
+# When the console is finished, ktest will kill the child process
+# with this signal.
+# (default INT)
+#CLOSE_CONSOLE_SIGNAL = HUP
+
+# Required version ending to differentiate the test
+# from other linux builds on the system.
+#LOCALVERSION = -test
+
+# For REBOOT_TYPE = grub2, you must specify where the grub.cfg
+# file is. This is the file that is searched to find the menu
+# option to boot to with GRUB_REBOOT
+#GRUB_FILE = /boot/grub2/grub.cfg
+
+# The tool for REBOOT_TYPE = grub2 to set the next reboot kernel
+# to boot into (one shot mode).
+# (default grub2_reboot)
+#GRUB_REBOOT = grub2_reboot
+
+# The grub title name for the test kernel to boot
+# (Only mandatory if REBOOT_TYPE = grub or grub2)
+#
+# Note, ktest.pl will not update the grub menu.lst, you need to
+# manually add an option for the test. ktest.pl will search
+# the grub menu.lst for this option to find what kernel to
+# reboot into.
+#
+# For example, if in the /boot/grub/menu.lst the test kernel title has:
+# title Test Kernel
+# kernel vmlinuz-test
+#
+# For grub2, a search of top level "menuentry"s are done. No
+# submenu is searched. The menu is found by searching for the
+# contents of GRUB_MENU in the line that starts with "menuentry".
+# You may want to include the quotes around the option. For example:
+# for: menuentry 'Test Kernel'
+# do a: GRUB_MENU = 'Test Kernel'
+# For customizing, add your entry in /etc/grub.d/40_custom.
+#
+#GRUB_MENU = Test Kernel
+
+# For REBOOT_TYPE = syslinux, the name of the syslinux executable
+# (on the target) to use to set up the next reboot to boot the
+# test kernel.
+# (default extlinux)
+#SYSLINUX = syslinux
+
+# For REBOOT_TYPE = syslinux, the path that is passed to to the
+# syslinux command where syslinux is installed.
+# (default /boot/extlinux)
+#SYSLINUX_PATH = /boot/syslinux
+
+# For REBOOT_TYPE = syslinux, the syslinux label that references the
+# test kernel in the syslinux config file.
+# (default undefined)
+#SYSLINUX_LABEL = "test-kernel"
+
+# A script to reboot the target into the test kernel
+# This and SWITCH_TO_TEST are about the same, except
+# SWITCH_TO_TEST is run even for REBOOT_TYPE = grub.
+# This may be left undefined.
+# (default undefined)
+#REBOOT_SCRIPT =
+
+#### Optional Config Options (all have defaults) ####
+
+# Email options for receiving notifications. Users must setup
+# the specified mailer prior to using this feature.
+#
+# (default undefined)
+#MAILTO =
+#
+# Supported mailers: sendmail, mail, mailx
+# (default sendmail)
+#MAILER = sendmail
+#
+# The executable to run
+# (default: for sendmail "/usr/sbin/sendmail", otherwise equals ${MAILER})
+#MAIL_EXEC = /usr/sbin/sendmail
+#
+# The command used to send mail, which uses the above options
+# can be modified. By default if the mailer is "sendmail" then
+#  MAIL_COMMAND = echo \'Subject: $SUBJECT\n\n$MESSAGE\' | $MAIL_PATH/$MAILER -t $MAILTO
+# For mail or mailx:
+#  MAIL_COMMAND = "$MAIL_PATH/$MAILER -s \'$SUBJECT\' $MAILTO <<< \'$MESSAGE\'
+# ktest.pl will do the substitution for MAIL_PATH, MAILER, MAILTO at the time
+#    it sends the mail if "$FOO" format is used. If "${FOO}" format is used,
+#    then the substitutions will occur at the time the config file is read.
+#    But note, MAIL_PATH and MAILER require being set by the config file if
+#     ${MAIL_PATH} or ${MAILER} are used, but not if $MAIL_PATH or $MAILER are.
+#MAIL_COMMAND = echo \'Subject: $SUBJECT\n\n$MESSAGE\' | $MAIL_PATH/$MAILER -t $MAILTO
+#
+# Errors are defined as those would terminate the script
+# (default 1)
+#EMAIL_ON_ERROR = 1
+# (default 1)
+#EMAIL_WHEN_FINISHED = 1
+# (default 0)
+#EMAIL_WHEN_STARTED = 1
+#
+# Users can cancel the test by Ctrl^C
+# (default 0)
+#EMAIL_WHEN_CANCELED = 1
+
+# Start a test setup. If you leave this off, all options
+# will be default and the test will run once.
+# This is a label and not really an option (it takes no value).
+# You can append ITERATE and a number after it to iterate the
+# test a number of times, or SKIP to ignore this test.
+#
+#TEST_START
+#TEST_START ITERATE 5
+#TEST_START SKIP
+
+# Have the following options as default again. Used after tests
+# have already been defined by TEST_START. Optionally, you can
+# just define all default options before the first TEST_START
+# and you do not need this option.
+#
+# This is a label and not really an option (it takes no value).
+# You can append SKIP to this label and the options within this
+# section will be ignored.
+#
+# DEFAULTS
+# DEFAULTS SKIP
+
+# If you want to execute some command before the first test runs
+# you can set this option. Note, it can be set as a default option
+# or an option in the first test case. All other test cases will
+# ignore it. If both the default and first test have this option
+# set, then the first test will take precedence.
+#
+# default (undefined)
+#PRE_KTEST = ${SSH} ~/set_up_test
+
+# If you want to execute some command after all the tests have
+# completed, you can set this option. Note, it can be set as a
+# default or any test case can override it. If multiple test cases
+# set this option, then the last test case that set it will take
+# precedence
+#
+# default (undefined)
+#POST_KTEST = ${SSH} ~/dismantle_test
+
+# The default test type (default test)
+# The test types may be:
+#   build   - only build the kernel, do nothing else
+#   install - build and install, but do nothing else (does not reboot)
+#   boot    - build, install, and boot the kernel
+#   test    - build, boot and if TEST is set, run the test script
+#          (If TEST is not set, it defaults back to boot)
+#   bisect - Perform a bisect on the kernel (see BISECT_TYPE below)
+#   patchcheck - Do a test on a series of commits in git (see PATCHCHECK below)
+#TEST_TYPE = test
+
+# Test to run if there is a successful boot and TEST_TYPE is test.
+# Must exit with 0 on success and non zero on error
+# default (undefined)
+#TEST = ssh user@machine /root/run_test
+
+# The build type is any make config type or special command
+#  (default randconfig)
+#   nobuild - skip the clean and build step
+#   useconfig:/path/to/config - use the given config and run
+#              oldconfig on it.
+# This option is ignored if TEST_TYPE is patchcheck or bisect
+#BUILD_TYPE = randconfig
+
+# The make command (default make)
+# If you are building a 32bit x86 on a 64 bit host
+#MAKE_CMD = CC=i386-gcc AS=i386-as make ARCH=i386
+
+# Any build options for the make of the kernel (not for other makes, like configs)
+# (default "")
+#BUILD_OPTIONS = -j20
+
+# If you need to do some special handling before installing
+# you can add a script with this option.
+# The environment variable KERNEL_VERSION will be set to the
+# kernel version that is used.
+#
+# default (undefined)
+#PRE_INSTALL = ssh user@target rm -rf '/lib/modules/*-test*'
+
+# If you need an initrd, you can add a script or code here to install
+# it. The environment variable KERNEL_VERSION will be set to the
+# kernel version that is used. Remember to add the initrd line
+# to your grub menu.lst file.
+#
+# Here's a couple of examples to use:
+#POST_INSTALL = ssh user@target /sbin/mkinitrd --allow-missing -f /boot/initramfs-test.img $KERNEL_VERSION
+#
+# or on some systems:
+#POST_INSTALL = ssh user@target /sbin/dracut -f /boot/initramfs-test.img $KERNEL_VERSION
+
+# If for some reason you just want to boot the kernel and you do not
+# want the test to install anything new. For example, you may just want
+# to boot test the same kernel over and over and do not want to go through
+# the hassle of installing anything, you can set this option to 1
+# (default 0)
+#NO_INSTALL = 1
+
+# If there is a command that you want to run before the individual test
+# case executes, then you can set this option
+#
+# default (undefined)
+#PRE_TEST = ${SSH} reboot_to_special_kernel
+
+# If there is a command you want to run after the individual test case
+# completes, then you can set this option.
+#
+# default (undefined)
+#POST_TEST = cd ${BUILD_DIR}; git reset --hard
+
+# If there is a script that you require to run before the build is done
+# you can specify it with PRE_BUILD.
+#
+# One example may be if you must add a temporary patch to the build to
+# fix a unrelated bug to perform a patchcheck test. This will apply the
+# patch before each build that is made. Use the POST_BUILD to do a git reset --hard
+# to remove the patch.
+#
+# (default undef)
+#PRE_BUILD = cd ${BUILD_DIR} && patch -p1 < /tmp/temp.patch
+
+# To specify if the test should fail if the PRE_BUILD fails,
+# PRE_BUILD_DIE needs to be set to 1. Otherwise the PRE_BUILD
+# result is ignored.
+# (default 0)
+# PRE_BUILD_DIE = 1
+
+# If there is a script that should run after the build is done
+# you can specify it with POST_BUILD.
+#
+# As the example in PRE_BUILD, POST_BUILD can be used to reset modifications
+# made by the PRE_BUILD.
+#
+# (default undef)
+#POST_BUILD = cd ${BUILD_DIR} && git reset --hard
+
+# To specify if the test should fail if the POST_BUILD fails,
+# POST_BUILD_DIE needs to be set to 1. Otherwise the POST_BUILD
+# result is ignored.
+# (default 0)
+#POST_BUILD_DIE = 1
+
+# Way to reboot the box to the test kernel.
+# Only valid options so far are "grub", "grub2", "syslinux" and "script"
+# (default grub)
+# If you specify grub, it will assume grub version 1
+# and will search in /boot/grub/menu.lst for the title $GRUB_MENU
+# and select that target to reboot to the kernel. If this is not
+# your setup, then specify "script" and have a command or script
+# specified in REBOOT_SCRIPT to boot to the target.
+#
+# For REBOOT_TYPE = grub2, you must define both GRUB_MENU and
+# GRUB_FILE.
+#
+# For REBOOT_TYPE = syslinux, you must define SYSLINUX_LABEL, and
+# perhaps modify SYSLINUX (default extlinux) and SYSLINUX_PATH
+# (default /boot/extlinux)
+#
+# The entry in /boot/grub/menu.lst must be entered in manually.
+# The test will not modify that file.
+#REBOOT_TYPE = grub
+
+# If you are using a machine that doesn't boot with grub, and
+# perhaps gets its kernel from a remote server (tftp), then
+# you can use this option to update the target image with the
+# test image.
+#
+# You could also do the same with POST_INSTALL, but the difference
+# between that option and this option is that POST_INSTALL runs
+# after the install, where this one runs just before a reboot.
+# (default undefined)
+#SWITCH_TO_TEST = cp ${OUTPUT_DIR}/${BUILD_TARGET} ${TARGET_IMAGE}
+
+# If you are using a machine that doesn't boot with grub, and
+# perhaps gets its kernel from a remote server (tftp), then
+# you can use this option to update the target image with the
+# the known good image to reboot safely back into.
+#
+# This option holds a command that will execute before needing
+# to reboot to a good known image.
+# (default undefined)
+#SWITCH_TO_GOOD = ssh ${SSH_USER}/${MACHINE} cp good_image ${TARGET_IMAGE}
+
+# The min config that is needed to build for the machine
+# A nice way to create this is with the following:
+#
+#   $ ssh target
+#   $ lsmod > mymods
+#   $ scp mymods host:/tmp
+#   $ exit
+#   $ cd linux.git
+#   $ rm .config
+#   $ make LSMOD=mymods localyesconfig
+#   $ grep '^CONFIG' .config > /home/test/config-min
+#
+# If you want even less configs:
+#
+#   log in directly to target (do not ssh)
+#
+#   $ su
+#   # lsmod | cut -d' ' -f1 | xargs rmmod
+#
+#   repeat the above several times
+#
+#   # lsmod > mymods
+#   # reboot
+#
+# May need to reboot to get your network back to copy the mymods
+# to the host, and then remove the previous .config and run the
+# localyesconfig again. The CONFIG_MIN generated like this will
+# not guarantee network activity to the box so the TEST_TYPE of
+# test may fail.
+#
+# You might also want to set:
+#   CONFIG_CMDLINE="<your options here>"
+#  randconfig may set the above and override your real command
+#  line options.
+# (default undefined)
+#MIN_CONFIG = /home/test/config-min
+
+# Sometimes there's options that just break the boot and
+# you do not care about. Here are a few:
+#   # CONFIG_STAGING is not set
+#  Staging drivers are horrible, and can break the build.
+#   # CONFIG_SCSI_DEBUG is not set
+#  SCSI_DEBUG may change your root partition
+#   # CONFIG_KGDB_SERIAL_CONSOLE is not set
+#  KGDB may cause oops waiting for a connection that's not there.
+# This option points to the file containing config options that will be prepended
+# to the MIN_CONFIG (or be the MIN_CONFIG if it is not set)
+#
+# Note, config options in MIN_CONFIG will override these options.
+#
+# (default undefined)
+#ADD_CONFIG = /home/test/config-broken
+
+# The location on the host where to write temp files
+# (default /tmp/ktest/${MACHINE})
+#TMP_DIR = /tmp/ktest/${MACHINE}
+
+# Optional log file to write the status (recommended)
+#  Note, this is a DEFAULT section only option.
+# (default undefined)
+#LOG_FILE = /home/test/logfiles/target.log
+
+# Remove old logfile if it exists before starting all tests.
+#  Note, this is a DEFAULT section only option.
+# (default 0)
+#CLEAR_LOG = 0
+
+# Line to define a successful boot up in console output.
+# This is what the line contains, not the entire line. If you need
+# the entire line to match, then use regural expression syntax like:
+#  (do not add any quotes around it)
+#
+#  SUCCESS_LINE = ^MyBox Login:$
+#
+# (default "login:")
+#SUCCESS_LINE = login:
+
+# To speed up between reboots, defining a line that the
+# default kernel produces that represents that the default
+# kernel has successfully booted and can be used to pass
+# a new test kernel to it. Otherwise ktest.pl will wait till
+# SLEEP_TIME to continue.
+# (default undefined)
+#REBOOT_SUCCESS_LINE = login:
+
+# In case the console constantly fills the screen, having
+# a specified time to stop the test after success is recommended.
+# (in seconds)
+# (default 10)
+#STOP_AFTER_SUCCESS = 10
+
+# In case the console constantly fills the screen, having
+# a specified time to stop the test after failure is recommended.
+# (in seconds)
+# (default 60)
+#STOP_AFTER_FAILURE = 60
+
+# In case the console constantly fills the screen, having
+# a specified time to stop the test if it never succeeds nor fails
+# is recommended.
+# Note: this is ignored if a success or failure is detected.
+# (in seconds)
+# (default 600, -1 is to never stop)
+#STOP_TEST_AFTER = 600
+
+# Stop testing if a build fails. If set, the script will end if
+# a failure is detected, otherwise it will save off the .config,
+# dmesg and bootlog in a directory called
+# MACHINE-TEST_TYPE_BUILD_TYPE-fail-yyyymmddhhmmss
+# if the STORE_FAILURES directory is set.
+# (default 1)
+# Note, even if this is set to zero, there are some errors that still
+# stop the tests.
+#DIE_ON_FAILURE = 1
+
+# Directory to store failure directories on failure. If this is not
+# set, DIE_ON_FAILURE=0 will not save off the .config, dmesg and
+# bootlog. This option is ignored if DIE_ON_FAILURE is not set.
+# (default undefined)
+#STORE_FAILURES = /home/test/failures
+
+# Directory to store success directories on success. If this is not
+# set, the .config, dmesg and bootlog will not be saved if a
+# test succeeds.
+# (default undefined)
+#STORE_SUCCESSES = /home/test/successes
+
+# Build without doing a make mrproper, or removing .config
+# (default 0)
+#BUILD_NOCLEAN = 0
+
+# As the test reads the console, after it hits the SUCCESS_LINE
+# the time it waits for the monitor to settle down between reads
+# can usually be lowered.
+# (in seconds) (default 1)
+#BOOTED_TIMEOUT = 1
+
+# The timeout in seconds when we consider the box hung after
+# the console stop producing output. Be sure to leave enough
+# time here to get pass a reboot. Some machines may not produce
+# any console output for a long time during a reboot. You do
+# not want the test to fail just because the system was in
+# the process of rebooting to the test kernel.
+# (default 120)
+#TIMEOUT = 120
+
+# The timeout in seconds when to test if the box can be rebooted
+# or not. Before issuing the reboot command, a ssh connection
+# is attempted to see if the target machine is still active.
+# If the target does not connect within this timeout, a power cycle
+# is issued instead of a reboot.
+# CONNECT_TIMEOUT = 25
+
+# In between tests, a reboot of the box may occur, and this
+# is the time to wait for the console after it stops producing
+# output. Some machines may not produce a large lag on reboot
+# so this should accommodate it.
+# The difference between this and TIMEOUT, is that TIMEOUT happens
+# when rebooting to the test kernel. This sleep time happens
+# after a test has completed and we are about to start running
+# another test. If a reboot to the reliable kernel happens,
+# we wait SLEEP_TIME for the console to stop producing output
+# before starting the next test.
+#
+# You can speed up reboot times even more by setting REBOOT_SUCCESS_LINE.
+# (default 60)
+#SLEEP_TIME = 60
+
+# The time in between bisects to sleep (in seconds)
+# (default 60)
+#BISECT_SLEEP_TIME = 60
+
+# The max wait time (in seconds) for waiting for the console to finish.
+# If for some reason, the console is outputting content without
+# ever finishing, this will cause ktest to get stuck. This
+# option is the max time ktest will wait for the monitor (console)
+# to settle down before continuing.
+# (default 1800)
+#MAX_MONITOR_WAIT
+
+# The time in between patch checks to sleep (in seconds)
+# (default 60)
+#PATCHCHECK_SLEEP_TIME = 60
+
+# Reboot the target box on error (default 0)
+#REBOOT_ON_ERROR = 0
+
+# Power off the target on error (ignored if REBOOT_ON_ERROR is set)
+#  Note, this is a DEFAULT section only option.
+# (default 0)
+#POWEROFF_ON_ERROR = 0
+
+# Power off the target after all tests have completed successfully
+#  Note, this is a DEFAULT section only option.
+# (default 0)
+#POWEROFF_ON_SUCCESS = 0
+
+# Reboot the target after all test completed successfully (default 1)
+# (ignored if POWEROFF_ON_SUCCESS is set)
+#REBOOT_ON_SUCCESS = 1
+
+# In case there are isses with rebooting, you can specify this
+# to always powercycle after this amount of time after calling
+# reboot.
+# Note, POWERCYCLE_AFTER_REBOOT = 0 does NOT disable it. It just
+# makes it powercycle immediately after rebooting. Do not define
+# it if you do not want it.
+# (default undefined)
+#POWERCYCLE_AFTER_REBOOT = 5
+
+# In case there's isses with halting, you can specify this
+# to always poweroff after this amount of time after calling
+# halt.
+# Note, POWEROFF_AFTER_HALT = 0 does NOT disable it. It just
+# makes it poweroff immediately after halting. Do not define
+# it if you do not want it.
+# (default undefined)
+#POWEROFF_AFTER_HALT = 20
+
+# A script or command to power off the box (default undefined)
+# Needed for POWEROFF_ON_ERROR and SUCCESS
+#
+# Example for digital loggers power switch:
+#POWER_OFF = wget --no-proxy -O /dev/null -q  --auth-no-challenge 'http://admin:admin@power/outlet?5=OFF'
+#
+# Example for a virtual guest call "Guest".
+#POWER_OFF = virsh destroy Guest
+
+# To have the build fail on "new" warnings, create a file that
+# contains a list of all known warnings (they must match exactly
+# to the line with 'warning:', 'error:' or 'Error:'. If the option
+# WARNINGS_FILE is set, then that file will be read, and if the
+# build detects a warning, it will examine this file and if the
+# warning does not exist in it, it will fail the build.
+#
+# Note, if this option is defined to a file that does not exist
+# then any warning will fail the build.
+#  (see make_warnings_file below)
+#
+# (optional, default undefined)
+#WARNINGS_FILE = ${OUTPUT_DIR}/warnings_file
+
+# The way to execute a command on the target
+# (default ssh $SSH_USER@$MACHINE $SSH_COMMAND";)
+# The variables SSH_USER, MACHINE and SSH_COMMAND are defined
+#SSH_EXEC = ssh $SSH_USER@$MACHINE $SSH_COMMAND";
+
+# The way to copy a file to the target (install and modules)
+# (default scp $SRC_FILE $SSH_USER@$MACHINE:$DST_FILE)
+# The variables SSH_USER, MACHINE are defined by the config
+# SRC_FILE and DST_FILE are ktest internal variables and
+# should only have '$' and not the '${}' notation.
+# (default scp $SRC_FILE ${SSH_USER}@${MACHINE}:$DST_FILE)
+#SCP_TO_TARGET = echo skip scp for $SRC_FILE $DST_FILE
+
+# If install needs to be different than modules, then this
+# option will override the SCP_TO_TARGET for installation.
+# (default ${SCP_TO_TARGET} )
+#SCP_TO_TARGET_INSTALL = scp $SRC_FILE tftp@tftpserver:$DST_FILE
+
+# The nice way to reboot the target
+# (default ssh $SSH_USER@$MACHINE reboot)
+# The variables SSH_USER and MACHINE are defined.
+#REBOOT = ssh $SSH_USER@$MACHINE reboot
+
+# The way triple faults are detected is by testing the kernel
+# banner. If the kernel banner for the kernel we are testing is
+# found, and then later a kernel banner for another kernel version
+# is found, it is considered that we encountered a triple fault,
+# and there is no panic or callback, but simply a reboot.
+# To disable this (because it did a false positive) set the following
+# to 0.
+# (default 1)
+#DETECT_TRIPLE_FAULT = 0
+
+# All options in the config file should be either used by ktest
+# or could be used within a value of another option. If an option
+# in the config file is not used, ktest will warn about it and ask
+# if you want to continue.
+#
+# If you don't care if there are non-used options, enable this
+# option. Be careful though, a non-used option is usually a sign
+# of an option name being typed incorrectly.
+# (default 0)
+#IGNORE_UNUSED = 1
+
+# When testing a kernel that happens to have WARNINGs, and call
+# traces, ktest.pl will detect these and fail a boot or test run
+# due to warnings. By setting this option, ktest will ignore
+# call traces, and will not fail a test if the kernel produces
+# an oops. Use this option with care.
+# (default 0)
+#IGNORE_ERRORS = 1
+
+#### Per test run options ####
+# The following options are only allowed in TEST_START sections.
+# They are ignored in the DEFAULTS sections.
+#
+# All of these are optional and undefined by default, although
+#  some of these options are required for TEST_TYPE of patchcheck
+#  and bisect.
+#
+#
+# CHECKOUT = branch
+#
+#  If the BUILD_DIR is a git repository, then you can set this option
+#  to checkout the given branch before running the TEST. If you
+#  specify this for the first run, that branch will be used for
+#  all preceding tests until a new CHECKOUT is set.
+#
+#
+# TEST_NAME = name
+#
+#  If you want the test to have a name that is displayed in
+#  the test result banner at the end of the test, then use this
+#  option. This is useful to search for the RESULT keyword and
+#  not have to translate a test number to a test in the config.
+#
+# For TEST_TYPE = patchcheck
+#
+#  This expects the BUILD_DIR to be a git repository, and
+#  will checkout the PATCHCHECK_START commit.
+#
+#  The option BUILD_TYPE will be ignored.
+#
+#  The MIN_CONFIG will be used for all builds of the patchcheck. The build type
+#  used for patchcheck is oldconfig.
+#
+#  PATCHCHECK_START is required and is the first patch to
+#   test (the SHA1 of the commit). You may also specify anything
+#   that git checkout allows (branch name, tage, HEAD~3).
+#
+#  PATCHCHECK_END is the last patch to check (default HEAD)
+#
+#  PATCHCHECK_CHERRY if set to non zero, then git cherry will be
+#      performed against PATCHCHECK_START and PATCHCHECK_END. That is
+#
+#      git cherry ${PATCHCHECK_START} ${PATCHCHECK_END}
+#
+#      Then the changes found will be tested.
+#
+#      Note, PATCHCHECK_CHERRY requires PATCHCHECK_END to be defined.
+#      (default 0)
+#
+#  PATCHCHECK_TYPE is required and is the type of test to run:
+#      build, boot, test.
+#
+#   Note, the build test will look for warnings, if a warning occurred
+#     in a file that a commit touches, the build will fail, unless
+#     IGNORE_WARNINGS is set for the given commit's sha1
+#
+#   IGNORE_WARNINGS can be used to disable the failure of patchcheck
+#     on a particuler commit (SHA1). You can add more than one commit
+#     by adding a list of SHA1s that are space delimited.
+#
+#   If BUILD_NOCLEAN is set, then make mrproper will not be run on
+#   any of the builds, just like all other TEST_TYPE tests. But
+#   what makes patchcheck different from the other tests, is if
+#   BUILD_NOCLEAN is not set, only the first and last patch run
+#   make mrproper. This helps speed up the test.
+#
+# Example:
+#   TEST_START
+#   TEST_TYPE = patchcheck
+#   CHECKOUT = mybranch
+#   PATCHCHECK_TYPE = boot
+#   PATCHCHECK_START = 747e94ae3d1b4c9bf5380e569f614eb9040b79e7
+#   PATCHCHECK_END = HEAD~2
+#   IGNORE_WARNINGS = 42f9c6b69b54946ffc0515f57d01dc7f5c0e4712 0c17ca2c7187f431d8ffc79e81addc730f33d128
+#
+#
+#
+# For TEST_TYPE = bisect
+#
+#  You can specify a git bisect if the BUILD_DIR is a git repository.
+#  The MIN_CONFIG will be used for all builds of the bisect. The build type
+#  used for bisecting is oldconfig.
+#
+#  The option BUILD_TYPE will be ignored.
+#
+#  BISECT_TYPE is the type of test to perform:
+#	build	- bad fails to build
+#	boot	- bad builds but fails to boot
+#	test	- bad boots but fails a test
+#
+# BISECT_GOOD is the commit (SHA1) to label as good (accepts all git good commit types)
+# BISECT_BAD is the commit to label as bad (accepts all git bad commit types)
+#
+# The above three options are required for a bisect operation.
+#
+# BISECT_REPLAY = /path/to/replay/file (optional, default undefined)
+#
+#   If an operation failed in the bisect that was not expected to
+#   fail. Then the test ends. The state of the BUILD_DIR will be
+#   left off at where the failure occurred. You can examine the
+#   reason for the failure, and perhaps even find a git commit
+#   that would work to continue with. You can run:
+#
+#   git bisect log > /path/to/replay/file
+#
+#   The adding:
+#
+#    BISECT_REPLAY= /path/to/replay/file
+#
+#   And running the test again. The test will perform the initial
+#    git bisect start, git bisect good, and git bisect bad, and
+#    then it will run git bisect replay on this file, before
+#    continuing with the bisect.
+#
+# BISECT_START = commit (optional, default undefined)
+#
+#   As with BISECT_REPLAY, if the test failed on a commit that
+#   just happen to have a bad commit in the middle of the bisect,
+#   and you need to skip it. If BISECT_START is defined, it
+#   will checkout that commit after doing the initial git bisect start,
+#   git bisect good, git bisect bad, and running the git bisect replay
+#   if the BISECT_REPLAY is set.
+#
+# BISECT_SKIP = 1 (optional, default 0)
+#
+#   If BISECT_TYPE is set to test but the build fails, ktest will
+#   simply fail the test and end their. You could use BISECT_REPLAY
+#   and BISECT_START to resume after you found a new starting point,
+#   or you could set BISECT_SKIP to 1. If BISECT_SKIP is set to 1,
+#   when something other than the BISECT_TYPE fails, ktest.pl will
+#   run "git bisect skip" and try again.
+#
+# BISECT_FILES = <path> (optional, default undefined)
+#
+#   To just run the git bisect on a specific path, set BISECT_FILES.
+#   For example:
+#
+#     BISECT_FILES = arch/x86 kernel/time
+#
+#   Will run the bisect with "git bisect start -- arch/x86 kernel/time"
+#
+# BISECT_REVERSE = 1 (optional, default 0)
+#
+#   In those strange instances where it was broken forever
+#   and you are trying to find where it started to work!
+#   Set BISECT_GOOD to the commit that was last known to fail
+#   Set BISECT_BAD to the commit that is known to start working.
+#   With BISECT_REVERSE = 1, The test will consider failures as
+#   good, and success as bad.
+#
+# BISECT_MANUAL = 1 (optional, default 0)
+#
+#   In case there's a problem with automating the bisect for
+#   whatever reason. (Can't reboot, want to inspect each iteration)
+#   Doing a BISECT_MANUAL will have the test wait for you to
+#   tell it if the test passed or failed after each iteration.
+#   This is basicall the same as running git bisect yourself
+#   but ktest will rebuild and install the kernel for you.
+#
+# BISECT_CHECK = 1 (optional, default 0)
+#
+#   Just to be sure the good is good and bad is bad, setting
+#   BISECT_CHECK to 1 will start the bisect by first checking
+#   out BISECT_BAD and makes sure it fails, then it will check
+#   out BISECT_GOOD and makes sure it succeeds before starting
+#   the bisect (it works for BISECT_REVERSE too).
+#
+#   You can limit the test to just check BISECT_GOOD or
+#   BISECT_BAD with BISECT_CHECK = good or
+#   BISECT_CHECK = bad, respectively.
+#
+# BISECT_TRIES = 5 (optional, default 1)
+#
+#   For those cases that it takes several tries to hit a bug,
+#   the BISECT_TRIES is useful. It is the number of times the
+#   test is ran before it says the kernel is good. The first failure
+#   will stop trying and mark the current SHA1 as bad.
+#
+#   Note, as with all race bugs, there's no guarantee that if
+#   it succeeds, it is really a good bisect. But it helps in case
+#   the bug is some what reliable.
+#
+#   You can set BISECT_TRIES to zero, and all tests will be considered
+#   good, unless you also set BISECT_MANUAL.
+#
+# BISECT_RET_GOOD = 0 (optional, default undefined)
+#
+#   In case the specificed test returns something other than just
+#   0 for good, and non-zero for bad, you can override 0 being
+#   good by defining BISECT_RET_GOOD.
+#
+# BISECT_RET_BAD = 1 (optional, default undefined)
+#
+#   In case the specificed test returns something other than just
+#   0 for good, and non-zero for bad, you can override non-zero being
+#   bad by defining BISECT_RET_BAD.
+#
+# BISECT_RET_ABORT = 255 (optional, default undefined)
+#
+#   If you need to abort the bisect if the test discovers something
+#   that was wrong, you can define BISECT_RET_ABORT to be the error
+#   code returned by the test in order to abort the bisect.
+#
+# BISECT_RET_SKIP = 2 (optional, default undefined)
+#
+#   If the test detects that the current commit is neither good
+#   nor bad, but something else happened (another bug detected)
+#   you can specify BISECT_RET_SKIP to an error code that the
+#   test returns when it should skip the current commit.
+#
+# BISECT_RET_DEFAULT = good (optional, default undefined)
+#
+#   You can override the default of what to do when the above
+#   options are not hit. This may be one of, "good", "bad",
+#   "abort" or "skip" (without the quotes).
+#
+#   Note, if you do not define any of the previous BISECT_RET_*
+#   and define BISECT_RET_DEFAULT, all bisects results will do
+#   what the BISECT_RET_DEFAULT has.
+#
+#
+# Example:
+#   TEST_START
+#   TEST_TYPE = bisect
+#   BISECT_GOOD = v2.6.36
+#   BISECT_BAD = b5153163ed580e00c67bdfecb02b2e3843817b3e
+#   BISECT_TYPE = build
+#   MIN_CONFIG = /home/test/config-bisect
+#
+#
+#
+# For TEST_TYPE = config_bisect
+#
+#  In those cases that you have two different configs. One of them
+#  work, the other does not, and you do not know what config causes
+#  the problem.
+#  The TEST_TYPE config_bisect will bisect the bad config looking for
+#  what config causes the failure.
+#
+#  The way it works is this:
+#
+#   You can specify a good config with CONFIG_BISECT_GOOD, otherwise it
+#   will use the MIN_CONFIG, and if that's not specified, it will use
+#   the config that comes with "make defconfig".
+#
+#   It runs both the good and bad configs through a make oldconfig to
+#   make sure that they are set up for the kernel that is checked out.
+#
+#   It then reads the configs that are set, as well as the ones that are
+#   not set for both the good and bad configs, and then compares them.
+#   It will set half of the good configs within the bad config (note,
+#   "set" means to make the bad config match the good config, a config
+#   in the good config that is off, will be turned off in the bad
+#   config. That is considered a "set").
+#
+#   It tests this new config and if it works, it becomes the new good
+#   config, otherwise it becomes the new bad config. It continues this
+#   process until there's only one config left and it will report that
+#   config.
+#
+#   The "bad config" can also be a config that is needed to boot but was
+#   disabled because it depended on something that wasn't set.
+#
+#   During this process, it saves the current good and bad configs in
+#   ${TMP_DIR}/good_config and ${TMP_DIR}/bad_config respectively.
+#   If you stop the test, you can copy them to a new location to
+#   reuse them again.
+#
+#   Although the MIN_CONFIG may be the config it starts with, the
+#   MIN_CONFIG is ignored.
+#
+#  The option BUILD_TYPE will be ignored.
+#
+#  CONFIG_BISECT_TYPE is the type of test to perform:
+#	build	- bad fails to build
+#	boot	- bad builds but fails to boot
+#	test	- bad boots but fails a test
+#
+#  CONFIG_BISECT is the config that failed to boot
+#
+#  If BISECT_MANUAL is set, it will pause between iterations.
+#  This is useful to use just ktest.pl just for the config bisect.
+#  If you set it to build, it will run the bisect and you can
+#  control what happens in between iterations. It will ask you if
+#  the test succeeded or not and continue the config bisect.
+#
+# CONFIG_BISECT_GOOD (optional)
+#  If you have a good config to start with, then you
+#  can specify it with CONFIG_BISECT_GOOD. Otherwise
+#  the MIN_CONFIG is the base, if MIN_CONFIG is not set
+#  It will build a config with "make defconfig"
+#
+# CONFIG_BISECT_CHECK (optional)
+#  Set this to 1 if you want to confirm that the config ktest
+#  generates (the bad config with the min config) is still bad.
+#  It may be that the min config fixes what broke the bad config
+#  and the test will not return a result.
+#  Set it to "good" to test only the good config and set it
+#  to "bad" to only test the bad config.
+#
+# CONFIG_BISECT_EXEC (optional)
+#  The config bisect is a separate program that comes with ktest.pl.
+#  By befault, it will look for:
+#    `pwd`/config-bisect.pl # the location ktest.pl was executed from.
+#  If it does not find it there, it will look for:
+#    `dirname <ktest.pl>`/config-bisect.pl # The directory that holds ktest.pl
+#  If it does not find it there, it will look for:
+#    ${BUILD_DIR}/tools/testing/ktest/config-bisect.pl
+#  Setting CONFIG_BISECT_EXEC will override where it looks.
+#
+# Example:
+#   TEST_START
+#   TEST_TYPE = config_bisect
+#   CONFIG_BISECT_TYPE = build
+#   CONFIG_BISECT = /home/test/config-bad
+#   MIN_CONFIG = /home/test/config-min
+#   BISECT_MANUAL = 1
+#
+#
+#
+# For TEST_TYPE = make_min_config
+#
+#  After doing a make localyesconfig, your kernel configuration may
+#  not be the most useful minimum configuration. Having a true minimum
+#  config that you can use against other configs is very useful if
+#  someone else has a config that breaks on your code. By only forcing
+#  those configurations that are truly required to boot your machine
+#  will give you less of a chance that one of your set configurations
+#  will make the bug go away. This will give you a better chance to
+#  be able to reproduce the reported bug matching the broken config.
+#
+#  Note, this does take some time, and may require you to run the
+#  test over night, or perhaps over the weekend. But it also allows
+#  you to interrupt it, and gives you the current minimum config
+#  that was found till that time.
+#
+#  Note, this test automatically assumes a BUILD_TYPE of oldconfig
+#  and its test type acts like boot.
+#  TODO: add a test version that makes the config do more than just
+#   boot, like having network access.
+#
+#  To save time, the test does not just grab any option and test
+#  it. The Kconfig files are examined to determine the dependencies
+#  of the configs. If a config is chosen that depends on another
+#  config, that config will be checked first. By checking the
+#  parents first, we can eliminate whole groups of configs that
+#  may have been enabled.
+#
+#  For example, if a USB device config is chosen and depends on CONFIG_USB,
+#  the CONFIG_USB will be tested before the device. If CONFIG_USB is
+#  found not to be needed, it, as well as all configs that depend on
+#  it, will be disabled and removed from the current min_config.
+#
+#  OUTPUT_MIN_CONFIG is the path and filename of the file that will
+#   be created from the MIN_CONFIG. If you interrupt the test, set
+#   this file as your new min config, and use it to continue the test.
+#   This file does not need to exist on start of test.
+#   This file is not created until a config is found that can be removed.
+#   If this file exists, you will be prompted if you want to use it
+#   as the min_config (overriding MIN_CONFIG) if START_MIN_CONFIG
+#   is not defined.
+#   (required field)
+#
+#  START_MIN_CONFIG is the config to use to start the test with.
+#   you can set this as the same OUTPUT_MIN_CONFIG, but if you do
+#   the OUTPUT_MIN_CONFIG file must exist.
+#   (default MIN_CONFIG)
+#
+#  IGNORE_CONFIG is used to specify a config file that has configs that
+#   you already know must be set. Configs are written here that have
+#   been tested and proved to be required. It is best to define this
+#   file if you intend on interrupting the test and running it where
+#   it left off. New configs that it finds will be written to this file
+#   and will not be tested again in later runs.
+#   (optional)
+#
+#  MIN_CONFIG_TYPE can be either 'boot' or 'test'. With 'boot' it will
+#   test if the created config can just boot the machine. If this is
+#   set to 'test', then the TEST option must be defined and the created
+#   config will not only boot the target, but also make sure that the
+#   config lets the test succeed. This is useful to make sure the final
+#   config that is generated allows network activity (ssh).
+#   (optional)
+#
+#  USE_OUTPUT_MIN_CONFIG set this to 1 if you do not want to be prompted
+#   about using the OUTPUT_MIN_CONFIG as the MIN_CONFIG as the starting
+#   point. Set it to 0 if you want to always just use the given MIN_CONFIG.
+#   If it is not defined, it will prompt you to pick which config
+#   to start with (MIN_CONFIG or OUTPUT_MIN_CONFIG).
+#
+# Example:
+#
+#  TEST_TYPE = make_min_config
+#  OUTPUT_MIN_CONFIG = /path/to/config-new-min
+#  START_MIN_CONFIG = /path/to/config-min
+#  IGNORE_CONFIG = /path/to/config-tested
+#  MIN_CONFIG_TYPE = test
+#  TEST = ssh ${USER}@${MACHINE} echo hi
+#
+#
+#
+#
+# For TEST_TYPE = make_warnings_file
+#
+# If you want the build to fail when a new warning is discovered
+# you set the WARNINGS_FILE to point to a file of known warnings.
+#
+# The test "make_warnings_file" will let you create a new warnings
+# file before you run other tests, like patchcheck.
+#
+# What this test does is to run just a build, you still need to
+# specify BUILD_TYPE to tell the test what type of config to use.
+# A BUILD_TYPE of nobuild will fail this test.
+#
+# The test will do the build and scan for all warnings. Any warning
+# it discovers will be saved in the WARNINGS_FILE (required) option.
+#
+# It is recommended (but not necessary) to make sure BUILD_NOCLEAN is
+# off, so that a full build is done (make mrproper is performed).
+# That way, all warnings will be captured.
+#
+# Example:
+#
+#  TEST_TYPE = make_warnings_file
+#  WARNINGS_FILE = ${OUTPUT_DIR}
+#  BUILD_TYPE = useconfig:oldconfig
+#  CHECKOUT = v3.8
+#  BUILD_NOCLEAN = 0
+#
diff --git a/tools/testing/nvdimm/Kbuild b/tools/testing/nvdimm/Kbuild
new file mode 100644
index 000000000..0392153a0
--- /dev/null
+++ b/tools/testing/nvdimm/Kbuild
@@ -0,0 +1,84 @@
+# SPDX-License-Identifier: GPL-2.0
+ldflags-y += --wrap=ioremap_wc
+ldflags-y += --wrap=memremap
+ldflags-y += --wrap=devm_ioremap_nocache
+ldflags-y += --wrap=devm_memremap
+ldflags-y += --wrap=devm_memunmap
+ldflags-y += --wrap=ioremap_nocache
+ldflags-y += --wrap=iounmap
+ldflags-y += --wrap=memunmap
+ldflags-y += --wrap=__devm_request_region
+ldflags-y += --wrap=__devm_release_region
+ldflags-y += --wrap=__request_region
+ldflags-y += --wrap=__release_region
+ldflags-y += --wrap=devm_memremap_pages
+ldflags-y += --wrap=insert_resource
+ldflags-y += --wrap=remove_resource
+ldflags-y += --wrap=acpi_evaluate_object
+ldflags-y += --wrap=acpi_evaluate_dsm
+
+DRIVERS := ../../../drivers
+NVDIMM_SRC := $(DRIVERS)/nvdimm
+ACPI_SRC := $(DRIVERS)/acpi/nfit
+DAX_SRC := $(DRIVERS)/dax
+ccflags-y := -I$(src)/$(NVDIMM_SRC)/
+
+obj-$(CONFIG_LIBNVDIMM) += libnvdimm.o
+obj-$(CONFIG_BLK_DEV_PMEM) += nd_pmem.o
+obj-$(CONFIG_ND_BTT) += nd_btt.o
+obj-$(CONFIG_ND_BLK) += nd_blk.o
+obj-$(CONFIG_X86_PMEM_LEGACY) += nd_e820.o
+obj-$(CONFIG_ACPI_NFIT) += nfit.o
+ifeq ($(CONFIG_DAX),m)
+obj-$(CONFIG_DAX) += dax.o
+endif
+obj-$(CONFIG_DEV_DAX) += device_dax.o
+obj-$(CONFIG_DEV_DAX_PMEM) += dax_pmem.o
+
+nfit-y := $(ACPI_SRC)/core.o
+nfit-$(CONFIG_X86_MCE) += $(ACPI_SRC)/mce.o
+nfit-y += acpi_nfit_test.o
+nfit-y += config_check.o
+
+nd_pmem-y := $(NVDIMM_SRC)/pmem.o
+nd_pmem-y += pmem-dax.o
+nd_pmem-y += pmem_test.o
+nd_pmem-y += config_check.o
+
+nd_btt-y := $(NVDIMM_SRC)/btt.o
+nd_btt-y += config_check.o
+
+nd_blk-y := $(NVDIMM_SRC)/blk.o
+nd_blk-y += config_check.o
+
+nd_e820-y := $(NVDIMM_SRC)/e820.o
+nd_e820-y += config_check.o
+
+dax-y := $(DAX_SRC)/super.o
+dax-y += config_check.o
+
+device_dax-y := $(DAX_SRC)/device.o
+device_dax-y += dax-dev.o
+device_dax-y += device_dax_test.o
+device_dax-y += config_check.o
+
+dax_pmem-y := $(DAX_SRC)/pmem.o
+dax_pmem-y += config_check.o
+
+libnvdimm-y := $(NVDIMM_SRC)/core.o
+libnvdimm-y += $(NVDIMM_SRC)/bus.o
+libnvdimm-y += $(NVDIMM_SRC)/dimm_devs.o
+libnvdimm-y += $(NVDIMM_SRC)/dimm.o
+libnvdimm-y += $(NVDIMM_SRC)/region_devs.o
+libnvdimm-y += $(NVDIMM_SRC)/region.o
+libnvdimm-y += $(NVDIMM_SRC)/namespace_devs.o
+libnvdimm-y += $(NVDIMM_SRC)/label.o
+libnvdimm-y += $(NVDIMM_SRC)/badrange.o
+libnvdimm-$(CONFIG_ND_CLAIM) += $(NVDIMM_SRC)/claim.o
+libnvdimm-$(CONFIG_BTT) += $(NVDIMM_SRC)/btt_devs.o
+libnvdimm-$(CONFIG_NVDIMM_PFN) += $(NVDIMM_SRC)/pfn_devs.o
+libnvdimm-$(CONFIG_NVDIMM_DAX) += $(NVDIMM_SRC)/dax_devs.o
+libnvdimm-y += libnvdimm_test.o
+libnvdimm-y += config_check.o
+
+obj-m += test/
diff --git a/tools/testing/nvdimm/Makefile b/tools/testing/nvdimm/Makefile
new file mode 100644
index 000000000..c37a6a0bd
--- /dev/null
+++ b/tools/testing/nvdimm/Makefile
@@ -0,0 +1,8 @@
+# SPDX-License-Identifier: GPL-2.0
+KDIR ?= ../../../
+
+default:
+	$(MAKE) -C $(KDIR) M=$$PWD
+
+install: default
+	$(MAKE) -C $(KDIR) M=$$PWD modules_install
diff --git a/tools/testing/nvdimm/acpi_nfit_test.c b/tools/testing/nvdimm/acpi_nfit_test.c
new file mode 100644
index 000000000..43521512e
--- /dev/null
+++ b/tools/testing/nvdimm/acpi_nfit_test.c
@@ -0,0 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright(c) 2018 Intel Corporation. All rights reserved.
+
+#include <linux/module.h>
+#include <linux/printk.h>
+#include "watermark.h"
+
+nfit_test_watermark(acpi_nfit);
diff --git a/tools/testing/nvdimm/config_check.c b/tools/testing/nvdimm/config_check.c
new file mode 100644
index 000000000..cac891028
--- /dev/null
+++ b/tools/testing/nvdimm/config_check.c
@@ -0,0 +1,18 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bug.h>
+
+void check(void)
+{
+	/*
+	 * These kconfig symbols must be set to "m" for nfit_test to
+	 * load and operate.
+	 */
+	BUILD_BUG_ON(!IS_MODULE(CONFIG_LIBNVDIMM));
+	BUILD_BUG_ON(!IS_MODULE(CONFIG_BLK_DEV_PMEM));
+	BUILD_BUG_ON(!IS_MODULE(CONFIG_ND_BTT));
+	BUILD_BUG_ON(!IS_MODULE(CONFIG_ND_PFN));
+	BUILD_BUG_ON(!IS_MODULE(CONFIG_ND_BLK));
+	BUILD_BUG_ON(!IS_MODULE(CONFIG_ACPI_NFIT));
+	BUILD_BUG_ON(!IS_MODULE(CONFIG_DEV_DAX));
+	BUILD_BUG_ON(!IS_MODULE(CONFIG_DEV_DAX_PMEM));
+}
diff --git a/tools/testing/nvdimm/dax-dev.c b/tools/testing/nvdimm/dax-dev.c
new file mode 100644
index 000000000..36ee3d879
--- /dev/null
+++ b/tools/testing/nvdimm/dax-dev.c
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2016, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+#include "test/nfit_test.h"
+#include <linux/mm.h>
+#include "../../../drivers/dax/dax-private.h"
+
+phys_addr_t dax_pgoff_to_phys(struct dev_dax *dev_dax, pgoff_t pgoff,
+		unsigned long size)
+{
+	struct resource *res;
+	phys_addr_t addr;
+	int i;
+
+	for (i = 0; i < dev_dax->num_resources; i++) {
+		res = &dev_dax->res[i];
+		addr = pgoff * PAGE_SIZE + res->start;
+		if (addr >= res->start && addr <= res->end)
+			break;
+		pgoff -= PHYS_PFN(resource_size(res));
+	}
+
+	if (i < dev_dax->num_resources) {
+		res = &dev_dax->res[i];
+		if (addr + size - 1 <= res->end) {
+			if (get_nfit_res(addr)) {
+				struct page *page;
+
+				if (dev_dax->region->align > PAGE_SIZE)
+					return -1;
+
+				page = vmalloc_to_page((void *)addr);
+				return PFN_PHYS(page_to_pfn(page));
+			} else
+				return addr;
+		}
+	}
+
+	return -1;
+}
diff --git a/tools/testing/nvdimm/device_dax_test.c b/tools/testing/nvdimm/device_dax_test.c
new file mode 100644
index 000000000..24b17bf42
--- /dev/null
+++ b/tools/testing/nvdimm/device_dax_test.c
@@ -0,0 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright(c) 2018 Intel Corporation. All rights reserved.
+
+#include <linux/module.h>
+#include <linux/printk.h>
+#include "watermark.h"
+
+nfit_test_watermark(device_dax);
diff --git a/tools/testing/nvdimm/libnvdimm_test.c b/tools/testing/nvdimm/libnvdimm_test.c
new file mode 100644
index 000000000..00ca30b23
--- /dev/null
+++ b/tools/testing/nvdimm/libnvdimm_test.c
@@ -0,0 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright(c) 2018 Intel Corporation. All rights reserved.
+
+#include <linux/module.h>
+#include <linux/printk.h>
+#include "watermark.h"
+
+nfit_test_watermark(libnvdimm);
diff --git a/tools/testing/nvdimm/pmem-dax.c b/tools/testing/nvdimm/pmem-dax.c
new file mode 100644
index 000000000..2e7fd8227
--- /dev/null
+++ b/tools/testing/nvdimm/pmem-dax.c
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2014-2016, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+#include "test/nfit_test.h"
+#include <linux/blkdev.h>
+#include <pmem.h>
+#include <nd.h>
+
+long __pmem_direct_access(struct pmem_device *pmem, pgoff_t pgoff,
+		long nr_pages, void **kaddr, pfn_t *pfn)
+{
+	resource_size_t offset = PFN_PHYS(pgoff) + pmem->data_offset;
+
+	if (unlikely(is_bad_pmem(&pmem->bb, PFN_PHYS(pgoff) / 512,
+					PFN_PHYS(nr_pages))))
+		return -EIO;
+
+	/*
+	 * Limit dax to a single page at a time given vmalloc()-backed
+	 * in the nfit_test case.
+	 */
+	if (get_nfit_res(pmem->phys_addr + offset)) {
+		struct page *page;
+
+		if (kaddr)
+			*kaddr = pmem->virt_addr + offset;
+		page = vmalloc_to_page(pmem->virt_addr + offset);
+		if (pfn)
+			*pfn = page_to_pfn_t(page);
+		pr_debug_ratelimited("%s: pmem: %p pgoff: %#lx pfn: %#lx\n",
+				__func__, pmem, pgoff, page_to_pfn(page));
+
+		return 1;
+	}
+
+	if (kaddr)
+		*kaddr = pmem->virt_addr + offset;
+	if (pfn)
+		*pfn = phys_to_pfn_t(pmem->phys_addr + offset, pmem->pfn_flags);
+
+	/*
+	 * If badblocks are present, limit known good range to the
+	 * requested range.
+	 */
+	if (unlikely(pmem->bb.count))
+		return nr_pages;
+	return PHYS_PFN(pmem->size - pmem->pfn_pad - offset);
+}
diff --git a/tools/testing/nvdimm/pmem_test.c b/tools/testing/nvdimm/pmem_test.c
new file mode 100644
index 000000000..fd38f9227
--- /dev/null
+++ b/tools/testing/nvdimm/pmem_test.c
@@ -0,0 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright(c) 2018 Intel Corporation. All rights reserved.
+
+#include <linux/module.h>
+#include <linux/printk.h>
+#include "watermark.h"
+
+nfit_test_watermark(pmem);
diff --git a/tools/testing/nvdimm/test/Kbuild b/tools/testing/nvdimm/test/Kbuild
new file mode 100644
index 000000000..fb3c3d7cd
--- /dev/null
+++ b/tools/testing/nvdimm/test/Kbuild
@@ -0,0 +1,9 @@
+# SPDX-License-Identifier: GPL-2.0
+ccflags-y := -I$(src)/../../../../drivers/nvdimm/
+ccflags-y += -I$(src)/../../../../drivers/acpi/nfit/
+
+obj-m += nfit_test.o
+obj-m += nfit_test_iomap.o
+
+nfit_test-y := nfit.o
+nfit_test_iomap-y := iomap.o
diff --git a/tools/testing/nvdimm/test/iomap.c b/tools/testing/nvdimm/test/iomap.c
new file mode 100644
index 000000000..c6635fee2
--- /dev/null
+++ b/tools/testing/nvdimm/test/iomap.c
@@ -0,0 +1,403 @@
+/*
+ * Copyright(c) 2013-2015 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+#include <linux/memremap.h>
+#include <linux/rculist.h>
+#include <linux/export.h>
+#include <linux/ioport.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/pfn_t.h>
+#include <linux/acpi.h>
+#include <linux/io.h>
+#include <linux/mm.h>
+#include "nfit_test.h"
+
+static LIST_HEAD(iomap_head);
+
+static struct iomap_ops {
+	nfit_test_lookup_fn nfit_test_lookup;
+	nfit_test_evaluate_dsm_fn evaluate_dsm;
+	struct list_head list;
+} iomap_ops = {
+	.list = LIST_HEAD_INIT(iomap_ops.list),
+};
+
+void nfit_test_setup(nfit_test_lookup_fn lookup,
+		nfit_test_evaluate_dsm_fn evaluate)
+{
+	iomap_ops.nfit_test_lookup = lookup;
+	iomap_ops.evaluate_dsm = evaluate;
+	list_add_rcu(&iomap_ops.list, &iomap_head);
+}
+EXPORT_SYMBOL(nfit_test_setup);
+
+void nfit_test_teardown(void)
+{
+	list_del_rcu(&iomap_ops.list);
+	synchronize_rcu();
+}
+EXPORT_SYMBOL(nfit_test_teardown);
+
+static struct nfit_test_resource *__get_nfit_res(resource_size_t resource)
+{
+	struct iomap_ops *ops;
+
+	ops = list_first_or_null_rcu(&iomap_head, typeof(*ops), list);
+	if (ops)
+		return ops->nfit_test_lookup(resource);
+	return NULL;
+}
+
+struct nfit_test_resource *get_nfit_res(resource_size_t resource)
+{
+	struct nfit_test_resource *res;
+
+	rcu_read_lock();
+	res = __get_nfit_res(resource);
+	rcu_read_unlock();
+
+	return res;
+}
+EXPORT_SYMBOL(get_nfit_res);
+
+void __iomem *__nfit_test_ioremap(resource_size_t offset, unsigned long size,
+		void __iomem *(*fallback_fn)(resource_size_t, unsigned long))
+{
+	struct nfit_test_resource *nfit_res = get_nfit_res(offset);
+
+	if (nfit_res)
+		return (void __iomem *) nfit_res->buf + offset
+			- nfit_res->res.start;
+	return fallback_fn(offset, size);
+}
+
+void __iomem *__wrap_devm_ioremap_nocache(struct device *dev,
+		resource_size_t offset, unsigned long size)
+{
+	struct nfit_test_resource *nfit_res = get_nfit_res(offset);
+
+	if (nfit_res)
+		return (void __iomem *) nfit_res->buf + offset
+			- nfit_res->res.start;
+	return devm_ioremap_nocache(dev, offset, size);
+}
+EXPORT_SYMBOL(__wrap_devm_ioremap_nocache);
+
+void *__wrap_devm_memremap(struct device *dev, resource_size_t offset,
+		size_t size, unsigned long flags)
+{
+	struct nfit_test_resource *nfit_res = get_nfit_res(offset);
+
+	if (nfit_res)
+		return nfit_res->buf + offset - nfit_res->res.start;
+	return devm_memremap(dev, offset, size, flags);
+}
+EXPORT_SYMBOL(__wrap_devm_memremap);
+
+static void nfit_test_kill(void *_pgmap)
+{
+	struct dev_pagemap *pgmap = _pgmap;
+
+	pgmap->kill(pgmap->ref);
+}
+
+void *__wrap_devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
+{
+	resource_size_t offset = pgmap->res.start;
+	struct nfit_test_resource *nfit_res = get_nfit_res(offset);
+
+	if (nfit_res) {
+		int rc;
+
+		rc = devm_add_action_or_reset(dev, nfit_test_kill, pgmap);
+		if (rc)
+			return ERR_PTR(rc);
+		return nfit_res->buf + offset - nfit_res->res.start;
+	}
+	return devm_memremap_pages(dev, pgmap);
+}
+EXPORT_SYMBOL_GPL(__wrap_devm_memremap_pages);
+
+pfn_t __wrap_phys_to_pfn_t(phys_addr_t addr, unsigned long flags)
+{
+	struct nfit_test_resource *nfit_res = get_nfit_res(addr);
+
+	if (nfit_res)
+		flags &= ~PFN_MAP;
+        return phys_to_pfn_t(addr, flags);
+}
+EXPORT_SYMBOL(__wrap_phys_to_pfn_t);
+
+void *__wrap_memremap(resource_size_t offset, size_t size,
+		unsigned long flags)
+{
+	struct nfit_test_resource *nfit_res = get_nfit_res(offset);
+
+	if (nfit_res)
+		return nfit_res->buf + offset - nfit_res->res.start;
+	return memremap(offset, size, flags);
+}
+EXPORT_SYMBOL(__wrap_memremap);
+
+void __wrap_devm_memunmap(struct device *dev, void *addr)
+{
+	struct nfit_test_resource *nfit_res = get_nfit_res((long) addr);
+
+	if (nfit_res)
+		return;
+	return devm_memunmap(dev, addr);
+}
+EXPORT_SYMBOL(__wrap_devm_memunmap);
+
+void __iomem *__wrap_ioremap_nocache(resource_size_t offset, unsigned long size)
+{
+	return __nfit_test_ioremap(offset, size, ioremap_nocache);
+}
+EXPORT_SYMBOL(__wrap_ioremap_nocache);
+
+void __iomem *__wrap_ioremap_wc(resource_size_t offset, unsigned long size)
+{
+	return __nfit_test_ioremap(offset, size, ioremap_wc);
+}
+EXPORT_SYMBOL(__wrap_ioremap_wc);
+
+void __wrap_iounmap(volatile void __iomem *addr)
+{
+	struct nfit_test_resource *nfit_res = get_nfit_res((long) addr);
+	if (nfit_res)
+		return;
+	return iounmap(addr);
+}
+EXPORT_SYMBOL(__wrap_iounmap);
+
+void __wrap_memunmap(void *addr)
+{
+	struct nfit_test_resource *nfit_res = get_nfit_res((long) addr);
+
+	if (nfit_res)
+		return;
+	return memunmap(addr);
+}
+EXPORT_SYMBOL(__wrap_memunmap);
+
+static bool nfit_test_release_region(struct device *dev,
+		struct resource *parent, resource_size_t start,
+		resource_size_t n);
+
+static void nfit_devres_release(struct device *dev, void *data)
+{
+	struct resource *res = *((struct resource **) data);
+
+	WARN_ON(!nfit_test_release_region(NULL, &iomem_resource, res->start,
+			resource_size(res)));
+}
+
+static int match(struct device *dev, void *__res, void *match_data)
+{
+	struct resource *res = *((struct resource **) __res);
+	resource_size_t start = *((resource_size_t *) match_data);
+
+	return res->start == start;
+}
+
+static bool nfit_test_release_region(struct device *dev,
+		struct resource *parent, resource_size_t start,
+		resource_size_t n)
+{
+	if (parent == &iomem_resource) {
+		struct nfit_test_resource *nfit_res = get_nfit_res(start);
+
+		if (nfit_res) {
+			struct nfit_test_request *req;
+			struct resource *res = NULL;
+
+			if (dev) {
+				devres_release(dev, nfit_devres_release, match,
+						&start);
+				return true;
+			}
+
+			spin_lock(&nfit_res->lock);
+			list_for_each_entry(req, &nfit_res->requests, list)
+				if (req->res.start == start) {
+					res = &req->res;
+					list_del(&req->list);
+					break;
+				}
+			spin_unlock(&nfit_res->lock);
+
+			WARN(!res || resource_size(res) != n,
+					"%s: start: %llx n: %llx mismatch: %pr\n",
+						__func__, start, n, res);
+			if (res)
+				kfree(req);
+			return true;
+		}
+	}
+	return false;
+}
+
+static struct resource *nfit_test_request_region(struct device *dev,
+		struct resource *parent, resource_size_t start,
+		resource_size_t n, const char *name, int flags)
+{
+	struct nfit_test_resource *nfit_res;
+
+	if (parent == &iomem_resource) {
+		nfit_res = get_nfit_res(start);
+		if (nfit_res) {
+			struct nfit_test_request *req;
+			struct resource *res = NULL;
+
+			if (start + n > nfit_res->res.start
+					+ resource_size(&nfit_res->res)) {
+				pr_debug("%s: start: %llx n: %llx overflow: %pr\n",
+						__func__, start, n,
+						&nfit_res->res);
+				return NULL;
+			}
+
+			spin_lock(&nfit_res->lock);
+			list_for_each_entry(req, &nfit_res->requests, list)
+				if (start == req->res.start) {
+					res = &req->res;
+					break;
+				}
+			spin_unlock(&nfit_res->lock);
+
+			if (res) {
+				WARN(1, "%pr already busy\n", res);
+				return NULL;
+			}
+
+			req = kzalloc(sizeof(*req), GFP_KERNEL);
+			if (!req)
+				return NULL;
+			INIT_LIST_HEAD(&req->list);
+			res = &req->res;
+
+			res->start = start;
+			res->end = start + n - 1;
+			res->name = name;
+			res->flags = resource_type(parent);
+			res->flags |= IORESOURCE_BUSY | flags;
+			spin_lock(&nfit_res->lock);
+			list_add(&req->list, &nfit_res->requests);
+			spin_unlock(&nfit_res->lock);
+
+			if (dev) {
+				struct resource **d;
+
+				d = devres_alloc(nfit_devres_release,
+						sizeof(struct resource *),
+						GFP_KERNEL);
+				if (!d)
+					return NULL;
+				*d = res;
+				devres_add(dev, d);
+			}
+
+			pr_debug("%s: %pr\n", __func__, res);
+			return res;
+		}
+	}
+	if (dev)
+		return __devm_request_region(dev, parent, start, n, name);
+	return __request_region(parent, start, n, name, flags);
+}
+
+struct resource *__wrap___request_region(struct resource *parent,
+		resource_size_t start, resource_size_t n, const char *name,
+		int flags)
+{
+	return nfit_test_request_region(NULL, parent, start, n, name, flags);
+}
+EXPORT_SYMBOL(__wrap___request_region);
+
+int __wrap_insert_resource(struct resource *parent, struct resource *res)
+{
+	if (get_nfit_res(res->start))
+		return 0;
+	return insert_resource(parent, res);
+}
+EXPORT_SYMBOL(__wrap_insert_resource);
+
+int __wrap_remove_resource(struct resource *res)
+{
+	if (get_nfit_res(res->start))
+		return 0;
+	return remove_resource(res);
+}
+EXPORT_SYMBOL(__wrap_remove_resource);
+
+struct resource *__wrap___devm_request_region(struct device *dev,
+		struct resource *parent, resource_size_t start,
+		resource_size_t n, const char *name)
+{
+	if (!dev)
+		return NULL;
+	return nfit_test_request_region(dev, parent, start, n, name, 0);
+}
+EXPORT_SYMBOL(__wrap___devm_request_region);
+
+void __wrap___release_region(struct resource *parent, resource_size_t start,
+		resource_size_t n)
+{
+	if (!nfit_test_release_region(NULL, parent, start, n))
+		__release_region(parent, start, n);
+}
+EXPORT_SYMBOL(__wrap___release_region);
+
+void __wrap___devm_release_region(struct device *dev, struct resource *parent,
+		resource_size_t start, resource_size_t n)
+{
+	if (!nfit_test_release_region(dev, parent, start, n))
+		__devm_release_region(dev, parent, start, n);
+}
+EXPORT_SYMBOL(__wrap___devm_release_region);
+
+acpi_status __wrap_acpi_evaluate_object(acpi_handle handle, acpi_string path,
+		struct acpi_object_list *p, struct acpi_buffer *buf)
+{
+	struct nfit_test_resource *nfit_res = get_nfit_res((long) handle);
+	union acpi_object **obj;
+
+	if (!nfit_res || strcmp(path, "_FIT") || !buf)
+		return acpi_evaluate_object(handle, path, p, buf);
+
+	obj = nfit_res->buf;
+	buf->length = sizeof(union acpi_object);
+	buf->pointer = *obj;
+	return AE_OK;
+}
+EXPORT_SYMBOL(__wrap_acpi_evaluate_object);
+
+union acpi_object * __wrap_acpi_evaluate_dsm(acpi_handle handle, const guid_t *guid,
+		u64 rev, u64 func, union acpi_object *argv4)
+{
+	union acpi_object *obj = ERR_PTR(-ENXIO);
+	struct iomap_ops *ops;
+
+	rcu_read_lock();
+	ops = list_first_or_null_rcu(&iomap_head, typeof(*ops), list);
+	if (ops)
+		obj = ops->evaluate_dsm(handle, guid, rev, func, argv4);
+	rcu_read_unlock();
+
+	if (IS_ERR(obj))
+		return acpi_evaluate_dsm(handle, guid, rev, func, argv4);
+	return obj;
+}
+EXPORT_SYMBOL(__wrap_acpi_evaluate_dsm);
+
+MODULE_LICENSE("GPL v2");
diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c
new file mode 100644
index 000000000..fa763dbfd
--- /dev/null
+++ b/tools/testing/nvdimm/test/nfit.c
@@ -0,0 +1,2957 @@
+/*
+ * Copyright(c) 2013-2015 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/platform_device.h>
+#include <linux/dma-mapping.h>
+#include <linux/workqueue.h>
+#include <linux/libnvdimm.h>
+#include <linux/genalloc.h>
+#include <linux/vmalloc.h>
+#include <linux/device.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/ndctl.h>
+#include <linux/sizes.h>
+#include <linux/list.h>
+#include <linux/slab.h>
+#include <nd-core.h>
+#include <nfit.h>
+#include <nd.h>
+#include "nfit_test.h"
+#include "../watermark.h"
+
+#include <asm/mcsafe_test.h>
+
+/*
+ * Generate an NFIT table to describe the following topology:
+ *
+ * BUS0: Interleaved PMEM regions, and aliasing with BLK regions
+ *
+ *                     (a)                       (b)            DIMM   BLK-REGION
+ *           +----------+--------------+----------+---------+
+ * +------+  |  blk2.0  |     pm0.0    |  blk2.1  |  pm1.0  |    0      region2
+ * | imc0 +--+- - - - - region0 - - - -+----------+         +
+ * +--+---+  |  blk3.0  |     pm0.0    |  blk3.1  |  pm1.0  |    1      region3
+ *    |      +----------+--------------v----------v         v
+ * +--+---+                            |                    |
+ * | cpu0 |                                    region1
+ * +--+---+                            |                    |
+ *    |      +-------------------------^----------^         ^
+ * +--+---+  |                 blk4.0             |  pm1.0  |    2      region4
+ * | imc1 +--+-------------------------+----------+         +
+ * +------+  |                 blk5.0             |  pm1.0  |    3      region5
+ *           +-------------------------+----------+-+-------+
+ *
+ * +--+---+
+ * | cpu1 |
+ * +--+---+                   (Hotplug DIMM)
+ *    |      +----------------------------------------------+
+ * +--+---+  |                 blk6.0/pm7.0                 |    4      region6/7
+ * | imc0 +--+----------------------------------------------+
+ * +------+
+ *
+ *
+ * *) In this layout we have four dimms and two memory controllers in one
+ *    socket.  Each unique interface (BLK or PMEM) to DPA space
+ *    is identified by a region device with a dynamically assigned id.
+ *
+ * *) The first portion of dimm0 and dimm1 are interleaved as REGION0.
+ *    A single PMEM namespace "pm0.0" is created using half of the
+ *    REGION0 SPA-range.  REGION0 spans dimm0 and dimm1.  PMEM namespace
+ *    allocate from from the bottom of a region.  The unallocated
+ *    portion of REGION0 aliases with REGION2 and REGION3.  That
+ *    unallacted capacity is reclaimed as BLK namespaces ("blk2.0" and
+ *    "blk3.0") starting at the base of each DIMM to offset (a) in those
+ *    DIMMs.  "pm0.0", "blk2.0" and "blk3.0" are free-form readable
+ *    names that can be assigned to a namespace.
+ *
+ * *) In the last portion of dimm0 and dimm1 we have an interleaved
+ *    SPA range, REGION1, that spans those two dimms as well as dimm2
+ *    and dimm3.  Some of REGION1 allocated to a PMEM namespace named
+ *    "pm1.0" the rest is reclaimed in 4 BLK namespaces (for each
+ *    dimm in the interleave set), "blk2.1", "blk3.1", "blk4.0", and
+ *    "blk5.0".
+ *
+ * *) The portion of dimm2 and dimm3 that do not participate in the
+ *    REGION1 interleaved SPA range (i.e. the DPA address below offset
+ *    (b) are also included in the "blk4.0" and "blk5.0" namespaces.
+ *    Note, that BLK namespaces need not be contiguous in DPA-space, and
+ *    can consume aliased capacity from multiple interleave sets.
+ *
+ * BUS1: Legacy NVDIMM (single contiguous range)
+ *
+ *  region2
+ * +---------------------+
+ * |---------------------|
+ * ||       pm2.0       ||
+ * |---------------------|
+ * +---------------------+
+ *
+ * *) A NFIT-table may describe a simple system-physical-address range
+ *    with no BLK aliasing.  This type of region may optionally
+ *    reference an NVDIMM.
+ */
+enum {
+	NUM_PM  = 3,
+	NUM_DCR = 5,
+	NUM_HINTS = 8,
+	NUM_BDW = NUM_DCR,
+	NUM_SPA = NUM_PM + NUM_DCR + NUM_BDW,
+	NUM_MEM = NUM_DCR + NUM_BDW + 2 /* spa0 iset */
+		+ 4 /* spa1 iset */ + 1 /* spa11 iset */,
+	DIMM_SIZE = SZ_32M,
+	LABEL_SIZE = SZ_128K,
+	SPA_VCD_SIZE = SZ_4M,
+	SPA0_SIZE = DIMM_SIZE,
+	SPA1_SIZE = DIMM_SIZE*2,
+	SPA2_SIZE = DIMM_SIZE,
+	BDW_SIZE = 64 << 8,
+	DCR_SIZE = 12,
+	NUM_NFITS = 2, /* permit testing multiple NFITs per system */
+};
+
+struct nfit_test_dcr {
+	__le64 bdw_addr;
+	__le32 bdw_status;
+	__u8 aperature[BDW_SIZE];
+};
+
+#define NFIT_DIMM_HANDLE(node, socket, imc, chan, dimm) \
+	(((node & 0xfff) << 16) | ((socket & 0xf) << 12) \
+	 | ((imc & 0xf) << 8) | ((chan & 0xf) << 4) | (dimm & 0xf))
+
+static u32 handle[] = {
+	[0] = NFIT_DIMM_HANDLE(0, 0, 0, 0, 0),
+	[1] = NFIT_DIMM_HANDLE(0, 0, 0, 0, 1),
+	[2] = NFIT_DIMM_HANDLE(0, 0, 1, 0, 0),
+	[3] = NFIT_DIMM_HANDLE(0, 0, 1, 0, 1),
+	[4] = NFIT_DIMM_HANDLE(0, 1, 0, 0, 0),
+	[5] = NFIT_DIMM_HANDLE(1, 0, 0, 0, 0),
+	[6] = NFIT_DIMM_HANDLE(1, 0, 0, 0, 1),
+};
+
+static unsigned long dimm_fail_cmd_flags[ARRAY_SIZE(handle)];
+static int dimm_fail_cmd_code[ARRAY_SIZE(handle)];
+
+static const struct nd_intel_smart smart_def = {
+	.flags = ND_INTEL_SMART_HEALTH_VALID
+		| ND_INTEL_SMART_SPARES_VALID
+		| ND_INTEL_SMART_ALARM_VALID
+		| ND_INTEL_SMART_USED_VALID
+		| ND_INTEL_SMART_SHUTDOWN_VALID
+		| ND_INTEL_SMART_MTEMP_VALID
+		| ND_INTEL_SMART_CTEMP_VALID,
+	.health = ND_INTEL_SMART_NON_CRITICAL_HEALTH,
+	.media_temperature = 23 * 16,
+	.ctrl_temperature = 25 * 16,
+	.pmic_temperature = 40 * 16,
+	.spares = 75,
+	.alarm_flags = ND_INTEL_SMART_SPARE_TRIP
+		| ND_INTEL_SMART_TEMP_TRIP,
+	.ait_status = 1,
+	.life_used = 5,
+	.shutdown_state = 0,
+	.vendor_size = 0,
+	.shutdown_count = 100,
+};
+
+struct nfit_test_fw {
+	enum intel_fw_update_state state;
+	u32 context;
+	u64 version;
+	u32 size_received;
+	u64 end_time;
+};
+
+struct nfit_test {
+	struct acpi_nfit_desc acpi_desc;
+	struct platform_device pdev;
+	struct list_head resources;
+	void *nfit_buf;
+	dma_addr_t nfit_dma;
+	size_t nfit_size;
+	size_t nfit_filled;
+	int dcr_idx;
+	int num_dcr;
+	int num_pm;
+	void **dimm;
+	dma_addr_t *dimm_dma;
+	void **flush;
+	dma_addr_t *flush_dma;
+	void **label;
+	dma_addr_t *label_dma;
+	void **spa_set;
+	dma_addr_t *spa_set_dma;
+	struct nfit_test_dcr **dcr;
+	dma_addr_t *dcr_dma;
+	int (*alloc)(struct nfit_test *t);
+	void (*setup)(struct nfit_test *t);
+	int setup_hotplug;
+	union acpi_object **_fit;
+	dma_addr_t _fit_dma;
+	struct ars_state {
+		struct nd_cmd_ars_status *ars_status;
+		unsigned long deadline;
+		spinlock_t lock;
+	} ars_state;
+	struct device *dimm_dev[ARRAY_SIZE(handle)];
+	struct nd_intel_smart *smart;
+	struct nd_intel_smart_threshold *smart_threshold;
+	struct badrange badrange;
+	struct work_struct work;
+	struct nfit_test_fw *fw;
+};
+
+static struct workqueue_struct *nfit_wq;
+
+static struct gen_pool *nfit_pool;
+
+static struct nfit_test *to_nfit_test(struct device *dev)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+
+	return container_of(pdev, struct nfit_test, pdev);
+}
+
+static int nd_intel_test_get_fw_info(struct nfit_test *t,
+		struct nd_intel_fw_info *nd_cmd, unsigned int buf_len,
+		int idx)
+{
+	struct device *dev = &t->pdev.dev;
+	struct nfit_test_fw *fw = &t->fw[idx];
+
+	dev_dbg(dev, "%s(nfit_test: %p nd_cmd: %p, buf_len: %u, idx: %d\n",
+			__func__, t, nd_cmd, buf_len, idx);
+
+	if (buf_len < sizeof(*nd_cmd))
+		return -EINVAL;
+
+	nd_cmd->status = 0;
+	nd_cmd->storage_size = INTEL_FW_STORAGE_SIZE;
+	nd_cmd->max_send_len = INTEL_FW_MAX_SEND_LEN;
+	nd_cmd->query_interval = INTEL_FW_QUERY_INTERVAL;
+	nd_cmd->max_query_time = INTEL_FW_QUERY_MAX_TIME;
+	nd_cmd->update_cap = 0;
+	nd_cmd->fis_version = INTEL_FW_FIS_VERSION;
+	nd_cmd->run_version = 0;
+	nd_cmd->updated_version = fw->version;
+
+	return 0;
+}
+
+static int nd_intel_test_start_update(struct nfit_test *t,
+		struct nd_intel_fw_start *nd_cmd, unsigned int buf_len,
+		int idx)
+{
+	struct device *dev = &t->pdev.dev;
+	struct nfit_test_fw *fw = &t->fw[idx];
+
+	dev_dbg(dev, "%s(nfit_test: %p nd_cmd: %p buf_len: %u idx: %d)\n",
+			__func__, t, nd_cmd, buf_len, idx);
+
+	if (buf_len < sizeof(*nd_cmd))
+		return -EINVAL;
+
+	if (fw->state != FW_STATE_NEW) {
+		/* extended status, FW update in progress */
+		nd_cmd->status = 0x10007;
+		return 0;
+	}
+
+	fw->state = FW_STATE_IN_PROGRESS;
+	fw->context++;
+	fw->size_received = 0;
+	nd_cmd->status = 0;
+	nd_cmd->context = fw->context;
+
+	dev_dbg(dev, "%s: context issued: %#x\n", __func__, nd_cmd->context);
+
+	return 0;
+}
+
+static int nd_intel_test_send_data(struct nfit_test *t,
+		struct nd_intel_fw_send_data *nd_cmd, unsigned int buf_len,
+		int idx)
+{
+	struct device *dev = &t->pdev.dev;
+	struct nfit_test_fw *fw = &t->fw[idx];
+	u32 *status = (u32 *)&nd_cmd->data[nd_cmd->length];
+
+	dev_dbg(dev, "%s(nfit_test: %p nd_cmd: %p buf_len: %u idx: %d)\n",
+			__func__, t, nd_cmd, buf_len, idx);
+
+	if (buf_len < sizeof(*nd_cmd))
+		return -EINVAL;
+
+
+	dev_dbg(dev, "%s: cmd->status: %#x\n", __func__, *status);
+	dev_dbg(dev, "%s: cmd->data[0]: %#x\n", __func__, nd_cmd->data[0]);
+	dev_dbg(dev, "%s: cmd->data[%u]: %#x\n", __func__, nd_cmd->length-1,
+			nd_cmd->data[nd_cmd->length-1]);
+
+	if (fw->state != FW_STATE_IN_PROGRESS) {
+		dev_dbg(dev, "%s: not in IN_PROGRESS state\n", __func__);
+		*status = 0x5;
+		return 0;
+	}
+
+	if (nd_cmd->context != fw->context) {
+		dev_dbg(dev, "%s: incorrect context: in: %#x correct: %#x\n",
+				__func__, nd_cmd->context, fw->context);
+		*status = 0x10007;
+		return 0;
+	}
+
+	/*
+	 * check offset + len > size of fw storage
+	 * check length is > max send length
+	 */
+	if (nd_cmd->offset + nd_cmd->length > INTEL_FW_STORAGE_SIZE ||
+			nd_cmd->length > INTEL_FW_MAX_SEND_LEN) {
+		*status = 0x3;
+		dev_dbg(dev, "%s: buffer boundary violation\n", __func__);
+		return 0;
+	}
+
+	fw->size_received += nd_cmd->length;
+	dev_dbg(dev, "%s: copying %u bytes, %u bytes so far\n",
+			__func__, nd_cmd->length, fw->size_received);
+	*status = 0;
+	return 0;
+}
+
+static int nd_intel_test_finish_fw(struct nfit_test *t,
+		struct nd_intel_fw_finish_update *nd_cmd,
+		unsigned int buf_len, int idx)
+{
+	struct device *dev = &t->pdev.dev;
+	struct nfit_test_fw *fw = &t->fw[idx];
+
+	dev_dbg(dev, "%s(nfit_test: %p nd_cmd: %p buf_len: %u idx: %d)\n",
+			__func__, t, nd_cmd, buf_len, idx);
+
+	if (fw->state == FW_STATE_UPDATED) {
+		/* update already done, need cold boot */
+		nd_cmd->status = 0x20007;
+		return 0;
+	}
+
+	dev_dbg(dev, "%s: context: %#x  ctrl_flags: %#x\n",
+			__func__, nd_cmd->context, nd_cmd->ctrl_flags);
+
+	switch (nd_cmd->ctrl_flags) {
+	case 0: /* finish */
+		if (nd_cmd->context != fw->context) {
+			dev_dbg(dev, "%s: incorrect context: in: %#x correct: %#x\n",
+					__func__, nd_cmd->context,
+					fw->context);
+			nd_cmd->status = 0x10007;
+			return 0;
+		}
+		nd_cmd->status = 0;
+		fw->state = FW_STATE_VERIFY;
+		/* set 1 second of time for firmware "update" */
+		fw->end_time = jiffies + HZ;
+		break;
+
+	case 1: /* abort */
+		fw->size_received = 0;
+		/* successfully aborted status */
+		nd_cmd->status = 0x40007;
+		fw->state = FW_STATE_NEW;
+		dev_dbg(dev, "%s: abort successful\n", __func__);
+		break;
+
+	default: /* bad control flag */
+		dev_warn(dev, "%s: unknown control flag: %#x\n",
+				__func__, nd_cmd->ctrl_flags);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int nd_intel_test_finish_query(struct nfit_test *t,
+		struct nd_intel_fw_finish_query *nd_cmd,
+		unsigned int buf_len, int idx)
+{
+	struct device *dev = &t->pdev.dev;
+	struct nfit_test_fw *fw = &t->fw[idx];
+
+	dev_dbg(dev, "%s(nfit_test: %p nd_cmd: %p buf_len: %u idx: %d)\n",
+			__func__, t, nd_cmd, buf_len, idx);
+
+	if (buf_len < sizeof(*nd_cmd))
+		return -EINVAL;
+
+	if (nd_cmd->context != fw->context) {
+		dev_dbg(dev, "%s: incorrect context: in: %#x correct: %#x\n",
+				__func__, nd_cmd->context, fw->context);
+		nd_cmd->status = 0x10007;
+		return 0;
+	}
+
+	dev_dbg(dev, "%s context: %#x\n", __func__, nd_cmd->context);
+
+	switch (fw->state) {
+	case FW_STATE_NEW:
+		nd_cmd->updated_fw_rev = 0;
+		nd_cmd->status = 0;
+		dev_dbg(dev, "%s: new state\n", __func__);
+		break;
+
+	case FW_STATE_IN_PROGRESS:
+		/* sequencing error */
+		nd_cmd->status = 0x40007;
+		nd_cmd->updated_fw_rev = 0;
+		dev_dbg(dev, "%s: sequence error\n", __func__);
+		break;
+
+	case FW_STATE_VERIFY:
+		if (time_is_after_jiffies64(fw->end_time)) {
+			nd_cmd->updated_fw_rev = 0;
+			nd_cmd->status = 0x20007;
+			dev_dbg(dev, "%s: still verifying\n", __func__);
+			break;
+		}
+
+		dev_dbg(dev, "%s: transition out verify\n", __func__);
+		fw->state = FW_STATE_UPDATED;
+		/* we are going to fall through if it's "done" */
+	case FW_STATE_UPDATED:
+		nd_cmd->status = 0;
+		/* bogus test version */
+		fw->version = nd_cmd->updated_fw_rev =
+			INTEL_FW_FAKE_VERSION;
+		dev_dbg(dev, "%s: updated\n", __func__);
+		break;
+
+	default: /* we should never get here */
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int nfit_test_cmd_get_config_size(struct nd_cmd_get_config_size *nd_cmd,
+		unsigned int buf_len)
+{
+	if (buf_len < sizeof(*nd_cmd))
+		return -EINVAL;
+
+	nd_cmd->status = 0;
+	nd_cmd->config_size = LABEL_SIZE;
+	nd_cmd->max_xfer = SZ_4K;
+
+	return 0;
+}
+
+static int nfit_test_cmd_get_config_data(struct nd_cmd_get_config_data_hdr
+		*nd_cmd, unsigned int buf_len, void *label)
+{
+	unsigned int len, offset = nd_cmd->in_offset;
+	int rc;
+
+	if (buf_len < sizeof(*nd_cmd))
+		return -EINVAL;
+	if (offset >= LABEL_SIZE)
+		return -EINVAL;
+	if (nd_cmd->in_length + sizeof(*nd_cmd) > buf_len)
+		return -EINVAL;
+
+	nd_cmd->status = 0;
+	len = min(nd_cmd->in_length, LABEL_SIZE - offset);
+	memcpy(nd_cmd->out_buf, label + offset, len);
+	rc = buf_len - sizeof(*nd_cmd) - len;
+
+	return rc;
+}
+
+static int nfit_test_cmd_set_config_data(struct nd_cmd_set_config_hdr *nd_cmd,
+		unsigned int buf_len, void *label)
+{
+	unsigned int len, offset = nd_cmd->in_offset;
+	u32 *status;
+	int rc;
+
+	if (buf_len < sizeof(*nd_cmd))
+		return -EINVAL;
+	if (offset >= LABEL_SIZE)
+		return -EINVAL;
+	if (nd_cmd->in_length + sizeof(*nd_cmd) + 4 > buf_len)
+		return -EINVAL;
+
+	status = (void *)nd_cmd + nd_cmd->in_length + sizeof(*nd_cmd);
+	*status = 0;
+	len = min(nd_cmd->in_length, LABEL_SIZE - offset);
+	memcpy(label + offset, nd_cmd->in_buf, len);
+	rc = buf_len - sizeof(*nd_cmd) - (len + 4);
+
+	return rc;
+}
+
+#define NFIT_TEST_CLEAR_ERR_UNIT 256
+
+static int nfit_test_cmd_ars_cap(struct nd_cmd_ars_cap *nd_cmd,
+		unsigned int buf_len)
+{
+	int ars_recs;
+
+	if (buf_len < sizeof(*nd_cmd))
+		return -EINVAL;
+
+	/* for testing, only store up to n records that fit within 4k */
+	ars_recs = SZ_4K / sizeof(struct nd_ars_record);
+
+	nd_cmd->max_ars_out = sizeof(struct nd_cmd_ars_status)
+		+ ars_recs * sizeof(struct nd_ars_record);
+	nd_cmd->status = (ND_ARS_PERSISTENT | ND_ARS_VOLATILE) << 16;
+	nd_cmd->clear_err_unit = NFIT_TEST_CLEAR_ERR_UNIT;
+
+	return 0;
+}
+
+static void post_ars_status(struct ars_state *ars_state,
+		struct badrange *badrange, u64 addr, u64 len)
+{
+	struct nd_cmd_ars_status *ars_status;
+	struct nd_ars_record *ars_record;
+	struct badrange_entry *be;
+	u64 end = addr + len - 1;
+	int i = 0;
+
+	ars_state->deadline = jiffies + 1*HZ;
+	ars_status = ars_state->ars_status;
+	ars_status->status = 0;
+	ars_status->address = addr;
+	ars_status->length = len;
+	ars_status->type = ND_ARS_PERSISTENT;
+
+	spin_lock(&badrange->lock);
+	list_for_each_entry(be, &badrange->list, list) {
+		u64 be_end = be->start + be->length - 1;
+		u64 rstart, rend;
+
+		/* skip entries outside the range */
+		if (be_end < addr || be->start > end)
+			continue;
+
+		rstart = (be->start < addr) ? addr : be->start;
+		rend = (be_end < end) ? be_end : end;
+		ars_record = &ars_status->records[i];
+		ars_record->handle = 0;
+		ars_record->err_address = rstart;
+		ars_record->length = rend - rstart + 1;
+		i++;
+	}
+	spin_unlock(&badrange->lock);
+	ars_status->num_records = i;
+	ars_status->out_length = sizeof(struct nd_cmd_ars_status)
+		+ i * sizeof(struct nd_ars_record);
+}
+
+static int nfit_test_cmd_ars_start(struct nfit_test *t,
+		struct ars_state *ars_state,
+		struct nd_cmd_ars_start *ars_start, unsigned int buf_len,
+		int *cmd_rc)
+{
+	if (buf_len < sizeof(*ars_start))
+		return -EINVAL;
+
+	spin_lock(&ars_state->lock);
+	if (time_before(jiffies, ars_state->deadline)) {
+		ars_start->status = NFIT_ARS_START_BUSY;
+		*cmd_rc = -EBUSY;
+	} else {
+		ars_start->status = 0;
+		ars_start->scrub_time = 1;
+		post_ars_status(ars_state, &t->badrange, ars_start->address,
+				ars_start->length);
+		*cmd_rc = 0;
+	}
+	spin_unlock(&ars_state->lock);
+
+	return 0;
+}
+
+static int nfit_test_cmd_ars_status(struct ars_state *ars_state,
+		struct nd_cmd_ars_status *ars_status, unsigned int buf_len,
+		int *cmd_rc)
+{
+	if (buf_len < ars_state->ars_status->out_length)
+		return -EINVAL;
+
+	spin_lock(&ars_state->lock);
+	if (time_before(jiffies, ars_state->deadline)) {
+		memset(ars_status, 0, buf_len);
+		ars_status->status = NFIT_ARS_STATUS_BUSY;
+		ars_status->out_length = sizeof(*ars_status);
+		*cmd_rc = -EBUSY;
+	} else {
+		memcpy(ars_status, ars_state->ars_status,
+				ars_state->ars_status->out_length);
+		*cmd_rc = 0;
+	}
+	spin_unlock(&ars_state->lock);
+	return 0;
+}
+
+static int nfit_test_cmd_clear_error(struct nfit_test *t,
+		struct nd_cmd_clear_error *clear_err,
+		unsigned int buf_len, int *cmd_rc)
+{
+	const u64 mask = NFIT_TEST_CLEAR_ERR_UNIT - 1;
+	if (buf_len < sizeof(*clear_err))
+		return -EINVAL;
+
+	if ((clear_err->address & mask) || (clear_err->length & mask))
+		return -EINVAL;
+
+	badrange_forget(&t->badrange, clear_err->address, clear_err->length);
+	clear_err->status = 0;
+	clear_err->cleared = clear_err->length;
+	*cmd_rc = 0;
+	return 0;
+}
+
+struct region_search_spa {
+	u64 addr;
+	struct nd_region *region;
+};
+
+static int is_region_device(struct device *dev)
+{
+	return !strncmp(dev->kobj.name, "region", 6);
+}
+
+static int nfit_test_search_region_spa(struct device *dev, void *data)
+{
+	struct region_search_spa *ctx = data;
+	struct nd_region *nd_region;
+	resource_size_t ndr_end;
+
+	if (!is_region_device(dev))
+		return 0;
+
+	nd_region = to_nd_region(dev);
+	ndr_end = nd_region->ndr_start + nd_region->ndr_size;
+
+	if (ctx->addr >= nd_region->ndr_start && ctx->addr < ndr_end) {
+		ctx->region = nd_region;
+		return 1;
+	}
+
+	return 0;
+}
+
+static int nfit_test_search_spa(struct nvdimm_bus *bus,
+		struct nd_cmd_translate_spa *spa)
+{
+	int ret;
+	struct nd_region *nd_region = NULL;
+	struct nvdimm *nvdimm = NULL;
+	struct nd_mapping *nd_mapping = NULL;
+	struct region_search_spa ctx = {
+		.addr = spa->spa,
+		.region = NULL,
+	};
+	u64 dpa;
+
+	ret = device_for_each_child(&bus->dev, &ctx,
+				nfit_test_search_region_spa);
+
+	if (!ret)
+		return -ENODEV;
+
+	nd_region = ctx.region;
+
+	dpa = ctx.addr - nd_region->ndr_start;
+
+	/*
+	 * last dimm is selected for test
+	 */
+	nd_mapping = &nd_region->mapping[nd_region->ndr_mappings - 1];
+	nvdimm = nd_mapping->nvdimm;
+
+	spa->devices[0].nfit_device_handle = handle[nvdimm->id];
+	spa->num_nvdimms = 1;
+	spa->devices[0].dpa = dpa;
+
+	return 0;
+}
+
+static int nfit_test_cmd_translate_spa(struct nvdimm_bus *bus,
+		struct nd_cmd_translate_spa *spa, unsigned int buf_len)
+{
+	if (buf_len < spa->translate_length)
+		return -EINVAL;
+
+	if (nfit_test_search_spa(bus, spa) < 0 || !spa->num_nvdimms)
+		spa->status = 2;
+
+	return 0;
+}
+
+static int nfit_test_cmd_smart(struct nd_intel_smart *smart, unsigned int buf_len,
+		struct nd_intel_smart *smart_data)
+{
+	if (buf_len < sizeof(*smart))
+		return -EINVAL;
+	memcpy(smart, smart_data, sizeof(*smart));
+	return 0;
+}
+
+static int nfit_test_cmd_smart_threshold(
+		struct nd_intel_smart_threshold *out,
+		unsigned int buf_len,
+		struct nd_intel_smart_threshold *smart_t)
+{
+	if (buf_len < sizeof(*smart_t))
+		return -EINVAL;
+	memcpy(out, smart_t, sizeof(*smart_t));
+	return 0;
+}
+
+static void smart_notify(struct device *bus_dev,
+		struct device *dimm_dev, struct nd_intel_smart *smart,
+		struct nd_intel_smart_threshold *thresh)
+{
+	dev_dbg(dimm_dev, "%s: alarm: %#x spares: %d (%d) mtemp: %d (%d) ctemp: %d (%d)\n",
+			__func__, thresh->alarm_control, thresh->spares,
+			smart->spares, thresh->media_temperature,
+			smart->media_temperature, thresh->ctrl_temperature,
+			smart->ctrl_temperature);
+	if (((thresh->alarm_control & ND_INTEL_SMART_SPARE_TRIP)
+				&& smart->spares
+				<= thresh->spares)
+			|| ((thresh->alarm_control & ND_INTEL_SMART_TEMP_TRIP)
+				&& smart->media_temperature
+				>= thresh->media_temperature)
+			|| ((thresh->alarm_control & ND_INTEL_SMART_CTEMP_TRIP)
+				&& smart->ctrl_temperature
+				>= thresh->ctrl_temperature)
+			|| (smart->health != ND_INTEL_SMART_NON_CRITICAL_HEALTH)
+			|| (smart->shutdown_state != 0)) {
+		device_lock(bus_dev);
+		__acpi_nvdimm_notify(dimm_dev, 0x81);
+		device_unlock(bus_dev);
+	}
+}
+
+static int nfit_test_cmd_smart_set_threshold(
+		struct nd_intel_smart_set_threshold *in,
+		unsigned int buf_len,
+		struct nd_intel_smart_threshold *thresh,
+		struct nd_intel_smart *smart,
+		struct device *bus_dev, struct device *dimm_dev)
+{
+	unsigned int size;
+
+	size = sizeof(*in) - 4;
+	if (buf_len < size)
+		return -EINVAL;
+	memcpy(thresh->data, in, size);
+	in->status = 0;
+	smart_notify(bus_dev, dimm_dev, smart, thresh);
+
+	return 0;
+}
+
+static int nfit_test_cmd_smart_inject(
+		struct nd_intel_smart_inject *inj,
+		unsigned int buf_len,
+		struct nd_intel_smart_threshold *thresh,
+		struct nd_intel_smart *smart,
+		struct device *bus_dev, struct device *dimm_dev)
+{
+	if (buf_len != sizeof(*inj))
+		return -EINVAL;
+
+	if (inj->flags & ND_INTEL_SMART_INJECT_MTEMP) {
+		if (inj->mtemp_enable)
+			smart->media_temperature = inj->media_temperature;
+		else
+			smart->media_temperature = smart_def.media_temperature;
+	}
+	if (inj->flags & ND_INTEL_SMART_INJECT_SPARE) {
+		if (inj->spare_enable)
+			smart->spares = inj->spares;
+		else
+			smart->spares = smart_def.spares;
+	}
+	if (inj->flags & ND_INTEL_SMART_INJECT_FATAL) {
+		if (inj->fatal_enable)
+			smart->health = ND_INTEL_SMART_FATAL_HEALTH;
+		else
+			smart->health = ND_INTEL_SMART_NON_CRITICAL_HEALTH;
+	}
+	if (inj->flags & ND_INTEL_SMART_INJECT_SHUTDOWN) {
+		if (inj->unsafe_shutdown_enable) {
+			smart->shutdown_state = 1;
+			smart->shutdown_count++;
+		} else
+			smart->shutdown_state = 0;
+	}
+	inj->status = 0;
+	smart_notify(bus_dev, dimm_dev, smart, thresh);
+
+	return 0;
+}
+
+static void uc_error_notify(struct work_struct *work)
+{
+	struct nfit_test *t = container_of(work, typeof(*t), work);
+
+	__acpi_nfit_notify(&t->pdev.dev, t, NFIT_NOTIFY_UC_MEMORY_ERROR);
+}
+
+static int nfit_test_cmd_ars_error_inject(struct nfit_test *t,
+		struct nd_cmd_ars_err_inj *err_inj, unsigned int buf_len)
+{
+	int rc;
+
+	if (buf_len != sizeof(*err_inj)) {
+		rc = -EINVAL;
+		goto err;
+	}
+
+	if (err_inj->err_inj_spa_range_length <= 0) {
+		rc = -EINVAL;
+		goto err;
+	}
+
+	rc =  badrange_add(&t->badrange, err_inj->err_inj_spa_range_base,
+			err_inj->err_inj_spa_range_length);
+	if (rc < 0)
+		goto err;
+
+	if (err_inj->err_inj_options & (1 << ND_ARS_ERR_INJ_OPT_NOTIFY))
+		queue_work(nfit_wq, &t->work);
+
+	err_inj->status = 0;
+	return 0;
+
+err:
+	err_inj->status = NFIT_ARS_INJECT_INVALID;
+	return rc;
+}
+
+static int nfit_test_cmd_ars_inject_clear(struct nfit_test *t,
+		struct nd_cmd_ars_err_inj_clr *err_clr, unsigned int buf_len)
+{
+	int rc;
+
+	if (buf_len != sizeof(*err_clr)) {
+		rc = -EINVAL;
+		goto err;
+	}
+
+	if (err_clr->err_inj_clr_spa_range_length <= 0) {
+		rc = -EINVAL;
+		goto err;
+	}
+
+	badrange_forget(&t->badrange, err_clr->err_inj_clr_spa_range_base,
+			err_clr->err_inj_clr_spa_range_length);
+
+	err_clr->status = 0;
+	return 0;
+
+err:
+	err_clr->status = NFIT_ARS_INJECT_INVALID;
+	return rc;
+}
+
+static int nfit_test_cmd_ars_inject_status(struct nfit_test *t,
+		struct nd_cmd_ars_err_inj_stat *err_stat,
+		unsigned int buf_len)
+{
+	struct badrange_entry *be;
+	int max = SZ_4K / sizeof(struct nd_error_stat_query_record);
+	int i = 0;
+
+	err_stat->status = 0;
+	spin_lock(&t->badrange.lock);
+	list_for_each_entry(be, &t->badrange.list, list) {
+		err_stat->record[i].err_inj_stat_spa_range_base = be->start;
+		err_stat->record[i].err_inj_stat_spa_range_length = be->length;
+		i++;
+		if (i > max)
+			break;
+	}
+	spin_unlock(&t->badrange.lock);
+	err_stat->inj_err_rec_count = i;
+
+	return 0;
+}
+
+static int nd_intel_test_cmd_set_lss_status(struct nfit_test *t,
+		struct nd_intel_lss *nd_cmd, unsigned int buf_len)
+{
+	struct device *dev = &t->pdev.dev;
+
+	if (buf_len < sizeof(*nd_cmd))
+		return -EINVAL;
+
+	switch (nd_cmd->enable) {
+	case 0:
+		nd_cmd->status = 0;
+		dev_dbg(dev, "%s: Latch System Shutdown Status disabled\n",
+				__func__);
+		break;
+	case 1:
+		nd_cmd->status = 0;
+		dev_dbg(dev, "%s: Latch System Shutdown Status enabled\n",
+				__func__);
+		break;
+	default:
+		dev_warn(dev, "Unknown enable value: %#x\n", nd_cmd->enable);
+		nd_cmd->status = 0x3;
+		break;
+	}
+
+
+	return 0;
+}
+
+static int override_return_code(int dimm, unsigned int func, int rc)
+{
+	if ((1 << func) & dimm_fail_cmd_flags[dimm]) {
+		if (dimm_fail_cmd_code[dimm])
+			return dimm_fail_cmd_code[dimm];
+		return -EIO;
+	}
+	return rc;
+}
+
+static int get_dimm(struct nfit_mem *nfit_mem, unsigned int func)
+{
+	int i;
+
+	/* lookup per-dimm data */
+	for (i = 0; i < ARRAY_SIZE(handle); i++)
+		if (__to_nfit_memdev(nfit_mem)->device_handle == handle[i])
+			break;
+	if (i >= ARRAY_SIZE(handle))
+		return -ENXIO;
+	return i;
+}
+
+static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc,
+		struct nvdimm *nvdimm, unsigned int cmd, void *buf,
+		unsigned int buf_len, int *cmd_rc)
+{
+	struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc);
+	struct nfit_test *t = container_of(acpi_desc, typeof(*t), acpi_desc);
+	unsigned int func = cmd;
+	int i, rc = 0, __cmd_rc;
+
+	if (!cmd_rc)
+		cmd_rc = &__cmd_rc;
+	*cmd_rc = 0;
+
+	if (nvdimm) {
+		struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm);
+		unsigned long cmd_mask = nvdimm_cmd_mask(nvdimm);
+
+		if (!nfit_mem)
+			return -ENOTTY;
+
+		if (cmd == ND_CMD_CALL) {
+			struct nd_cmd_pkg *call_pkg = buf;
+
+			buf_len = call_pkg->nd_size_in + call_pkg->nd_size_out;
+			buf = (void *) call_pkg->nd_payload;
+			func = call_pkg->nd_command;
+			if (call_pkg->nd_family != nfit_mem->family)
+				return -ENOTTY;
+
+			i = get_dimm(nfit_mem, func);
+			if (i < 0)
+				return i;
+
+			switch (func) {
+			case ND_INTEL_ENABLE_LSS_STATUS:
+				rc = nd_intel_test_cmd_set_lss_status(t,
+						buf, buf_len);
+				break;
+			case ND_INTEL_FW_GET_INFO:
+				rc = nd_intel_test_get_fw_info(t, buf,
+						buf_len, i - t->dcr_idx);
+				break;
+			case ND_INTEL_FW_START_UPDATE:
+				rc = nd_intel_test_start_update(t, buf,
+						buf_len, i - t->dcr_idx);
+				break;
+			case ND_INTEL_FW_SEND_DATA:
+				rc = nd_intel_test_send_data(t, buf,
+						buf_len, i - t->dcr_idx);
+				break;
+			case ND_INTEL_FW_FINISH_UPDATE:
+				rc = nd_intel_test_finish_fw(t, buf,
+						buf_len, i - t->dcr_idx);
+				break;
+			case ND_INTEL_FW_FINISH_QUERY:
+				rc = nd_intel_test_finish_query(t, buf,
+						buf_len, i - t->dcr_idx);
+				break;
+			case ND_INTEL_SMART:
+				rc = nfit_test_cmd_smart(buf, buf_len,
+						&t->smart[i - t->dcr_idx]);
+				break;
+			case ND_INTEL_SMART_THRESHOLD:
+				rc = nfit_test_cmd_smart_threshold(buf,
+						buf_len,
+						&t->smart_threshold[i -
+							t->dcr_idx]);
+				break;
+			case ND_INTEL_SMART_SET_THRESHOLD:
+				rc = nfit_test_cmd_smart_set_threshold(buf,
+						buf_len,
+						&t->smart_threshold[i -
+							t->dcr_idx],
+						&t->smart[i - t->dcr_idx],
+						&t->pdev.dev, t->dimm_dev[i]);
+				break;
+			case ND_INTEL_SMART_INJECT:
+				rc = nfit_test_cmd_smart_inject(buf,
+						buf_len,
+						&t->smart_threshold[i -
+							t->dcr_idx],
+						&t->smart[i - t->dcr_idx],
+						&t->pdev.dev, t->dimm_dev[i]);
+				break;
+			default:
+				return -ENOTTY;
+			}
+			return override_return_code(i, func, rc);
+		}
+
+		if (!test_bit(cmd, &cmd_mask)
+				|| !test_bit(func, &nfit_mem->dsm_mask))
+			return -ENOTTY;
+
+		i = get_dimm(nfit_mem, func);
+		if (i < 0)
+			return i;
+
+		switch (func) {
+		case ND_CMD_GET_CONFIG_SIZE:
+			rc = nfit_test_cmd_get_config_size(buf, buf_len);
+			break;
+		case ND_CMD_GET_CONFIG_DATA:
+			rc = nfit_test_cmd_get_config_data(buf, buf_len,
+				t->label[i - t->dcr_idx]);
+			break;
+		case ND_CMD_SET_CONFIG_DATA:
+			rc = nfit_test_cmd_set_config_data(buf, buf_len,
+				t->label[i - t->dcr_idx]);
+			break;
+		default:
+			return -ENOTTY;
+		}
+		return override_return_code(i, func, rc);
+	} else {
+		struct ars_state *ars_state = &t->ars_state;
+		struct nd_cmd_pkg *call_pkg = buf;
+
+		if (!nd_desc)
+			return -ENOTTY;
+
+		if (cmd == ND_CMD_CALL) {
+			func = call_pkg->nd_command;
+
+			buf_len = call_pkg->nd_size_in + call_pkg->nd_size_out;
+			buf = (void *) call_pkg->nd_payload;
+
+			switch (func) {
+			case NFIT_CMD_TRANSLATE_SPA:
+				rc = nfit_test_cmd_translate_spa(
+					acpi_desc->nvdimm_bus, buf, buf_len);
+				return rc;
+			case NFIT_CMD_ARS_INJECT_SET:
+				rc = nfit_test_cmd_ars_error_inject(t, buf,
+					buf_len);
+				return rc;
+			case NFIT_CMD_ARS_INJECT_CLEAR:
+				rc = nfit_test_cmd_ars_inject_clear(t, buf,
+					buf_len);
+				return rc;
+			case NFIT_CMD_ARS_INJECT_GET:
+				rc = nfit_test_cmd_ars_inject_status(t, buf,
+					buf_len);
+				return rc;
+			default:
+				return -ENOTTY;
+			}
+		}
+
+		if (!nd_desc || !test_bit(cmd, &nd_desc->cmd_mask))
+			return -ENOTTY;
+
+		switch (func) {
+		case ND_CMD_ARS_CAP:
+			rc = nfit_test_cmd_ars_cap(buf, buf_len);
+			break;
+		case ND_CMD_ARS_START:
+			rc = nfit_test_cmd_ars_start(t, ars_state, buf,
+					buf_len, cmd_rc);
+			break;
+		case ND_CMD_ARS_STATUS:
+			rc = nfit_test_cmd_ars_status(ars_state, buf, buf_len,
+					cmd_rc);
+			break;
+		case ND_CMD_CLEAR_ERROR:
+			rc = nfit_test_cmd_clear_error(t, buf, buf_len, cmd_rc);
+			break;
+		default:
+			return -ENOTTY;
+		}
+	}
+
+	return rc;
+}
+
+static DEFINE_SPINLOCK(nfit_test_lock);
+static struct nfit_test *instances[NUM_NFITS];
+
+static void release_nfit_res(void *data)
+{
+	struct nfit_test_resource *nfit_res = data;
+
+	spin_lock(&nfit_test_lock);
+	list_del(&nfit_res->list);
+	spin_unlock(&nfit_test_lock);
+
+	if (resource_size(&nfit_res->res) >= DIMM_SIZE)
+		gen_pool_free(nfit_pool, nfit_res->res.start,
+				resource_size(&nfit_res->res));
+	vfree(nfit_res->buf);
+	kfree(nfit_res);
+}
+
+static void *__test_alloc(struct nfit_test *t, size_t size, dma_addr_t *dma,
+		void *buf)
+{
+	struct device *dev = &t->pdev.dev;
+	struct nfit_test_resource *nfit_res = kzalloc(sizeof(*nfit_res),
+			GFP_KERNEL);
+	int rc;
+
+	if (!buf || !nfit_res || !*dma)
+		goto err;
+	rc = devm_add_action(dev, release_nfit_res, nfit_res);
+	if (rc)
+		goto err;
+	INIT_LIST_HEAD(&nfit_res->list);
+	memset(buf, 0, size);
+	nfit_res->dev = dev;
+	nfit_res->buf = buf;
+	nfit_res->res.start = *dma;
+	nfit_res->res.end = *dma + size - 1;
+	nfit_res->res.name = "NFIT";
+	spin_lock_init(&nfit_res->lock);
+	INIT_LIST_HEAD(&nfit_res->requests);
+	spin_lock(&nfit_test_lock);
+	list_add(&nfit_res->list, &t->resources);
+	spin_unlock(&nfit_test_lock);
+
+	return nfit_res->buf;
+ err:
+	if (*dma && size >= DIMM_SIZE)
+		gen_pool_free(nfit_pool, *dma, size);
+	if (buf)
+		vfree(buf);
+	kfree(nfit_res);
+	return NULL;
+}
+
+static void *test_alloc(struct nfit_test *t, size_t size, dma_addr_t *dma)
+{
+	struct genpool_data_align data = {
+		.align = SZ_128M,
+	};
+	void *buf = vmalloc(size);
+
+	if (size >= DIMM_SIZE)
+		*dma = gen_pool_alloc_algo(nfit_pool, size,
+				gen_pool_first_fit_align, &data);
+	else
+		*dma = (unsigned long) buf;
+	return __test_alloc(t, size, dma, buf);
+}
+
+static struct nfit_test_resource *nfit_test_lookup(resource_size_t addr)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(instances); i++) {
+		struct nfit_test_resource *n, *nfit_res = NULL;
+		struct nfit_test *t = instances[i];
+
+		if (!t)
+			continue;
+		spin_lock(&nfit_test_lock);
+		list_for_each_entry(n, &t->resources, list) {
+			if (addr >= n->res.start && (addr < n->res.start
+						+ resource_size(&n->res))) {
+				nfit_res = n;
+				break;
+			} else if (addr >= (unsigned long) n->buf
+					&& (addr < (unsigned long) n->buf
+						+ resource_size(&n->res))) {
+				nfit_res = n;
+				break;
+			}
+		}
+		spin_unlock(&nfit_test_lock);
+		if (nfit_res)
+			return nfit_res;
+	}
+
+	return NULL;
+}
+
+static int ars_state_init(struct device *dev, struct ars_state *ars_state)
+{
+	/* for testing, only store up to n records that fit within 4k */
+	ars_state->ars_status = devm_kzalloc(dev,
+			sizeof(struct nd_cmd_ars_status) + SZ_4K, GFP_KERNEL);
+	if (!ars_state->ars_status)
+		return -ENOMEM;
+	spin_lock_init(&ars_state->lock);
+	return 0;
+}
+
+static void put_dimms(void *data)
+{
+	struct nfit_test *t = data;
+	int i;
+
+	for (i = 0; i < t->num_dcr; i++)
+		if (t->dimm_dev[i])
+			device_unregister(t->dimm_dev[i]);
+}
+
+static struct class *nfit_test_dimm;
+
+static int dimm_name_to_id(struct device *dev)
+{
+	int dimm;
+
+	if (sscanf(dev_name(dev), "test_dimm%d", &dimm) != 1)
+		return -ENXIO;
+	return dimm;
+}
+
+static ssize_t handle_show(struct device *dev, struct device_attribute *attr,
+		char *buf)
+{
+	int dimm = dimm_name_to_id(dev);
+
+	if (dimm < 0)
+		return dimm;
+
+	return sprintf(buf, "%#x\n", handle[dimm]);
+}
+DEVICE_ATTR_RO(handle);
+
+static ssize_t fail_cmd_show(struct device *dev, struct device_attribute *attr,
+		char *buf)
+{
+	int dimm = dimm_name_to_id(dev);
+
+	if (dimm < 0)
+		return dimm;
+
+	return sprintf(buf, "%#lx\n", dimm_fail_cmd_flags[dimm]);
+}
+
+static ssize_t fail_cmd_store(struct device *dev, struct device_attribute *attr,
+		const char *buf, size_t size)
+{
+	int dimm = dimm_name_to_id(dev);
+	unsigned long val;
+	ssize_t rc;
+
+	if (dimm < 0)
+		return dimm;
+
+	rc = kstrtol(buf, 0, &val);
+	if (rc)
+		return rc;
+
+	dimm_fail_cmd_flags[dimm] = val;
+	return size;
+}
+static DEVICE_ATTR_RW(fail_cmd);
+
+static ssize_t fail_cmd_code_show(struct device *dev, struct device_attribute *attr,
+		char *buf)
+{
+	int dimm = dimm_name_to_id(dev);
+
+	if (dimm < 0)
+		return dimm;
+
+	return sprintf(buf, "%d\n", dimm_fail_cmd_code[dimm]);
+}
+
+static ssize_t fail_cmd_code_store(struct device *dev, struct device_attribute *attr,
+		const char *buf, size_t size)
+{
+	int dimm = dimm_name_to_id(dev);
+	unsigned long val;
+	ssize_t rc;
+
+	if (dimm < 0)
+		return dimm;
+
+	rc = kstrtol(buf, 0, &val);
+	if (rc)
+		return rc;
+
+	dimm_fail_cmd_code[dimm] = val;
+	return size;
+}
+static DEVICE_ATTR_RW(fail_cmd_code);
+
+static struct attribute *nfit_test_dimm_attributes[] = {
+	&dev_attr_fail_cmd.attr,
+	&dev_attr_fail_cmd_code.attr,
+	&dev_attr_handle.attr,
+	NULL,
+};
+
+static struct attribute_group nfit_test_dimm_attribute_group = {
+	.attrs = nfit_test_dimm_attributes,
+};
+
+static const struct attribute_group *nfit_test_dimm_attribute_groups[] = {
+	&nfit_test_dimm_attribute_group,
+	NULL,
+};
+
+static int nfit_test_dimm_init(struct nfit_test *t)
+{
+	int i;
+
+	if (devm_add_action_or_reset(&t->pdev.dev, put_dimms, t))
+		return -ENOMEM;
+	for (i = 0; i < t->num_dcr; i++) {
+		t->dimm_dev[i] = device_create_with_groups(nfit_test_dimm,
+				&t->pdev.dev, 0, NULL,
+				nfit_test_dimm_attribute_groups,
+				"test_dimm%d", i + t->dcr_idx);
+		if (!t->dimm_dev[i])
+			return -ENOMEM;
+	}
+	return 0;
+}
+
+static void smart_init(struct nfit_test *t)
+{
+	int i;
+	const struct nd_intel_smart_threshold smart_t_data = {
+		.alarm_control = ND_INTEL_SMART_SPARE_TRIP
+			| ND_INTEL_SMART_TEMP_TRIP,
+		.media_temperature = 40 * 16,
+		.ctrl_temperature = 30 * 16,
+		.spares = 5,
+	};
+
+	for (i = 0; i < t->num_dcr; i++) {
+		memcpy(&t->smart[i], &smart_def, sizeof(smart_def));
+		memcpy(&t->smart_threshold[i], &smart_t_data,
+				sizeof(smart_t_data));
+	}
+}
+
+static int nfit_test0_alloc(struct nfit_test *t)
+{
+	size_t nfit_size = sizeof(struct acpi_nfit_system_address) * NUM_SPA
+			+ sizeof(struct acpi_nfit_memory_map) * NUM_MEM
+			+ sizeof(struct acpi_nfit_control_region) * NUM_DCR
+			+ offsetof(struct acpi_nfit_control_region,
+					window_size) * NUM_DCR
+			+ sizeof(struct acpi_nfit_data_region) * NUM_BDW
+			+ (sizeof(struct acpi_nfit_flush_address)
+					+ sizeof(u64) * NUM_HINTS) * NUM_DCR
+			+ sizeof(struct acpi_nfit_capabilities);
+	int i;
+
+	t->nfit_buf = test_alloc(t, nfit_size, &t->nfit_dma);
+	if (!t->nfit_buf)
+		return -ENOMEM;
+	t->nfit_size = nfit_size;
+
+	t->spa_set[0] = test_alloc(t, SPA0_SIZE, &t->spa_set_dma[0]);
+	if (!t->spa_set[0])
+		return -ENOMEM;
+
+	t->spa_set[1] = test_alloc(t, SPA1_SIZE, &t->spa_set_dma[1]);
+	if (!t->spa_set[1])
+		return -ENOMEM;
+
+	t->spa_set[2] = test_alloc(t, SPA0_SIZE, &t->spa_set_dma[2]);
+	if (!t->spa_set[2])
+		return -ENOMEM;
+
+	for (i = 0; i < t->num_dcr; i++) {
+		t->dimm[i] = test_alloc(t, DIMM_SIZE, &t->dimm_dma[i]);
+		if (!t->dimm[i])
+			return -ENOMEM;
+
+		t->label[i] = test_alloc(t, LABEL_SIZE, &t->label_dma[i]);
+		if (!t->label[i])
+			return -ENOMEM;
+		sprintf(t->label[i], "label%d", i);
+
+		t->flush[i] = test_alloc(t, max(PAGE_SIZE,
+					sizeof(u64) * NUM_HINTS),
+				&t->flush_dma[i]);
+		if (!t->flush[i])
+			return -ENOMEM;
+	}
+
+	for (i = 0; i < t->num_dcr; i++) {
+		t->dcr[i] = test_alloc(t, LABEL_SIZE, &t->dcr_dma[i]);
+		if (!t->dcr[i])
+			return -ENOMEM;
+	}
+
+	t->_fit = test_alloc(t, sizeof(union acpi_object **), &t->_fit_dma);
+	if (!t->_fit)
+		return -ENOMEM;
+
+	if (nfit_test_dimm_init(t))
+		return -ENOMEM;
+	smart_init(t);
+	return ars_state_init(&t->pdev.dev, &t->ars_state);
+}
+
+static int nfit_test1_alloc(struct nfit_test *t)
+{
+	size_t nfit_size = sizeof(struct acpi_nfit_system_address) * 2
+		+ sizeof(struct acpi_nfit_memory_map) * 2
+		+ offsetof(struct acpi_nfit_control_region, window_size) * 2;
+	int i;
+
+	t->nfit_buf = test_alloc(t, nfit_size, &t->nfit_dma);
+	if (!t->nfit_buf)
+		return -ENOMEM;
+	t->nfit_size = nfit_size;
+
+	t->spa_set[0] = test_alloc(t, SPA2_SIZE, &t->spa_set_dma[0]);
+	if (!t->spa_set[0])
+		return -ENOMEM;
+
+	for (i = 0; i < t->num_dcr; i++) {
+		t->label[i] = test_alloc(t, LABEL_SIZE, &t->label_dma[i]);
+		if (!t->label[i])
+			return -ENOMEM;
+		sprintf(t->label[i], "label%d", i);
+	}
+
+	t->spa_set[1] = test_alloc(t, SPA_VCD_SIZE, &t->spa_set_dma[1]);
+	if (!t->spa_set[1])
+		return -ENOMEM;
+
+	if (nfit_test_dimm_init(t))
+		return -ENOMEM;
+	smart_init(t);
+	return ars_state_init(&t->pdev.dev, &t->ars_state);
+}
+
+static void dcr_common_init(struct acpi_nfit_control_region *dcr)
+{
+	dcr->vendor_id = 0xabcd;
+	dcr->device_id = 0;
+	dcr->revision_id = 1;
+	dcr->valid_fields = 1;
+	dcr->manufacturing_location = 0xa;
+	dcr->manufacturing_date = cpu_to_be16(2016);
+}
+
+static void nfit_test0_setup(struct nfit_test *t)
+{
+	const int flush_hint_size = sizeof(struct acpi_nfit_flush_address)
+		+ (sizeof(u64) * NUM_HINTS);
+	struct acpi_nfit_desc *acpi_desc;
+	struct acpi_nfit_memory_map *memdev;
+	void *nfit_buf = t->nfit_buf;
+	struct acpi_nfit_system_address *spa;
+	struct acpi_nfit_control_region *dcr;
+	struct acpi_nfit_data_region *bdw;
+	struct acpi_nfit_flush_address *flush;
+	struct acpi_nfit_capabilities *pcap;
+	unsigned int offset = 0, i;
+
+	/*
+	 * spa0 (interleave first half of dimm0 and dimm1, note storage
+	 * does not actually alias the related block-data-window
+	 * regions)
+	 */
+	spa = nfit_buf;
+	spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
+	spa->header.length = sizeof(*spa);
+	memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_PM), 16);
+	spa->range_index = 0+1;
+	spa->address = t->spa_set_dma[0];
+	spa->length = SPA0_SIZE;
+	offset += spa->header.length;
+
+	/*
+	 * spa1 (interleave last half of the 4 DIMMS, note storage
+	 * does not actually alias the related block-data-window
+	 * regions)
+	 */
+	spa = nfit_buf + offset;
+	spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
+	spa->header.length = sizeof(*spa);
+	memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_PM), 16);
+	spa->range_index = 1+1;
+	spa->address = t->spa_set_dma[1];
+	spa->length = SPA1_SIZE;
+	offset += spa->header.length;
+
+	/* spa2 (dcr0) dimm0 */
+	spa = nfit_buf + offset;
+	spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
+	spa->header.length = sizeof(*spa);
+	memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_DCR), 16);
+	spa->range_index = 2+1;
+	spa->address = t->dcr_dma[0];
+	spa->length = DCR_SIZE;
+	offset += spa->header.length;
+
+	/* spa3 (dcr1) dimm1 */
+	spa = nfit_buf + offset;
+	spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
+	spa->header.length = sizeof(*spa);
+	memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_DCR), 16);
+	spa->range_index = 3+1;
+	spa->address = t->dcr_dma[1];
+	spa->length = DCR_SIZE;
+	offset += spa->header.length;
+
+	/* spa4 (dcr2) dimm2 */
+	spa = nfit_buf + offset;
+	spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
+	spa->header.length = sizeof(*spa);
+	memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_DCR), 16);
+	spa->range_index = 4+1;
+	spa->address = t->dcr_dma[2];
+	spa->length = DCR_SIZE;
+	offset += spa->header.length;
+
+	/* spa5 (dcr3) dimm3 */
+	spa = nfit_buf + offset;
+	spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
+	spa->header.length = sizeof(*spa);
+	memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_DCR), 16);
+	spa->range_index = 5+1;
+	spa->address = t->dcr_dma[3];
+	spa->length = DCR_SIZE;
+	offset += spa->header.length;
+
+	/* spa6 (bdw for dcr0) dimm0 */
+	spa = nfit_buf + offset;
+	spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
+	spa->header.length = sizeof(*spa);
+	memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_BDW), 16);
+	spa->range_index = 6+1;
+	spa->address = t->dimm_dma[0];
+	spa->length = DIMM_SIZE;
+	offset += spa->header.length;
+
+	/* spa7 (bdw for dcr1) dimm1 */
+	spa = nfit_buf + offset;
+	spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
+	spa->header.length = sizeof(*spa);
+	memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_BDW), 16);
+	spa->range_index = 7+1;
+	spa->address = t->dimm_dma[1];
+	spa->length = DIMM_SIZE;
+	offset += spa->header.length;
+
+	/* spa8 (bdw for dcr2) dimm2 */
+	spa = nfit_buf + offset;
+	spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
+	spa->header.length = sizeof(*spa);
+	memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_BDW), 16);
+	spa->range_index = 8+1;
+	spa->address = t->dimm_dma[2];
+	spa->length = DIMM_SIZE;
+	offset += spa->header.length;
+
+	/* spa9 (bdw for dcr3) dimm3 */
+	spa = nfit_buf + offset;
+	spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
+	spa->header.length = sizeof(*spa);
+	memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_BDW), 16);
+	spa->range_index = 9+1;
+	spa->address = t->dimm_dma[3];
+	spa->length = DIMM_SIZE;
+	offset += spa->header.length;
+
+	/* mem-region0 (spa0, dimm0) */
+	memdev = nfit_buf + offset;
+	memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
+	memdev->header.length = sizeof(*memdev);
+	memdev->device_handle = handle[0];
+	memdev->physical_id = 0;
+	memdev->region_id = 0;
+	memdev->range_index = 0+1;
+	memdev->region_index = 4+1;
+	memdev->region_size = SPA0_SIZE/2;
+	memdev->region_offset = 1;
+	memdev->address = 0;
+	memdev->interleave_index = 0;
+	memdev->interleave_ways = 2;
+	offset += memdev->header.length;
+
+	/* mem-region1 (spa0, dimm1) */
+	memdev = nfit_buf + offset;
+	memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
+	memdev->header.length = sizeof(*memdev);
+	memdev->device_handle = handle[1];
+	memdev->physical_id = 1;
+	memdev->region_id = 0;
+	memdev->range_index = 0+1;
+	memdev->region_index = 5+1;
+	memdev->region_size = SPA0_SIZE/2;
+	memdev->region_offset = (1 << 8);
+	memdev->address = 0;
+	memdev->interleave_index = 0;
+	memdev->interleave_ways = 2;
+	memdev->flags = ACPI_NFIT_MEM_HEALTH_ENABLED;
+	offset += memdev->header.length;
+
+	/* mem-region2 (spa1, dimm0) */
+	memdev = nfit_buf + offset;
+	memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
+	memdev->header.length = sizeof(*memdev);
+	memdev->device_handle = handle[0];
+	memdev->physical_id = 0;
+	memdev->region_id = 1;
+	memdev->range_index = 1+1;
+	memdev->region_index = 4+1;
+	memdev->region_size = SPA1_SIZE/4;
+	memdev->region_offset = (1 << 16);
+	memdev->address = SPA0_SIZE/2;
+	memdev->interleave_index = 0;
+	memdev->interleave_ways = 4;
+	memdev->flags = ACPI_NFIT_MEM_HEALTH_ENABLED;
+	offset += memdev->header.length;
+
+	/* mem-region3 (spa1, dimm1) */
+	memdev = nfit_buf + offset;
+	memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
+	memdev->header.length = sizeof(*memdev);
+	memdev->device_handle = handle[1];
+	memdev->physical_id = 1;
+	memdev->region_id = 1;
+	memdev->range_index = 1+1;
+	memdev->region_index = 5+1;
+	memdev->region_size = SPA1_SIZE/4;
+	memdev->region_offset = (1 << 24);
+	memdev->address = SPA0_SIZE/2;
+	memdev->interleave_index = 0;
+	memdev->interleave_ways = 4;
+	offset += memdev->header.length;
+
+	/* mem-region4 (spa1, dimm2) */
+	memdev = nfit_buf + offset;
+	memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
+	memdev->header.length = sizeof(*memdev);
+	memdev->device_handle = handle[2];
+	memdev->physical_id = 2;
+	memdev->region_id = 0;
+	memdev->range_index = 1+1;
+	memdev->region_index = 6+1;
+	memdev->region_size = SPA1_SIZE/4;
+	memdev->region_offset = (1ULL << 32);
+	memdev->address = SPA0_SIZE/2;
+	memdev->interleave_index = 0;
+	memdev->interleave_ways = 4;
+	memdev->flags = ACPI_NFIT_MEM_HEALTH_ENABLED;
+	offset += memdev->header.length;
+
+	/* mem-region5 (spa1, dimm3) */
+	memdev = nfit_buf + offset;
+	memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
+	memdev->header.length = sizeof(*memdev);
+	memdev->device_handle = handle[3];
+	memdev->physical_id = 3;
+	memdev->region_id = 0;
+	memdev->range_index = 1+1;
+	memdev->region_index = 7+1;
+	memdev->region_size = SPA1_SIZE/4;
+	memdev->region_offset = (1ULL << 40);
+	memdev->address = SPA0_SIZE/2;
+	memdev->interleave_index = 0;
+	memdev->interleave_ways = 4;
+	offset += memdev->header.length;
+
+	/* mem-region6 (spa/dcr0, dimm0) */
+	memdev = nfit_buf + offset;
+	memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
+	memdev->header.length = sizeof(*memdev);
+	memdev->device_handle = handle[0];
+	memdev->physical_id = 0;
+	memdev->region_id = 0;
+	memdev->range_index = 2+1;
+	memdev->region_index = 0+1;
+	memdev->region_size = 0;
+	memdev->region_offset = 0;
+	memdev->address = 0;
+	memdev->interleave_index = 0;
+	memdev->interleave_ways = 1;
+	offset += memdev->header.length;
+
+	/* mem-region7 (spa/dcr1, dimm1) */
+	memdev = nfit_buf + offset;
+	memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
+	memdev->header.length = sizeof(*memdev);
+	memdev->device_handle = handle[1];
+	memdev->physical_id = 1;
+	memdev->region_id = 0;
+	memdev->range_index = 3+1;
+	memdev->region_index = 1+1;
+	memdev->region_size = 0;
+	memdev->region_offset = 0;
+	memdev->address = 0;
+	memdev->interleave_index = 0;
+	memdev->interleave_ways = 1;
+	offset += memdev->header.length;
+
+	/* mem-region8 (spa/dcr2, dimm2) */
+	memdev = nfit_buf + offset;
+	memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
+	memdev->header.length = sizeof(*memdev);
+	memdev->device_handle = handle[2];
+	memdev->physical_id = 2;
+	memdev->region_id = 0;
+	memdev->range_index = 4+1;
+	memdev->region_index = 2+1;
+	memdev->region_size = 0;
+	memdev->region_offset = 0;
+	memdev->address = 0;
+	memdev->interleave_index = 0;
+	memdev->interleave_ways = 1;
+	offset += memdev->header.length;
+
+	/* mem-region9 (spa/dcr3, dimm3) */
+	memdev = nfit_buf + offset;
+	memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
+	memdev->header.length = sizeof(*memdev);
+	memdev->device_handle = handle[3];
+	memdev->physical_id = 3;
+	memdev->region_id = 0;
+	memdev->range_index = 5+1;
+	memdev->region_index = 3+1;
+	memdev->region_size = 0;
+	memdev->region_offset = 0;
+	memdev->address = 0;
+	memdev->interleave_index = 0;
+	memdev->interleave_ways = 1;
+	offset += memdev->header.length;
+
+	/* mem-region10 (spa/bdw0, dimm0) */
+	memdev = nfit_buf + offset;
+	memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
+	memdev->header.length = sizeof(*memdev);
+	memdev->device_handle = handle[0];
+	memdev->physical_id = 0;
+	memdev->region_id = 0;
+	memdev->range_index = 6+1;
+	memdev->region_index = 0+1;
+	memdev->region_size = 0;
+	memdev->region_offset = 0;
+	memdev->address = 0;
+	memdev->interleave_index = 0;
+	memdev->interleave_ways = 1;
+	offset += memdev->header.length;
+
+	/* mem-region11 (spa/bdw1, dimm1) */
+	memdev = nfit_buf + offset;
+	memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
+	memdev->header.length = sizeof(*memdev);
+	memdev->device_handle = handle[1];
+	memdev->physical_id = 1;
+	memdev->region_id = 0;
+	memdev->range_index = 7+1;
+	memdev->region_index = 1+1;
+	memdev->region_size = 0;
+	memdev->region_offset = 0;
+	memdev->address = 0;
+	memdev->interleave_index = 0;
+	memdev->interleave_ways = 1;
+	offset += memdev->header.length;
+
+	/* mem-region12 (spa/bdw2, dimm2) */
+	memdev = nfit_buf + offset;
+	memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
+	memdev->header.length = sizeof(*memdev);
+	memdev->device_handle = handle[2];
+	memdev->physical_id = 2;
+	memdev->region_id = 0;
+	memdev->range_index = 8+1;
+	memdev->region_index = 2+1;
+	memdev->region_size = 0;
+	memdev->region_offset = 0;
+	memdev->address = 0;
+	memdev->interleave_index = 0;
+	memdev->interleave_ways = 1;
+	offset += memdev->header.length;
+
+	/* mem-region13 (spa/dcr3, dimm3) */
+	memdev = nfit_buf + offset;
+	memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
+	memdev->header.length = sizeof(*memdev);
+	memdev->device_handle = handle[3];
+	memdev->physical_id = 3;
+	memdev->region_id = 0;
+	memdev->range_index = 9+1;
+	memdev->region_index = 3+1;
+	memdev->region_size = 0;
+	memdev->region_offset = 0;
+	memdev->address = 0;
+	memdev->interleave_index = 0;
+	memdev->interleave_ways = 1;
+	memdev->flags = ACPI_NFIT_MEM_HEALTH_ENABLED;
+	offset += memdev->header.length;
+
+	/* dcr-descriptor0: blk */
+	dcr = nfit_buf + offset;
+	dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
+	dcr->header.length = sizeof(*dcr);
+	dcr->region_index = 0+1;
+	dcr_common_init(dcr);
+	dcr->serial_number = ~handle[0];
+	dcr->code = NFIT_FIC_BLK;
+	dcr->windows = 1;
+	dcr->window_size = DCR_SIZE;
+	dcr->command_offset = 0;
+	dcr->command_size = 8;
+	dcr->status_offset = 8;
+	dcr->status_size = 4;
+	offset += dcr->header.length;
+
+	/* dcr-descriptor1: blk */
+	dcr = nfit_buf + offset;
+	dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
+	dcr->header.length = sizeof(*dcr);
+	dcr->region_index = 1+1;
+	dcr_common_init(dcr);
+	dcr->serial_number = ~handle[1];
+	dcr->code = NFIT_FIC_BLK;
+	dcr->windows = 1;
+	dcr->window_size = DCR_SIZE;
+	dcr->command_offset = 0;
+	dcr->command_size = 8;
+	dcr->status_offset = 8;
+	dcr->status_size = 4;
+	offset += dcr->header.length;
+
+	/* dcr-descriptor2: blk */
+	dcr = nfit_buf + offset;
+	dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
+	dcr->header.length = sizeof(*dcr);
+	dcr->region_index = 2+1;
+	dcr_common_init(dcr);
+	dcr->serial_number = ~handle[2];
+	dcr->code = NFIT_FIC_BLK;
+	dcr->windows = 1;
+	dcr->window_size = DCR_SIZE;
+	dcr->command_offset = 0;
+	dcr->command_size = 8;
+	dcr->status_offset = 8;
+	dcr->status_size = 4;
+	offset += dcr->header.length;
+
+	/* dcr-descriptor3: blk */
+	dcr = nfit_buf + offset;
+	dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
+	dcr->header.length = sizeof(*dcr);
+	dcr->region_index = 3+1;
+	dcr_common_init(dcr);
+	dcr->serial_number = ~handle[3];
+	dcr->code = NFIT_FIC_BLK;
+	dcr->windows = 1;
+	dcr->window_size = DCR_SIZE;
+	dcr->command_offset = 0;
+	dcr->command_size = 8;
+	dcr->status_offset = 8;
+	dcr->status_size = 4;
+	offset += dcr->header.length;
+
+	/* dcr-descriptor0: pmem */
+	dcr = nfit_buf + offset;
+	dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
+	dcr->header.length = offsetof(struct acpi_nfit_control_region,
+			window_size);
+	dcr->region_index = 4+1;
+	dcr_common_init(dcr);
+	dcr->serial_number = ~handle[0];
+	dcr->code = NFIT_FIC_BYTEN;
+	dcr->windows = 0;
+	offset += dcr->header.length;
+
+	/* dcr-descriptor1: pmem */
+	dcr = nfit_buf + offset;
+	dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
+	dcr->header.length = offsetof(struct acpi_nfit_control_region,
+			window_size);
+	dcr->region_index = 5+1;
+	dcr_common_init(dcr);
+	dcr->serial_number = ~handle[1];
+	dcr->code = NFIT_FIC_BYTEN;
+	dcr->windows = 0;
+	offset += dcr->header.length;
+
+	/* dcr-descriptor2: pmem */
+	dcr = nfit_buf + offset;
+	dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
+	dcr->header.length = offsetof(struct acpi_nfit_control_region,
+			window_size);
+	dcr->region_index = 6+1;
+	dcr_common_init(dcr);
+	dcr->serial_number = ~handle[2];
+	dcr->code = NFIT_FIC_BYTEN;
+	dcr->windows = 0;
+	offset += dcr->header.length;
+
+	/* dcr-descriptor3: pmem */
+	dcr = nfit_buf + offset;
+	dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
+	dcr->header.length = offsetof(struct acpi_nfit_control_region,
+			window_size);
+	dcr->region_index = 7+1;
+	dcr_common_init(dcr);
+	dcr->serial_number = ~handle[3];
+	dcr->code = NFIT_FIC_BYTEN;
+	dcr->windows = 0;
+	offset += dcr->header.length;
+
+	/* bdw0 (spa/dcr0, dimm0) */
+	bdw = nfit_buf + offset;
+	bdw->header.type = ACPI_NFIT_TYPE_DATA_REGION;
+	bdw->header.length = sizeof(*bdw);
+	bdw->region_index = 0+1;
+	bdw->windows = 1;
+	bdw->offset = 0;
+	bdw->size = BDW_SIZE;
+	bdw->capacity = DIMM_SIZE;
+	bdw->start_address = 0;
+	offset += bdw->header.length;
+
+	/* bdw1 (spa/dcr1, dimm1) */
+	bdw = nfit_buf + offset;
+	bdw->header.type = ACPI_NFIT_TYPE_DATA_REGION;
+	bdw->header.length = sizeof(*bdw);
+	bdw->region_index = 1+1;
+	bdw->windows = 1;
+	bdw->offset = 0;
+	bdw->size = BDW_SIZE;
+	bdw->capacity = DIMM_SIZE;
+	bdw->start_address = 0;
+	offset += bdw->header.length;
+
+	/* bdw2 (spa/dcr2, dimm2) */
+	bdw = nfit_buf + offset;
+	bdw->header.type = ACPI_NFIT_TYPE_DATA_REGION;
+	bdw->header.length = sizeof(*bdw);
+	bdw->region_index = 2+1;
+	bdw->windows = 1;
+	bdw->offset = 0;
+	bdw->size = BDW_SIZE;
+	bdw->capacity = DIMM_SIZE;
+	bdw->start_address = 0;
+	offset += bdw->header.length;
+
+	/* bdw3 (spa/dcr3, dimm3) */
+	bdw = nfit_buf + offset;
+	bdw->header.type = ACPI_NFIT_TYPE_DATA_REGION;
+	bdw->header.length = sizeof(*bdw);
+	bdw->region_index = 3+1;
+	bdw->windows = 1;
+	bdw->offset = 0;
+	bdw->size = BDW_SIZE;
+	bdw->capacity = DIMM_SIZE;
+	bdw->start_address = 0;
+	offset += bdw->header.length;
+
+	/* flush0 (dimm0) */
+	flush = nfit_buf + offset;
+	flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS;
+	flush->header.length = flush_hint_size;
+	flush->device_handle = handle[0];
+	flush->hint_count = NUM_HINTS;
+	for (i = 0; i < NUM_HINTS; i++)
+		flush->hint_address[i] = t->flush_dma[0] + i * sizeof(u64);
+	offset += flush->header.length;
+
+	/* flush1 (dimm1) */
+	flush = nfit_buf + offset;
+	flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS;
+	flush->header.length = flush_hint_size;
+	flush->device_handle = handle[1];
+	flush->hint_count = NUM_HINTS;
+	for (i = 0; i < NUM_HINTS; i++)
+		flush->hint_address[i] = t->flush_dma[1] + i * sizeof(u64);
+	offset += flush->header.length;
+
+	/* flush2 (dimm2) */
+	flush = nfit_buf + offset;
+	flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS;
+	flush->header.length = flush_hint_size;
+	flush->device_handle = handle[2];
+	flush->hint_count = NUM_HINTS;
+	for (i = 0; i < NUM_HINTS; i++)
+		flush->hint_address[i] = t->flush_dma[2] + i * sizeof(u64);
+	offset += flush->header.length;
+
+	/* flush3 (dimm3) */
+	flush = nfit_buf + offset;
+	flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS;
+	flush->header.length = flush_hint_size;
+	flush->device_handle = handle[3];
+	flush->hint_count = NUM_HINTS;
+	for (i = 0; i < NUM_HINTS; i++)
+		flush->hint_address[i] = t->flush_dma[3] + i * sizeof(u64);
+	offset += flush->header.length;
+
+	/* platform capabilities */
+	pcap = nfit_buf + offset;
+	pcap->header.type = ACPI_NFIT_TYPE_CAPABILITIES;
+	pcap->header.length = sizeof(*pcap);
+	pcap->highest_capability = 1;
+	pcap->capabilities = ACPI_NFIT_CAPABILITY_MEM_FLUSH;
+	offset += pcap->header.length;
+
+	if (t->setup_hotplug) {
+		/* dcr-descriptor4: blk */
+		dcr = nfit_buf + offset;
+		dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
+		dcr->header.length = sizeof(*dcr);
+		dcr->region_index = 8+1;
+		dcr_common_init(dcr);
+		dcr->serial_number = ~handle[4];
+		dcr->code = NFIT_FIC_BLK;
+		dcr->windows = 1;
+		dcr->window_size = DCR_SIZE;
+		dcr->command_offset = 0;
+		dcr->command_size = 8;
+		dcr->status_offset = 8;
+		dcr->status_size = 4;
+		offset += dcr->header.length;
+
+		/* dcr-descriptor4: pmem */
+		dcr = nfit_buf + offset;
+		dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
+		dcr->header.length = offsetof(struct acpi_nfit_control_region,
+				window_size);
+		dcr->region_index = 9+1;
+		dcr_common_init(dcr);
+		dcr->serial_number = ~handle[4];
+		dcr->code = NFIT_FIC_BYTEN;
+		dcr->windows = 0;
+		offset += dcr->header.length;
+
+		/* bdw4 (spa/dcr4, dimm4) */
+		bdw = nfit_buf + offset;
+		bdw->header.type = ACPI_NFIT_TYPE_DATA_REGION;
+		bdw->header.length = sizeof(*bdw);
+		bdw->region_index = 8+1;
+		bdw->windows = 1;
+		bdw->offset = 0;
+		bdw->size = BDW_SIZE;
+		bdw->capacity = DIMM_SIZE;
+		bdw->start_address = 0;
+		offset += bdw->header.length;
+
+		/* spa10 (dcr4) dimm4 */
+		spa = nfit_buf + offset;
+		spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
+		spa->header.length = sizeof(*spa);
+		memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_DCR), 16);
+		spa->range_index = 10+1;
+		spa->address = t->dcr_dma[4];
+		spa->length = DCR_SIZE;
+		offset += spa->header.length;
+
+		/*
+		 * spa11 (single-dimm interleave for hotplug, note storage
+		 * does not actually alias the related block-data-window
+		 * regions)
+		 */
+		spa = nfit_buf + offset;
+		spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
+		spa->header.length = sizeof(*spa);
+		memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_PM), 16);
+		spa->range_index = 11+1;
+		spa->address = t->spa_set_dma[2];
+		spa->length = SPA0_SIZE;
+		offset += spa->header.length;
+
+		/* spa12 (bdw for dcr4) dimm4 */
+		spa = nfit_buf + offset;
+		spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
+		spa->header.length = sizeof(*spa);
+		memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_BDW), 16);
+		spa->range_index = 12+1;
+		spa->address = t->dimm_dma[4];
+		spa->length = DIMM_SIZE;
+		offset += spa->header.length;
+
+		/* mem-region14 (spa/dcr4, dimm4) */
+		memdev = nfit_buf + offset;
+		memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
+		memdev->header.length = sizeof(*memdev);
+		memdev->device_handle = handle[4];
+		memdev->physical_id = 4;
+		memdev->region_id = 0;
+		memdev->range_index = 10+1;
+		memdev->region_index = 8+1;
+		memdev->region_size = 0;
+		memdev->region_offset = 0;
+		memdev->address = 0;
+		memdev->interleave_index = 0;
+		memdev->interleave_ways = 1;
+		offset += memdev->header.length;
+
+		/* mem-region15 (spa11, dimm4) */
+		memdev = nfit_buf + offset;
+		memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
+		memdev->header.length = sizeof(*memdev);
+		memdev->device_handle = handle[4];
+		memdev->physical_id = 4;
+		memdev->region_id = 0;
+		memdev->range_index = 11+1;
+		memdev->region_index = 9+1;
+		memdev->region_size = SPA0_SIZE;
+		memdev->region_offset = (1ULL << 48);
+		memdev->address = 0;
+		memdev->interleave_index = 0;
+		memdev->interleave_ways = 1;
+		memdev->flags = ACPI_NFIT_MEM_HEALTH_ENABLED;
+		offset += memdev->header.length;
+
+		/* mem-region16 (spa/bdw4, dimm4) */
+		memdev = nfit_buf + offset;
+		memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
+		memdev->header.length = sizeof(*memdev);
+		memdev->device_handle = handle[4];
+		memdev->physical_id = 4;
+		memdev->region_id = 0;
+		memdev->range_index = 12+1;
+		memdev->region_index = 8+1;
+		memdev->region_size = 0;
+		memdev->region_offset = 0;
+		memdev->address = 0;
+		memdev->interleave_index = 0;
+		memdev->interleave_ways = 1;
+		offset += memdev->header.length;
+
+		/* flush3 (dimm4) */
+		flush = nfit_buf + offset;
+		flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS;
+		flush->header.length = flush_hint_size;
+		flush->device_handle = handle[4];
+		flush->hint_count = NUM_HINTS;
+		for (i = 0; i < NUM_HINTS; i++)
+			flush->hint_address[i] = t->flush_dma[4]
+				+ i * sizeof(u64);
+		offset += flush->header.length;
+
+		/* sanity check to make sure we've filled the buffer */
+		WARN_ON(offset != t->nfit_size);
+	}
+
+	t->nfit_filled = offset;
+
+	post_ars_status(&t->ars_state, &t->badrange, t->spa_set_dma[0],
+			SPA0_SIZE);
+
+	acpi_desc = &t->acpi_desc;
+	set_bit(ND_CMD_GET_CONFIG_SIZE, &acpi_desc->dimm_cmd_force_en);
+	set_bit(ND_CMD_GET_CONFIG_DATA, &acpi_desc->dimm_cmd_force_en);
+	set_bit(ND_CMD_SET_CONFIG_DATA, &acpi_desc->dimm_cmd_force_en);
+	set_bit(ND_INTEL_SMART, &acpi_desc->dimm_cmd_force_en);
+	set_bit(ND_INTEL_SMART_THRESHOLD, &acpi_desc->dimm_cmd_force_en);
+	set_bit(ND_INTEL_SMART_SET_THRESHOLD, &acpi_desc->dimm_cmd_force_en);
+	set_bit(ND_INTEL_SMART_INJECT, &acpi_desc->dimm_cmd_force_en);
+	set_bit(ND_CMD_ARS_CAP, &acpi_desc->bus_cmd_force_en);
+	set_bit(ND_CMD_ARS_START, &acpi_desc->bus_cmd_force_en);
+	set_bit(ND_CMD_ARS_STATUS, &acpi_desc->bus_cmd_force_en);
+	set_bit(ND_CMD_CLEAR_ERROR, &acpi_desc->bus_cmd_force_en);
+	set_bit(ND_CMD_CALL, &acpi_desc->bus_cmd_force_en);
+	set_bit(NFIT_CMD_TRANSLATE_SPA, &acpi_desc->bus_nfit_cmd_force_en);
+	set_bit(NFIT_CMD_ARS_INJECT_SET, &acpi_desc->bus_nfit_cmd_force_en);
+	set_bit(NFIT_CMD_ARS_INJECT_CLEAR, &acpi_desc->bus_nfit_cmd_force_en);
+	set_bit(NFIT_CMD_ARS_INJECT_GET, &acpi_desc->bus_nfit_cmd_force_en);
+	set_bit(ND_INTEL_FW_GET_INFO, &acpi_desc->dimm_cmd_force_en);
+	set_bit(ND_INTEL_FW_START_UPDATE, &acpi_desc->dimm_cmd_force_en);
+	set_bit(ND_INTEL_FW_SEND_DATA, &acpi_desc->dimm_cmd_force_en);
+	set_bit(ND_INTEL_FW_FINISH_UPDATE, &acpi_desc->dimm_cmd_force_en);
+	set_bit(ND_INTEL_FW_FINISH_QUERY, &acpi_desc->dimm_cmd_force_en);
+	set_bit(ND_INTEL_ENABLE_LSS_STATUS, &acpi_desc->dimm_cmd_force_en);
+}
+
+static void nfit_test1_setup(struct nfit_test *t)
+{
+	size_t offset;
+	void *nfit_buf = t->nfit_buf;
+	struct acpi_nfit_memory_map *memdev;
+	struct acpi_nfit_control_region *dcr;
+	struct acpi_nfit_system_address *spa;
+	struct acpi_nfit_desc *acpi_desc;
+
+	offset = 0;
+	/* spa0 (flat range with no bdw aliasing) */
+	spa = nfit_buf + offset;
+	spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
+	spa->header.length = sizeof(*spa);
+	memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_PM), 16);
+	spa->range_index = 0+1;
+	spa->address = t->spa_set_dma[0];
+	spa->length = SPA2_SIZE;
+	offset += spa->header.length;
+
+	/* virtual cd region */
+	spa = nfit_buf + offset;
+	spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
+	spa->header.length = sizeof(*spa);
+	memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_VCD), 16);
+	spa->range_index = 0;
+	spa->address = t->spa_set_dma[1];
+	spa->length = SPA_VCD_SIZE;
+	offset += spa->header.length;
+
+	/* mem-region0 (spa0, dimm0) */
+	memdev = nfit_buf + offset;
+	memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
+	memdev->header.length = sizeof(*memdev);
+	memdev->device_handle = handle[5];
+	memdev->physical_id = 0;
+	memdev->region_id = 0;
+	memdev->range_index = 0+1;
+	memdev->region_index = 0+1;
+	memdev->region_size = SPA2_SIZE;
+	memdev->region_offset = 0;
+	memdev->address = 0;
+	memdev->interleave_index = 0;
+	memdev->interleave_ways = 1;
+	memdev->flags = ACPI_NFIT_MEM_SAVE_FAILED | ACPI_NFIT_MEM_RESTORE_FAILED
+		| ACPI_NFIT_MEM_FLUSH_FAILED | ACPI_NFIT_MEM_HEALTH_OBSERVED
+		| ACPI_NFIT_MEM_NOT_ARMED;
+	offset += memdev->header.length;
+
+	/* dcr-descriptor0 */
+	dcr = nfit_buf + offset;
+	dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
+	dcr->header.length = offsetof(struct acpi_nfit_control_region,
+			window_size);
+	dcr->region_index = 0+1;
+	dcr_common_init(dcr);
+	dcr->serial_number = ~handle[5];
+	dcr->code = NFIT_FIC_BYTE;
+	dcr->windows = 0;
+	offset += dcr->header.length;
+
+	memdev = nfit_buf + offset;
+	memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
+	memdev->header.length = sizeof(*memdev);
+	memdev->device_handle = handle[6];
+	memdev->physical_id = 0;
+	memdev->region_id = 0;
+	memdev->range_index = 0;
+	memdev->region_index = 0+2;
+	memdev->region_size = SPA2_SIZE;
+	memdev->region_offset = 0;
+	memdev->address = 0;
+	memdev->interleave_index = 0;
+	memdev->interleave_ways = 1;
+	memdev->flags = ACPI_NFIT_MEM_MAP_FAILED;
+	offset += memdev->header.length;
+
+	/* dcr-descriptor1 */
+	dcr = nfit_buf + offset;
+	dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
+	dcr->header.length = offsetof(struct acpi_nfit_control_region,
+			window_size);
+	dcr->region_index = 0+2;
+	dcr_common_init(dcr);
+	dcr->serial_number = ~handle[6];
+	dcr->code = NFIT_FIC_BYTE;
+	dcr->windows = 0;
+	offset += dcr->header.length;
+
+	/* sanity check to make sure we've filled the buffer */
+	WARN_ON(offset != t->nfit_size);
+
+	t->nfit_filled = offset;
+
+	post_ars_status(&t->ars_state, &t->badrange, t->spa_set_dma[0],
+			SPA2_SIZE);
+
+	acpi_desc = &t->acpi_desc;
+	set_bit(ND_CMD_ARS_CAP, &acpi_desc->bus_cmd_force_en);
+	set_bit(ND_CMD_ARS_START, &acpi_desc->bus_cmd_force_en);
+	set_bit(ND_CMD_ARS_STATUS, &acpi_desc->bus_cmd_force_en);
+	set_bit(ND_CMD_CLEAR_ERROR, &acpi_desc->bus_cmd_force_en);
+	set_bit(ND_INTEL_ENABLE_LSS_STATUS, &acpi_desc->dimm_cmd_force_en);
+	set_bit(ND_CMD_GET_CONFIG_SIZE, &acpi_desc->dimm_cmd_force_en);
+	set_bit(ND_CMD_GET_CONFIG_DATA, &acpi_desc->dimm_cmd_force_en);
+	set_bit(ND_CMD_SET_CONFIG_DATA, &acpi_desc->dimm_cmd_force_en);
+}
+
+static int nfit_test_blk_do_io(struct nd_blk_region *ndbr, resource_size_t dpa,
+		void *iobuf, u64 len, int rw)
+{
+	struct nfit_blk *nfit_blk = ndbr->blk_provider_data;
+	struct nfit_blk_mmio *mmio = &nfit_blk->mmio[BDW];
+	struct nd_region *nd_region = &ndbr->nd_region;
+	unsigned int lane;
+
+	lane = nd_region_acquire_lane(nd_region);
+	if (rw)
+		memcpy(mmio->addr.base + dpa, iobuf, len);
+	else {
+		memcpy(iobuf, mmio->addr.base + dpa, len);
+
+		/* give us some some coverage of the arch_invalidate_pmem() API */
+		arch_invalidate_pmem(mmio->addr.base + dpa, len);
+	}
+	nd_region_release_lane(nd_region, lane);
+
+	return 0;
+}
+
+static unsigned long nfit_ctl_handle;
+
+union acpi_object *result;
+
+static union acpi_object *nfit_test_evaluate_dsm(acpi_handle handle,
+		const guid_t *guid, u64 rev, u64 func, union acpi_object *argv4)
+{
+	if (handle != &nfit_ctl_handle)
+		return ERR_PTR(-ENXIO);
+
+	return result;
+}
+
+static int setup_result(void *buf, size_t size)
+{
+	result = kmalloc(sizeof(union acpi_object) + size, GFP_KERNEL);
+	if (!result)
+		return -ENOMEM;
+	result->package.type = ACPI_TYPE_BUFFER,
+	result->buffer.pointer = (void *) (result + 1);
+	result->buffer.length = size;
+	memcpy(result->buffer.pointer, buf, size);
+	memset(buf, 0, size);
+	return 0;
+}
+
+static int nfit_ctl_test(struct device *dev)
+{
+	int rc, cmd_rc;
+	struct nvdimm *nvdimm;
+	struct acpi_device *adev;
+	struct nfit_mem *nfit_mem;
+	struct nd_ars_record *record;
+	struct acpi_nfit_desc *acpi_desc;
+	const u64 test_val = 0x0123456789abcdefULL;
+	unsigned long mask, cmd_size, offset;
+	union {
+		struct nd_cmd_get_config_size cfg_size;
+		struct nd_cmd_clear_error clear_err;
+		struct nd_cmd_ars_status ars_stat;
+		struct nd_cmd_ars_cap ars_cap;
+		char buf[sizeof(struct nd_cmd_ars_status)
+			+ sizeof(struct nd_ars_record)];
+	} cmds;
+
+	adev = devm_kzalloc(dev, sizeof(*adev), GFP_KERNEL);
+	if (!adev)
+		return -ENOMEM;
+	*adev = (struct acpi_device) {
+		.handle = &nfit_ctl_handle,
+		.dev = {
+			.init_name = "test-adev",
+		},
+	};
+
+	acpi_desc = devm_kzalloc(dev, sizeof(*acpi_desc), GFP_KERNEL);
+	if (!acpi_desc)
+		return -ENOMEM;
+	*acpi_desc = (struct acpi_nfit_desc) {
+		.nd_desc = {
+			.cmd_mask = 1UL << ND_CMD_ARS_CAP
+				| 1UL << ND_CMD_ARS_START
+				| 1UL << ND_CMD_ARS_STATUS
+				| 1UL << ND_CMD_CLEAR_ERROR
+				| 1UL << ND_CMD_CALL,
+			.module = THIS_MODULE,
+			.provider_name = "ACPI.NFIT",
+			.ndctl = acpi_nfit_ctl,
+			.bus_dsm_mask = 1UL << NFIT_CMD_TRANSLATE_SPA
+				| 1UL << NFIT_CMD_ARS_INJECT_SET
+				| 1UL << NFIT_CMD_ARS_INJECT_CLEAR
+				| 1UL << NFIT_CMD_ARS_INJECT_GET,
+		},
+		.dev = &adev->dev,
+	};
+
+	nfit_mem = devm_kzalloc(dev, sizeof(*nfit_mem), GFP_KERNEL);
+	if (!nfit_mem)
+		return -ENOMEM;
+
+	mask = 1UL << ND_CMD_SMART | 1UL << ND_CMD_SMART_THRESHOLD
+		| 1UL << ND_CMD_DIMM_FLAGS | 1UL << ND_CMD_GET_CONFIG_SIZE
+		| 1UL << ND_CMD_GET_CONFIG_DATA | 1UL << ND_CMD_SET_CONFIG_DATA
+		| 1UL << ND_CMD_VENDOR;
+	*nfit_mem = (struct nfit_mem) {
+		.adev = adev,
+		.family = NVDIMM_FAMILY_INTEL,
+		.dsm_mask = mask,
+	};
+
+	nvdimm = devm_kzalloc(dev, sizeof(*nvdimm), GFP_KERNEL);
+	if (!nvdimm)
+		return -ENOMEM;
+	*nvdimm = (struct nvdimm) {
+		.provider_data = nfit_mem,
+		.cmd_mask = mask,
+		.dev = {
+			.init_name = "test-dimm",
+		},
+	};
+
+
+	/* basic checkout of a typical 'get config size' command */
+	cmd_size = sizeof(cmds.cfg_size);
+	cmds.cfg_size = (struct nd_cmd_get_config_size) {
+		.status = 0,
+		.config_size = SZ_128K,
+		.max_xfer = SZ_4K,
+	};
+	rc = setup_result(cmds.buf, cmd_size);
+	if (rc)
+		return rc;
+	rc = acpi_nfit_ctl(&acpi_desc->nd_desc, nvdimm, ND_CMD_GET_CONFIG_SIZE,
+			cmds.buf, cmd_size, &cmd_rc);
+
+	if (rc < 0 || cmd_rc || cmds.cfg_size.status != 0
+			|| cmds.cfg_size.config_size != SZ_128K
+			|| cmds.cfg_size.max_xfer != SZ_4K) {
+		dev_dbg(dev, "%s: failed at: %d rc: %d cmd_rc: %d\n",
+				__func__, __LINE__, rc, cmd_rc);
+		return -EIO;
+	}
+
+
+	/* test ars_status with zero output */
+	cmd_size = offsetof(struct nd_cmd_ars_status, address);
+	cmds.ars_stat = (struct nd_cmd_ars_status) {
+		.out_length = 0,
+	};
+	rc = setup_result(cmds.buf, cmd_size);
+	if (rc)
+		return rc;
+	rc = acpi_nfit_ctl(&acpi_desc->nd_desc, NULL, ND_CMD_ARS_STATUS,
+			cmds.buf, cmd_size, &cmd_rc);
+
+	if (rc < 0 || cmd_rc) {
+		dev_dbg(dev, "%s: failed at: %d rc: %d cmd_rc: %d\n",
+				__func__, __LINE__, rc, cmd_rc);
+		return -EIO;
+	}
+
+
+	/* test ars_cap with benign extended status */
+	cmd_size = sizeof(cmds.ars_cap);
+	cmds.ars_cap = (struct nd_cmd_ars_cap) {
+		.status = ND_ARS_PERSISTENT << 16,
+	};
+	offset = offsetof(struct nd_cmd_ars_cap, status);
+	rc = setup_result(cmds.buf + offset, cmd_size - offset);
+	if (rc)
+		return rc;
+	rc = acpi_nfit_ctl(&acpi_desc->nd_desc, NULL, ND_CMD_ARS_CAP,
+			cmds.buf, cmd_size, &cmd_rc);
+
+	if (rc < 0 || cmd_rc) {
+		dev_dbg(dev, "%s: failed at: %d rc: %d cmd_rc: %d\n",
+				__func__, __LINE__, rc, cmd_rc);
+		return -EIO;
+	}
+
+
+	/* test ars_status with 'status' trimmed from 'out_length' */
+	cmd_size = sizeof(cmds.ars_stat) + sizeof(struct nd_ars_record);
+	cmds.ars_stat = (struct nd_cmd_ars_status) {
+		.out_length = cmd_size - 4,
+	};
+	record = &cmds.ars_stat.records[0];
+	*record = (struct nd_ars_record) {
+		.length = test_val,
+	};
+	rc = setup_result(cmds.buf, cmd_size);
+	if (rc)
+		return rc;
+	rc = acpi_nfit_ctl(&acpi_desc->nd_desc, NULL, ND_CMD_ARS_STATUS,
+			cmds.buf, cmd_size, &cmd_rc);
+
+	if (rc < 0 || cmd_rc || record->length != test_val) {
+		dev_dbg(dev, "%s: failed at: %d rc: %d cmd_rc: %d\n",
+				__func__, __LINE__, rc, cmd_rc);
+		return -EIO;
+	}
+
+
+	/* test ars_status with 'Output (Size)' including 'status' */
+	cmd_size = sizeof(cmds.ars_stat) + sizeof(struct nd_ars_record);
+	cmds.ars_stat = (struct nd_cmd_ars_status) {
+		.out_length = cmd_size,
+	};
+	record = &cmds.ars_stat.records[0];
+	*record = (struct nd_ars_record) {
+		.length = test_val,
+	};
+	rc = setup_result(cmds.buf, cmd_size);
+	if (rc)
+		return rc;
+	rc = acpi_nfit_ctl(&acpi_desc->nd_desc, NULL, ND_CMD_ARS_STATUS,
+			cmds.buf, cmd_size, &cmd_rc);
+
+	if (rc < 0 || cmd_rc || record->length != test_val) {
+		dev_dbg(dev, "%s: failed at: %d rc: %d cmd_rc: %d\n",
+				__func__, __LINE__, rc, cmd_rc);
+		return -EIO;
+	}
+
+
+	/* test extended status for get_config_size results in failure */
+	cmd_size = sizeof(cmds.cfg_size);
+	cmds.cfg_size = (struct nd_cmd_get_config_size) {
+		.status = 1 << 16,
+	};
+	rc = setup_result(cmds.buf, cmd_size);
+	if (rc)
+		return rc;
+	rc = acpi_nfit_ctl(&acpi_desc->nd_desc, nvdimm, ND_CMD_GET_CONFIG_SIZE,
+			cmds.buf, cmd_size, &cmd_rc);
+
+	if (rc < 0 || cmd_rc >= 0) {
+		dev_dbg(dev, "%s: failed at: %d rc: %d cmd_rc: %d\n",
+				__func__, __LINE__, rc, cmd_rc);
+		return -EIO;
+	}
+
+	/* test clear error */
+	cmd_size = sizeof(cmds.clear_err);
+	cmds.clear_err = (struct nd_cmd_clear_error) {
+		.length = 512,
+		.cleared = 512,
+	};
+	rc = setup_result(cmds.buf, cmd_size);
+	if (rc)
+		return rc;
+	rc = acpi_nfit_ctl(&acpi_desc->nd_desc, NULL, ND_CMD_CLEAR_ERROR,
+			cmds.buf, cmd_size, &cmd_rc);
+	if (rc < 0 || cmd_rc) {
+		dev_dbg(dev, "%s: failed at: %d rc: %d cmd_rc: %d\n",
+				__func__, __LINE__, rc, cmd_rc);
+		return -EIO;
+	}
+
+	return 0;
+}
+
+static int nfit_test_probe(struct platform_device *pdev)
+{
+	struct nvdimm_bus_descriptor *nd_desc;
+	struct acpi_nfit_desc *acpi_desc;
+	struct device *dev = &pdev->dev;
+	struct nfit_test *nfit_test;
+	struct nfit_mem *nfit_mem;
+	union acpi_object *obj;
+	int rc;
+
+	if (strcmp(dev_name(&pdev->dev), "nfit_test.0") == 0) {
+		rc = nfit_ctl_test(&pdev->dev);
+		if (rc)
+			return rc;
+	}
+
+	nfit_test = to_nfit_test(&pdev->dev);
+
+	/* common alloc */
+	if (nfit_test->num_dcr) {
+		int num = nfit_test->num_dcr;
+
+		nfit_test->dimm = devm_kcalloc(dev, num, sizeof(void *),
+				GFP_KERNEL);
+		nfit_test->dimm_dma = devm_kcalloc(dev, num, sizeof(dma_addr_t),
+				GFP_KERNEL);
+		nfit_test->flush = devm_kcalloc(dev, num, sizeof(void *),
+				GFP_KERNEL);
+		nfit_test->flush_dma = devm_kcalloc(dev, num, sizeof(dma_addr_t),
+				GFP_KERNEL);
+		nfit_test->label = devm_kcalloc(dev, num, sizeof(void *),
+				GFP_KERNEL);
+		nfit_test->label_dma = devm_kcalloc(dev, num,
+				sizeof(dma_addr_t), GFP_KERNEL);
+		nfit_test->dcr = devm_kcalloc(dev, num,
+				sizeof(struct nfit_test_dcr *), GFP_KERNEL);
+		nfit_test->dcr_dma = devm_kcalloc(dev, num,
+				sizeof(dma_addr_t), GFP_KERNEL);
+		nfit_test->smart = devm_kcalloc(dev, num,
+				sizeof(struct nd_intel_smart), GFP_KERNEL);
+		nfit_test->smart_threshold = devm_kcalloc(dev, num,
+				sizeof(struct nd_intel_smart_threshold),
+				GFP_KERNEL);
+		nfit_test->fw = devm_kcalloc(dev, num,
+				sizeof(struct nfit_test_fw), GFP_KERNEL);
+		if (nfit_test->dimm && nfit_test->dimm_dma && nfit_test->label
+				&& nfit_test->label_dma && nfit_test->dcr
+				&& nfit_test->dcr_dma && nfit_test->flush
+				&& nfit_test->flush_dma
+				&& nfit_test->fw)
+			/* pass */;
+		else
+			return -ENOMEM;
+	}
+
+	if (nfit_test->num_pm) {
+		int num = nfit_test->num_pm;
+
+		nfit_test->spa_set = devm_kcalloc(dev, num, sizeof(void *),
+				GFP_KERNEL);
+		nfit_test->spa_set_dma = devm_kcalloc(dev, num,
+				sizeof(dma_addr_t), GFP_KERNEL);
+		if (nfit_test->spa_set && nfit_test->spa_set_dma)
+			/* pass */;
+		else
+			return -ENOMEM;
+	}
+
+	/* per-nfit specific alloc */
+	if (nfit_test->alloc(nfit_test))
+		return -ENOMEM;
+
+	nfit_test->setup(nfit_test);
+	acpi_desc = &nfit_test->acpi_desc;
+	acpi_nfit_desc_init(acpi_desc, &pdev->dev);
+	acpi_desc->blk_do_io = nfit_test_blk_do_io;
+	nd_desc = &acpi_desc->nd_desc;
+	nd_desc->provider_name = NULL;
+	nd_desc->module = THIS_MODULE;
+	nd_desc->ndctl = nfit_test_ctl;
+
+	rc = acpi_nfit_init(acpi_desc, nfit_test->nfit_buf,
+			nfit_test->nfit_filled);
+	if (rc)
+		return rc;
+
+	rc = devm_add_action_or_reset(&pdev->dev, acpi_nfit_shutdown, acpi_desc);
+	if (rc)
+		return rc;
+
+	if (nfit_test->setup != nfit_test0_setup)
+		return 0;
+
+	nfit_test->setup_hotplug = 1;
+	nfit_test->setup(nfit_test);
+
+	obj = kzalloc(sizeof(*obj), GFP_KERNEL);
+	if (!obj)
+		return -ENOMEM;
+	obj->type = ACPI_TYPE_BUFFER;
+	obj->buffer.length = nfit_test->nfit_size;
+	obj->buffer.pointer = nfit_test->nfit_buf;
+	*(nfit_test->_fit) = obj;
+	__acpi_nfit_notify(&pdev->dev, nfit_test, 0x80);
+
+	/* associate dimm devices with nfit_mem data for notification testing */
+	mutex_lock(&acpi_desc->init_mutex);
+	list_for_each_entry(nfit_mem, &acpi_desc->dimms, list) {
+		u32 nfit_handle = __to_nfit_memdev(nfit_mem)->device_handle;
+		int i;
+
+		for (i = 0; i < ARRAY_SIZE(handle); i++)
+			if (nfit_handle == handle[i])
+				dev_set_drvdata(nfit_test->dimm_dev[i],
+						nfit_mem);
+	}
+	mutex_unlock(&acpi_desc->init_mutex);
+
+	return 0;
+}
+
+static int nfit_test_remove(struct platform_device *pdev)
+{
+	return 0;
+}
+
+static void nfit_test_release(struct device *dev)
+{
+	struct nfit_test *nfit_test = to_nfit_test(dev);
+
+	kfree(nfit_test);
+}
+
+static const struct platform_device_id nfit_test_id[] = {
+	{ KBUILD_MODNAME },
+	{ },
+};
+
+static struct platform_driver nfit_test_driver = {
+	.probe = nfit_test_probe,
+	.remove = nfit_test_remove,
+	.driver = {
+		.name = KBUILD_MODNAME,
+	},
+	.id_table = nfit_test_id,
+};
+
+static char mcsafe_buf[PAGE_SIZE] __attribute__((__aligned__(PAGE_SIZE)));
+
+enum INJECT {
+	INJECT_NONE,
+	INJECT_SRC,
+	INJECT_DST,
+};
+
+static void mcsafe_test_init(char *dst, char *src, size_t size)
+{
+	size_t i;
+
+	memset(dst, 0xff, size);
+	for (i = 0; i < size; i++)
+		src[i] = (char) i;
+}
+
+static bool mcsafe_test_validate(unsigned char *dst, unsigned char *src,
+		size_t size, unsigned long rem)
+{
+	size_t i;
+
+	for (i = 0; i < size - rem; i++)
+		if (dst[i] != (unsigned char) i) {
+			pr_info_once("%s:%d: offset: %zd got: %#x expect: %#x\n",
+					__func__, __LINE__, i, dst[i],
+					(unsigned char) i);
+			return false;
+		}
+	for (i = size - rem; i < size; i++)
+		if (dst[i] != 0xffU) {
+			pr_info_once("%s:%d: offset: %zd got: %#x expect: 0xff\n",
+					__func__, __LINE__, i, dst[i]);
+			return false;
+		}
+	return true;
+}
+
+void mcsafe_test(void)
+{
+	char *inject_desc[] = { "none", "source", "destination" };
+	enum INJECT inj;
+
+	if (IS_ENABLED(CONFIG_MCSAFE_TEST)) {
+		pr_info("%s: run...\n", __func__);
+	} else {
+		pr_info("%s: disabled, skip.\n", __func__);
+		return;
+	}
+
+	for (inj = INJECT_NONE; inj <= INJECT_DST; inj++) {
+		int i;
+
+		pr_info("%s: inject: %s\n", __func__, inject_desc[inj]);
+		for (i = 0; i < 512; i++) {
+			unsigned long expect, rem;
+			void *src, *dst;
+			bool valid;
+
+			switch (inj) {
+			case INJECT_NONE:
+				mcsafe_inject_src(NULL);
+				mcsafe_inject_dst(NULL);
+				dst = &mcsafe_buf[2048];
+				src = &mcsafe_buf[1024 - i];
+				expect = 0;
+				break;
+			case INJECT_SRC:
+				mcsafe_inject_src(&mcsafe_buf[1024]);
+				mcsafe_inject_dst(NULL);
+				dst = &mcsafe_buf[2048];
+				src = &mcsafe_buf[1024 - i];
+				expect = 512 - i;
+				break;
+			case INJECT_DST:
+				mcsafe_inject_src(NULL);
+				mcsafe_inject_dst(&mcsafe_buf[2048]);
+				dst = &mcsafe_buf[2048 - i];
+				src = &mcsafe_buf[1024];
+				expect = 512 - i;
+				break;
+			}
+
+			mcsafe_test_init(dst, src, 512);
+			rem = __memcpy_mcsafe(dst, src, 512);
+			valid = mcsafe_test_validate(dst, src, 512, expect);
+			if (rem == expect && valid)
+				continue;
+			pr_info("%s: copy(%#lx, %#lx, %d) off: %d rem: %ld %s expect: %ld\n",
+					__func__,
+					((unsigned long) dst) & ~PAGE_MASK,
+					((unsigned long ) src) & ~PAGE_MASK,
+					512, i, rem, valid ? "valid" : "bad",
+					expect);
+		}
+	}
+
+	mcsafe_inject_src(NULL);
+	mcsafe_inject_dst(NULL);
+}
+
+static __init int nfit_test_init(void)
+{
+	int rc, i;
+
+	pmem_test();
+	libnvdimm_test();
+	acpi_nfit_test();
+	device_dax_test();
+	mcsafe_test();
+
+	nfit_test_setup(nfit_test_lookup, nfit_test_evaluate_dsm);
+
+	nfit_wq = create_singlethread_workqueue("nfit");
+	if (!nfit_wq)
+		return -ENOMEM;
+
+	nfit_test_dimm = class_create(THIS_MODULE, "nfit_test_dimm");
+	if (IS_ERR(nfit_test_dimm)) {
+		rc = PTR_ERR(nfit_test_dimm);
+		goto err_register;
+	}
+
+	nfit_pool = gen_pool_create(ilog2(SZ_4M), NUMA_NO_NODE);
+	if (!nfit_pool) {
+		rc = -ENOMEM;
+		goto err_register;
+	}
+
+	if (gen_pool_add(nfit_pool, SZ_4G, SZ_4G, NUMA_NO_NODE)) {
+		rc = -ENOMEM;
+		goto err_register;
+	}
+
+	for (i = 0; i < NUM_NFITS; i++) {
+		struct nfit_test *nfit_test;
+		struct platform_device *pdev;
+
+		nfit_test = kzalloc(sizeof(*nfit_test), GFP_KERNEL);
+		if (!nfit_test) {
+			rc = -ENOMEM;
+			goto err_register;
+		}
+		INIT_LIST_HEAD(&nfit_test->resources);
+		badrange_init(&nfit_test->badrange);
+		switch (i) {
+		case 0:
+			nfit_test->num_pm = NUM_PM;
+			nfit_test->dcr_idx = 0;
+			nfit_test->num_dcr = NUM_DCR;
+			nfit_test->alloc = nfit_test0_alloc;
+			nfit_test->setup = nfit_test0_setup;
+			break;
+		case 1:
+			nfit_test->num_pm = 2;
+			nfit_test->dcr_idx = NUM_DCR;
+			nfit_test->num_dcr = 2;
+			nfit_test->alloc = nfit_test1_alloc;
+			nfit_test->setup = nfit_test1_setup;
+			break;
+		default:
+			rc = -EINVAL;
+			goto err_register;
+		}
+		pdev = &nfit_test->pdev;
+		pdev->name = KBUILD_MODNAME;
+		pdev->id = i;
+		pdev->dev.release = nfit_test_release;
+		rc = platform_device_register(pdev);
+		if (rc) {
+			put_device(&pdev->dev);
+			goto err_register;
+		}
+		get_device(&pdev->dev);
+
+		rc = dma_coerce_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
+		if (rc)
+			goto err_register;
+
+		instances[i] = nfit_test;
+		INIT_WORK(&nfit_test->work, uc_error_notify);
+	}
+
+	rc = platform_driver_register(&nfit_test_driver);
+	if (rc)
+		goto err_register;
+	return 0;
+
+ err_register:
+	if (nfit_pool)
+		gen_pool_destroy(nfit_pool);
+
+	destroy_workqueue(nfit_wq);
+	for (i = 0; i < NUM_NFITS; i++)
+		if (instances[i])
+			platform_device_unregister(&instances[i]->pdev);
+	nfit_test_teardown();
+	for (i = 0; i < NUM_NFITS; i++)
+		if (instances[i])
+			put_device(&instances[i]->pdev.dev);
+
+	return rc;
+}
+
+static __exit void nfit_test_exit(void)
+{
+	int i;
+
+	flush_workqueue(nfit_wq);
+	destroy_workqueue(nfit_wq);
+	for (i = 0; i < NUM_NFITS; i++)
+		platform_device_unregister(&instances[i]->pdev);
+	platform_driver_unregister(&nfit_test_driver);
+	nfit_test_teardown();
+
+	gen_pool_destroy(nfit_pool);
+
+	for (i = 0; i < NUM_NFITS; i++)
+		put_device(&instances[i]->pdev.dev);
+	class_destroy(nfit_test_dimm);
+}
+
+module_init(nfit_test_init);
+module_exit(nfit_test_exit);
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Intel Corporation");
diff --git a/tools/testing/nvdimm/test/nfit_test.h b/tools/testing/nvdimm/test/nfit_test.h
new file mode 100644
index 000000000..3de57cc87
--- /dev/null
+++ b/tools/testing/nvdimm/test/nfit_test.h
@@ -0,0 +1,249 @@
+/*
+ * Copyright(c) 2013-2015 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+#ifndef __NFIT_TEST_H__
+#define __NFIT_TEST_H__
+#include <linux/acpi.h>
+#include <linux/list.h>
+#include <linux/uuid.h>
+#include <linux/ioport.h>
+#include <linux/spinlock_types.h>
+
+struct nfit_test_request {
+	struct list_head list;
+	struct resource res;
+};
+
+struct nfit_test_resource {
+	struct list_head requests;
+	struct list_head list;
+	struct resource res;
+	struct device *dev;
+	spinlock_t lock;
+	int req_count;
+	void *buf;
+};
+
+#define ND_TRANSLATE_SPA_STATUS_INVALID_SPA  2
+#define NFIT_ARS_INJECT_INVALID 2
+
+enum err_inj_options {
+	ND_ARS_ERR_INJ_OPT_NOTIFY = 0,
+};
+
+/* nfit commands */
+enum nfit_cmd_num {
+	NFIT_CMD_TRANSLATE_SPA = 5,
+	NFIT_CMD_ARS_INJECT_SET = 7,
+	NFIT_CMD_ARS_INJECT_CLEAR = 8,
+	NFIT_CMD_ARS_INJECT_GET = 9,
+};
+
+struct nd_cmd_translate_spa {
+	__u64 spa;
+	__u32 status;
+	__u8  flags;
+	__u8  _reserved[3];
+	__u64 translate_length;
+	__u32 num_nvdimms;
+	struct nd_nvdimm_device {
+		__u32 nfit_device_handle;
+		__u32 _reserved;
+		__u64 dpa;
+	} __packed devices[0];
+
+} __packed;
+
+struct nd_cmd_ars_err_inj {
+	__u64 err_inj_spa_range_base;
+	__u64 err_inj_spa_range_length;
+	__u8  err_inj_options;
+	__u32 status;
+} __packed;
+
+struct nd_cmd_ars_err_inj_clr {
+	__u64 err_inj_clr_spa_range_base;
+	__u64 err_inj_clr_spa_range_length;
+	__u32 status;
+} __packed;
+
+struct nd_cmd_ars_err_inj_stat {
+	__u32 status;
+	__u32 inj_err_rec_count;
+	struct nd_error_stat_query_record {
+		__u64 err_inj_stat_spa_range_base;
+		__u64 err_inj_stat_spa_range_length;
+	} __packed record[0];
+} __packed;
+
+#define ND_INTEL_SMART			 1
+#define ND_INTEL_SMART_THRESHOLD	 2
+#define ND_INTEL_ENABLE_LSS_STATUS	10
+#define ND_INTEL_FW_GET_INFO		12
+#define ND_INTEL_FW_START_UPDATE	13
+#define ND_INTEL_FW_SEND_DATA		14
+#define ND_INTEL_FW_FINISH_UPDATE	15
+#define ND_INTEL_FW_FINISH_QUERY	16
+#define ND_INTEL_SMART_SET_THRESHOLD	17
+#define ND_INTEL_SMART_INJECT		18
+
+#define ND_INTEL_SMART_HEALTH_VALID             (1 << 0)
+#define ND_INTEL_SMART_SPARES_VALID             (1 << 1)
+#define ND_INTEL_SMART_USED_VALID               (1 << 2)
+#define ND_INTEL_SMART_MTEMP_VALID              (1 << 3)
+#define ND_INTEL_SMART_CTEMP_VALID              (1 << 4)
+#define ND_INTEL_SMART_SHUTDOWN_COUNT_VALID     (1 << 5)
+#define ND_INTEL_SMART_AIT_STATUS_VALID         (1 << 6)
+#define ND_INTEL_SMART_PTEMP_VALID              (1 << 7)
+#define ND_INTEL_SMART_ALARM_VALID              (1 << 9)
+#define ND_INTEL_SMART_SHUTDOWN_VALID           (1 << 10)
+#define ND_INTEL_SMART_VENDOR_VALID             (1 << 11)
+#define ND_INTEL_SMART_SPARE_TRIP               (1 << 0)
+#define ND_INTEL_SMART_TEMP_TRIP                (1 << 1)
+#define ND_INTEL_SMART_CTEMP_TRIP               (1 << 2)
+#define ND_INTEL_SMART_NON_CRITICAL_HEALTH      (1 << 0)
+#define ND_INTEL_SMART_CRITICAL_HEALTH          (1 << 1)
+#define ND_INTEL_SMART_FATAL_HEALTH             (1 << 2)
+#define ND_INTEL_SMART_INJECT_MTEMP		(1 << 0)
+#define ND_INTEL_SMART_INJECT_SPARE		(1 << 1)
+#define ND_INTEL_SMART_INJECT_FATAL		(1 << 2)
+#define ND_INTEL_SMART_INJECT_SHUTDOWN		(1 << 3)
+
+struct nd_intel_smart {
+	__u32 status;
+	union {
+		struct {
+			__u32 flags;
+			__u8 reserved0[4];
+			__u8 health;
+			__u8 spares;
+			__u8 life_used;
+			__u8 alarm_flags;
+			__u16 media_temperature;
+			__u16 ctrl_temperature;
+			__u32 shutdown_count;
+			__u8 ait_status;
+			__u16 pmic_temperature;
+			__u8 reserved1[8];
+			__u8 shutdown_state;
+			__u32 vendor_size;
+			__u8 vendor_data[92];
+		} __packed;
+		__u8 data[128];
+	};
+} __packed;
+
+struct nd_intel_smart_threshold {
+	__u32 status;
+	union {
+		struct {
+			__u16 alarm_control;
+			__u8 spares;
+			__u16 media_temperature;
+			__u16 ctrl_temperature;
+			__u8 reserved[1];
+		} __packed;
+		__u8 data[8];
+	};
+} __packed;
+
+struct nd_intel_smart_set_threshold {
+	__u16 alarm_control;
+	__u8 spares;
+	__u16 media_temperature;
+	__u16 ctrl_temperature;
+	__u32 status;
+} __packed;
+
+struct nd_intel_smart_inject {
+	__u64 flags;
+	__u8 mtemp_enable;
+	__u16 media_temperature;
+	__u8 spare_enable;
+	__u8 spares;
+	__u8 fatal_enable;
+	__u8 unsafe_shutdown_enable;
+	__u32 status;
+} __packed;
+
+#define INTEL_FW_STORAGE_SIZE		0x100000
+#define INTEL_FW_MAX_SEND_LEN		0xFFEC
+#define INTEL_FW_QUERY_INTERVAL		250000
+#define INTEL_FW_QUERY_MAX_TIME		3000000
+#define INTEL_FW_FIS_VERSION		0x0105
+#define INTEL_FW_FAKE_VERSION		0xffffffffabcd
+
+enum intel_fw_update_state {
+	FW_STATE_NEW = 0,
+	FW_STATE_IN_PROGRESS,
+	FW_STATE_VERIFY,
+	FW_STATE_UPDATED,
+};
+
+struct nd_intel_fw_info {
+	__u32 status;
+	__u32 storage_size;
+	__u32 max_send_len;
+	__u32 query_interval;
+	__u32 max_query_time;
+	__u8 update_cap;
+	__u8 reserved[3];
+	__u32 fis_version;
+	__u64 run_version;
+	__u64 updated_version;
+} __packed;
+
+struct nd_intel_fw_start {
+	__u32 status;
+	__u32 context;
+} __packed;
+
+/* this one has the output first because the variable input data size */
+struct nd_intel_fw_send_data {
+	__u32 context;
+	__u32 offset;
+	__u32 length;
+	__u8 data[0];
+/* this field is not declared due ot variable data from input */
+/*	__u32 status; */
+} __packed;
+
+struct nd_intel_fw_finish_update {
+	__u8 ctrl_flags;
+	__u8 reserved[3];
+	__u32 context;
+	__u32 status;
+} __packed;
+
+struct nd_intel_fw_finish_query {
+	__u32 context;
+	__u32 status;
+	__u64 updated_fw_rev;
+} __packed;
+
+struct nd_intel_lss {
+	__u8 enable;
+	__u32 status;
+} __packed;
+
+typedef struct nfit_test_resource *(*nfit_test_lookup_fn)(resource_size_t);
+typedef union acpi_object *(*nfit_test_evaluate_dsm_fn)(acpi_handle handle,
+		 const guid_t *guid, u64 rev, u64 func,
+		 union acpi_object *argv4);
+void __iomem *__wrap_ioremap_nocache(resource_size_t offset,
+		unsigned long size);
+void __wrap_iounmap(volatile void __iomem *addr);
+void nfit_test_setup(nfit_test_lookup_fn lookup,
+		nfit_test_evaluate_dsm_fn evaluate);
+void nfit_test_teardown(void);
+struct nfit_test_resource *get_nfit_res(resource_size_t resource);
+#endif
diff --git a/tools/testing/nvdimm/watermark.h b/tools/testing/nvdimm/watermark.h
new file mode 100644
index 000000000..ed0528757
--- /dev/null
+++ b/tools/testing/nvdimm/watermark.h
@@ -0,0 +1,21 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright(c) 2018 Intel Corporation. All rights reserved.
+#ifndef _TEST_NVDIMM_WATERMARK_H_
+#define _TEST_NVDIMM_WATERMARK_H_
+int pmem_test(void);
+int libnvdimm_test(void);
+int acpi_nfit_test(void);
+int device_dax_test(void);
+
+/*
+ * dummy routine for nfit_test to validate it is linking to the properly
+ * mocked module and not the standard one from the base tree.
+ */
+#define nfit_test_watermark(x)				\
+int x##_test(void)					\
+{							\
+	pr_debug("%s for nfit_test\n", KBUILD_MODNAME);	\
+	return 0;					\
+}							\
+EXPORT_SYMBOL(x##_test)
+#endif /* _TEST_NVDIMM_WATERMARK_H_ */
diff --git a/tools/testing/radix-tree/.gitignore b/tools/testing/radix-tree/.gitignore
new file mode 100644
index 000000000..d4706c0ff
--- /dev/null
+++ b/tools/testing/radix-tree/.gitignore
@@ -0,0 +1,6 @@
+generated/map-shift.h
+idr.c
+idr-test
+main
+multiorder
+radix-tree.c
diff --git a/tools/testing/radix-tree/Makefile b/tools/testing/radix-tree/Makefile
new file mode 100644
index 000000000..37baecc37
--- /dev/null
+++ b/tools/testing/radix-tree/Makefile
@@ -0,0 +1,51 @@
+# SPDX-License-Identifier: GPL-2.0
+
+CFLAGS += -I. -I../../include -g -Og -Wall -D_LGPL_SOURCE -fsanitize=address \
+	  -fsanitize=undefined
+LDFLAGS += -fsanitize=address -fsanitize=undefined
+LDLIBS+= -lpthread -lurcu
+TARGETS = main idr-test multiorder
+CORE_OFILES := radix-tree.o idr.o linux.o test.o find_bit.o
+OFILES = main.o $(CORE_OFILES) regression1.o regression2.o regression3.o \
+	 tag_check.o multiorder.o idr-test.o iteration_check.o benchmark.o
+
+ifndef SHIFT
+	SHIFT=3
+endif
+
+ifeq ($(BUILD), 32)
+	CFLAGS += -m32
+	LDFLAGS += -m32
+endif
+
+targets: generated/map-shift.h $(TARGETS)
+
+main:	$(OFILES)
+
+idr-test.o: ../../../lib/test_ida.c
+idr-test: idr-test.o $(CORE_OFILES)
+
+multiorder: multiorder.o $(CORE_OFILES)
+
+clean:
+	$(RM) $(TARGETS) *.o radix-tree.c idr.c generated/map-shift.h
+
+vpath %.c ../../lib
+
+$(OFILES): Makefile *.h */*.h generated/map-shift.h \
+	../../include/linux/*.h \
+	../../include/asm/*.h \
+	../../../include/linux/radix-tree.h \
+	../../../include/linux/idr.h
+
+radix-tree.c: ../../../lib/radix-tree.c
+	sed -e 's/^static //' -e 's/__always_inline //' -e 's/inline //' < $< > $@
+
+idr.c: ../../../lib/idr.c
+	sed -e 's/^static //' -e 's/__always_inline //' -e 's/inline //' < $< > $@
+
+generated/map-shift.h:
+	@if ! grep -qws $(SHIFT) generated/map-shift.h; then		\
+		echo "#define RADIX_TREE_MAP_SHIFT $(SHIFT)" >		\
+				generated/map-shift.h;			\
+	fi
diff --git a/tools/testing/radix-tree/benchmark.c b/tools/testing/radix-tree/benchmark.c
new file mode 100644
index 000000000..99c40f3ed
--- /dev/null
+++ b/tools/testing/radix-tree/benchmark.c
@@ -0,0 +1,257 @@
+/*
+ * benchmark.c:
+ * Author: Konstantin Khlebnikov <koct9i@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+#include <linux/radix-tree.h>
+#include <linux/slab.h>
+#include <linux/errno.h>
+#include <time.h>
+#include "test.h"
+
+#define for_each_index(i, base, order) \
+	        for (i = base; i < base + (1 << order); i++)
+
+#define NSEC_PER_SEC	1000000000L
+
+static long long benchmark_iter(struct radix_tree_root *root, bool tagged)
+{
+	volatile unsigned long sink = 0;
+	struct radix_tree_iter iter;
+	struct timespec start, finish;
+	long long nsec;
+	int l, loops = 1;
+	void **slot;
+
+#ifdef BENCHMARK
+again:
+#endif
+	clock_gettime(CLOCK_MONOTONIC, &start);
+	for (l = 0; l < loops; l++) {
+		if (tagged) {
+			radix_tree_for_each_tagged(slot, root, &iter, 0, 0)
+				sink ^= (unsigned long)slot;
+		} else {
+			radix_tree_for_each_slot(slot, root, &iter, 0)
+				sink ^= (unsigned long)slot;
+		}
+	}
+	clock_gettime(CLOCK_MONOTONIC, &finish);
+
+	nsec = (finish.tv_sec - start.tv_sec) * NSEC_PER_SEC +
+	       (finish.tv_nsec - start.tv_nsec);
+
+#ifdef BENCHMARK
+	if (loops == 1 && nsec * 5 < NSEC_PER_SEC) {
+		loops = NSEC_PER_SEC / nsec / 4 + 1;
+		goto again;
+	}
+#endif
+
+	nsec /= loops;
+	return nsec;
+}
+
+static void benchmark_insert(struct radix_tree_root *root,
+			     unsigned long size, unsigned long step, int order)
+{
+	struct timespec start, finish;
+	unsigned long index;
+	long long nsec;
+
+	clock_gettime(CLOCK_MONOTONIC, &start);
+
+	for (index = 0 ; index < size ; index += step)
+		item_insert_order(root, index, order);
+
+	clock_gettime(CLOCK_MONOTONIC, &finish);
+
+	nsec = (finish.tv_sec - start.tv_sec) * NSEC_PER_SEC +
+	       (finish.tv_nsec - start.tv_nsec);
+
+	printv(2, "Size: %8ld, step: %8ld, order: %d, insertion: %15lld ns\n",
+		size, step, order, nsec);
+}
+
+static void benchmark_tagging(struct radix_tree_root *root,
+			     unsigned long size, unsigned long step, int order)
+{
+	struct timespec start, finish;
+	unsigned long index;
+	long long nsec;
+
+	clock_gettime(CLOCK_MONOTONIC, &start);
+
+	for (index = 0 ; index < size ; index += step)
+		radix_tree_tag_set(root, index, 0);
+
+	clock_gettime(CLOCK_MONOTONIC, &finish);
+
+	nsec = (finish.tv_sec - start.tv_sec) * NSEC_PER_SEC +
+	       (finish.tv_nsec - start.tv_nsec);
+
+	printv(2, "Size: %8ld, step: %8ld, order: %d, tagging: %17lld ns\n",
+		size, step, order, nsec);
+}
+
+static void benchmark_delete(struct radix_tree_root *root,
+			     unsigned long size, unsigned long step, int order)
+{
+	struct timespec start, finish;
+	unsigned long index, i;
+	long long nsec;
+
+	clock_gettime(CLOCK_MONOTONIC, &start);
+
+	for (index = 0 ; index < size ; index += step)
+		for_each_index(i, index, order)
+			item_delete(root, i);
+
+	clock_gettime(CLOCK_MONOTONIC, &finish);
+
+	nsec = (finish.tv_sec - start.tv_sec) * NSEC_PER_SEC +
+	       (finish.tv_nsec - start.tv_nsec);
+
+	printv(2, "Size: %8ld, step: %8ld, order: %d, deletion: %16lld ns\n",
+		size, step, order, nsec);
+}
+
+static void benchmark_size(unsigned long size, unsigned long step, int order)
+{
+	RADIX_TREE(tree, GFP_KERNEL);
+	long long normal, tagged;
+
+	benchmark_insert(&tree, size, step, order);
+	benchmark_tagging(&tree, size, step, order);
+
+	tagged = benchmark_iter(&tree, true);
+	normal = benchmark_iter(&tree, false);
+
+	printv(2, "Size: %8ld, step: %8ld, order: %d, tagged iteration: %8lld ns\n",
+		size, step, order, tagged);
+	printv(2, "Size: %8ld, step: %8ld, order: %d, normal iteration: %8lld ns\n",
+		size, step, order, normal);
+
+	benchmark_delete(&tree, size, step, order);
+
+	item_kill_tree(&tree);
+	rcu_barrier();
+}
+
+static long long  __benchmark_split(unsigned long index,
+				    int old_order, int new_order)
+{
+	struct timespec start, finish;
+	long long nsec;
+	RADIX_TREE(tree, GFP_ATOMIC);
+
+	item_insert_order(&tree, index, old_order);
+
+	clock_gettime(CLOCK_MONOTONIC, &start);
+	radix_tree_split(&tree, index, new_order);
+	clock_gettime(CLOCK_MONOTONIC, &finish);
+	nsec = (finish.tv_sec - start.tv_sec) * NSEC_PER_SEC +
+	       (finish.tv_nsec - start.tv_nsec);
+
+	item_kill_tree(&tree);
+
+	return nsec;
+
+}
+
+static void benchmark_split(unsigned long size, unsigned long step)
+{
+	int i, j, idx;
+	long long nsec = 0;
+
+
+	for (idx = 0; idx < size; idx += step) {
+		for (i = 3; i < 11; i++) {
+			for (j = 0; j < i; j++) {
+				nsec += __benchmark_split(idx, i, j);
+			}
+		}
+	}
+
+	printv(2, "Size %8ld, step %8ld, split time %10lld ns\n",
+			size, step, nsec);
+
+}
+
+static long long  __benchmark_join(unsigned long index,
+			     unsigned order1, unsigned order2)
+{
+	unsigned long loc;
+	struct timespec start, finish;
+	long long nsec;
+	void *item, *item2 = item_create(index + 1, order1);
+	RADIX_TREE(tree, GFP_KERNEL);
+
+	item_insert_order(&tree, index, order2);
+	item = radix_tree_lookup(&tree, index);
+
+	clock_gettime(CLOCK_MONOTONIC, &start);
+	radix_tree_join(&tree, index + 1, order1, item2);
+	clock_gettime(CLOCK_MONOTONIC, &finish);
+	nsec = (finish.tv_sec - start.tv_sec) * NSEC_PER_SEC +
+		(finish.tv_nsec - start.tv_nsec);
+
+	loc = find_item(&tree, item);
+	if (loc == -1)
+		free(item);
+
+	item_kill_tree(&tree);
+
+	return nsec;
+}
+
+static void benchmark_join(unsigned long step)
+{
+	int i, j, idx;
+	long long nsec = 0;
+
+	for (idx = 0; idx < 1 << 10; idx += step) {
+		for (i = 1; i < 15; i++) {
+			for (j = 0; j < i; j++) {
+				nsec += __benchmark_join(idx, i, j);
+			}
+		}
+	}
+
+	printv(2, "Size %8d, step %8ld, join time %10lld ns\n",
+			1 << 10, step, nsec);
+}
+
+void benchmark(void)
+{
+	unsigned long size[] = {1 << 10, 1 << 20, 0};
+	unsigned long step[] = {1, 2, 7, 15, 63, 64, 65,
+				128, 256, 512, 12345, 0};
+	int c, s;
+
+	printv(1, "starting benchmarks\n");
+	printv(1, "RADIX_TREE_MAP_SHIFT = %d\n", RADIX_TREE_MAP_SHIFT);
+
+	for (c = 0; size[c]; c++)
+		for (s = 0; step[s]; s++)
+			benchmark_size(size[c], step[s], 0);
+
+	for (c = 0; size[c]; c++)
+		for (s = 0; step[s]; s++)
+			benchmark_size(size[c], step[s] << 9, 9);
+
+	for (c = 0; size[c]; c++)
+		for (s = 0; step[s]; s++)
+			benchmark_split(size[c], step[s]);
+
+	for (s = 0; step[s]; s++)
+		benchmark_join(step[s]);
+}
diff --git a/tools/testing/radix-tree/generated/autoconf.h b/tools/testing/radix-tree/generated/autoconf.h
new file mode 100644
index 000000000..cf88dc5b8
--- /dev/null
+++ b/tools/testing/radix-tree/generated/autoconf.h
@@ -0,0 +1 @@
+#define CONFIG_RADIX_TREE_MULTIORDER 1
diff --git a/tools/testing/radix-tree/idr-test.c b/tools/testing/radix-tree/idr-test.c
new file mode 100644
index 000000000..235eef71f
--- /dev/null
+++ b/tools/testing/radix-tree/idr-test.c
@@ -0,0 +1,512 @@
+/*
+ * idr-test.c: Test the IDR API
+ * Copyright (c) 2016 Matthew Wilcox <willy@infradead.org>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+#include <linux/bitmap.h>
+#include <linux/idr.h>
+#include <linux/slab.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+
+#include "test.h"
+
+#define DUMMY_PTR	((void *)0x12)
+
+int item_idr_free(int id, void *p, void *data)
+{
+	struct item *item = p;
+	assert(item->index == id);
+	free(p);
+
+	return 0;
+}
+
+void item_idr_remove(struct idr *idr, int id)
+{
+	struct item *item = idr_find(idr, id);
+	assert(item->index == id);
+	idr_remove(idr, id);
+	free(item);
+}
+
+void idr_alloc_test(void)
+{
+	unsigned long i;
+	DEFINE_IDR(idr);
+
+	assert(idr_alloc_cyclic(&idr, DUMMY_PTR, 0, 0x4000, GFP_KERNEL) == 0);
+	assert(idr_alloc_cyclic(&idr, DUMMY_PTR, 0x3ffd, 0x4000, GFP_KERNEL) == 0x3ffd);
+	idr_remove(&idr, 0x3ffd);
+	idr_remove(&idr, 0);
+
+	for (i = 0x3ffe; i < 0x4003; i++) {
+		int id;
+		struct item *item;
+
+		if (i < 0x4000)
+			item = item_create(i, 0);
+		else
+			item = item_create(i - 0x3fff, 0);
+
+		id = idr_alloc_cyclic(&idr, item, 1, 0x4000, GFP_KERNEL);
+		assert(id == item->index);
+	}
+
+	idr_for_each(&idr, item_idr_free, &idr);
+	idr_destroy(&idr);
+}
+
+void idr_replace_test(void)
+{
+	DEFINE_IDR(idr);
+
+	idr_alloc(&idr, (void *)-1, 10, 11, GFP_KERNEL);
+	idr_replace(&idr, &idr, 10);
+
+	idr_destroy(&idr);
+}
+
+/*
+ * Unlike the radix tree, you can put a NULL pointer -- with care -- into
+ * the IDR.  Some interfaces, like idr_find() do not distinguish between
+ * "present, value is NULL" and "not present", but that's exactly what some
+ * users want.
+ */
+void idr_null_test(void)
+{
+	int i;
+	DEFINE_IDR(idr);
+
+	assert(idr_is_empty(&idr));
+
+	assert(idr_alloc(&idr, NULL, 0, 0, GFP_KERNEL) == 0);
+	assert(!idr_is_empty(&idr));
+	idr_remove(&idr, 0);
+	assert(idr_is_empty(&idr));
+
+	assert(idr_alloc(&idr, NULL, 0, 0, GFP_KERNEL) == 0);
+	assert(!idr_is_empty(&idr));
+	idr_destroy(&idr);
+	assert(idr_is_empty(&idr));
+
+	for (i = 0; i < 10; i++) {
+		assert(idr_alloc(&idr, NULL, 0, 0, GFP_KERNEL) == i);
+	}
+
+	assert(idr_replace(&idr, DUMMY_PTR, 3) == NULL);
+	assert(idr_replace(&idr, DUMMY_PTR, 4) == NULL);
+	assert(idr_replace(&idr, NULL, 4) == DUMMY_PTR);
+	assert(idr_replace(&idr, DUMMY_PTR, 11) == ERR_PTR(-ENOENT));
+	idr_remove(&idr, 5);
+	assert(idr_alloc(&idr, NULL, 0, 0, GFP_KERNEL) == 5);
+	idr_remove(&idr, 5);
+
+	for (i = 0; i < 9; i++) {
+		idr_remove(&idr, i);
+		assert(!idr_is_empty(&idr));
+	}
+	idr_remove(&idr, 8);
+	assert(!idr_is_empty(&idr));
+	idr_remove(&idr, 9);
+	assert(idr_is_empty(&idr));
+
+	assert(idr_alloc(&idr, NULL, 0, 0, GFP_KERNEL) == 0);
+	assert(idr_replace(&idr, DUMMY_PTR, 3) == ERR_PTR(-ENOENT));
+	assert(idr_replace(&idr, DUMMY_PTR, 0) == NULL);
+	assert(idr_replace(&idr, NULL, 0) == DUMMY_PTR);
+
+	idr_destroy(&idr);
+	assert(idr_is_empty(&idr));
+
+	for (i = 1; i < 10; i++) {
+		assert(idr_alloc(&idr, NULL, 1, 0, GFP_KERNEL) == i);
+	}
+
+	idr_destroy(&idr);
+	assert(idr_is_empty(&idr));
+}
+
+void idr_nowait_test(void)
+{
+	unsigned int i;
+	DEFINE_IDR(idr);
+
+	idr_preload(GFP_KERNEL);
+
+	for (i = 0; i < 3; i++) {
+		struct item *item = item_create(i, 0);
+		assert(idr_alloc(&idr, item, i, i + 1, GFP_NOWAIT) == i);
+	}
+
+	idr_preload_end();
+
+	idr_for_each(&idr, item_idr_free, &idr);
+	idr_destroy(&idr);
+}
+
+void idr_get_next_test(int base)
+{
+	unsigned long i;
+	int nextid;
+	DEFINE_IDR(idr);
+	idr_init_base(&idr, base);
+
+	int indices[] = {4, 7, 9, 15, 65, 128, 1000, 99999, 0};
+
+	for(i = 0; indices[i]; i++) {
+		struct item *item = item_create(indices[i], 0);
+		assert(idr_alloc(&idr, item, indices[i], indices[i+1],
+				 GFP_KERNEL) == indices[i]);
+	}
+
+	for(i = 0, nextid = 0; indices[i]; i++) {
+		idr_get_next(&idr, &nextid);
+		assert(nextid == indices[i]);
+		nextid++;
+	}
+
+	idr_for_each(&idr, item_idr_free, &idr);
+	idr_destroy(&idr);
+}
+
+int idr_u32_cb(int id, void *ptr, void *data)
+{
+	BUG_ON(id < 0);
+	BUG_ON(ptr != DUMMY_PTR);
+	return 0;
+}
+
+void idr_u32_test1(struct idr *idr, u32 handle)
+{
+	static bool warned = false;
+	u32 id = handle;
+	int sid = 0;
+	void *ptr;
+
+	BUG_ON(idr_alloc_u32(idr, DUMMY_PTR, &id, id, GFP_KERNEL));
+	BUG_ON(id != handle);
+	BUG_ON(idr_alloc_u32(idr, DUMMY_PTR, &id, id, GFP_KERNEL) != -ENOSPC);
+	BUG_ON(id != handle);
+	if (!warned && id > INT_MAX)
+		printk("vvv Ignore these warnings\n");
+	ptr = idr_get_next(idr, &sid);
+	if (id > INT_MAX) {
+		BUG_ON(ptr != NULL);
+		BUG_ON(sid != 0);
+	} else {
+		BUG_ON(ptr != DUMMY_PTR);
+		BUG_ON(sid != id);
+	}
+	idr_for_each(idr, idr_u32_cb, NULL);
+	if (!warned && id > INT_MAX) {
+		printk("^^^ Warnings over\n");
+		warned = true;
+	}
+	BUG_ON(idr_remove(idr, id) != DUMMY_PTR);
+	BUG_ON(!idr_is_empty(idr));
+}
+
+void idr_u32_test(int base)
+{
+	DEFINE_IDR(idr);
+	idr_init_base(&idr, base);
+	idr_u32_test1(&idr, 10);
+	idr_u32_test1(&idr, 0x7fffffff);
+	idr_u32_test1(&idr, 0x80000000);
+	idr_u32_test1(&idr, 0x80000001);
+	idr_u32_test1(&idr, 0xffe00000);
+	idr_u32_test1(&idr, 0xffffffff);
+}
+
+static inline void *idr_mk_value(unsigned long v)
+{
+	BUG_ON((long)v < 0);
+	return (void *)((v & 1) | 2 | (v << 1));
+}
+
+DEFINE_IDR(find_idr);
+
+static void *idr_throbber(void *arg)
+{
+	time_t start = time(NULL);
+	int id = *(int *)arg;
+
+	rcu_register_thread();
+	do {
+		idr_alloc(&find_idr, idr_mk_value(id), id, id + 1, GFP_KERNEL);
+		idr_remove(&find_idr, id);
+	} while (time(NULL) < start + 10);
+	rcu_unregister_thread();
+
+	return NULL;
+}
+
+void idr_find_test_1(int anchor_id, int throbber_id)
+{
+	pthread_t throbber;
+	time_t start = time(NULL);
+
+	pthread_create(&throbber, NULL, idr_throbber, &throbber_id);
+
+	BUG_ON(idr_alloc(&find_idr, idr_mk_value(anchor_id), anchor_id,
+				anchor_id + 1, GFP_KERNEL) != anchor_id);
+
+	do {
+		int id = 0;
+		void *entry = idr_get_next(&find_idr, &id);
+		BUG_ON(entry != idr_mk_value(id));
+	} while (time(NULL) < start + 11);
+
+	pthread_join(throbber, NULL);
+
+	idr_remove(&find_idr, anchor_id);
+	BUG_ON(!idr_is_empty(&find_idr));
+}
+
+void idr_find_test(void)
+{
+	idr_find_test_1(100000, 0);
+	idr_find_test_1(0, 100000);
+}
+
+void idr_checks(void)
+{
+	unsigned long i;
+	DEFINE_IDR(idr);
+
+	for (i = 0; i < 10000; i++) {
+		struct item *item = item_create(i, 0);
+		assert(idr_alloc(&idr, item, 0, 20000, GFP_KERNEL) == i);
+	}
+
+	assert(idr_alloc(&idr, DUMMY_PTR, 5, 30, GFP_KERNEL) < 0);
+
+	for (i = 0; i < 5000; i++)
+		item_idr_remove(&idr, i);
+
+	idr_remove(&idr, 3);
+
+	idr_for_each(&idr, item_idr_free, &idr);
+	idr_destroy(&idr);
+
+	assert(idr_is_empty(&idr));
+
+	idr_remove(&idr, 3);
+	idr_remove(&idr, 0);
+
+	assert(idr_alloc(&idr, DUMMY_PTR, 0, 0, GFP_KERNEL) == 0);
+	idr_remove(&idr, 1);
+	for (i = 1; i < RADIX_TREE_MAP_SIZE; i++)
+		assert(idr_alloc(&idr, DUMMY_PTR, 0, 0, GFP_KERNEL) == i);
+	idr_remove(&idr, 1 << 30);
+	idr_destroy(&idr);
+
+	for (i = INT_MAX - 3UL; i < INT_MAX + 1UL; i++) {
+		struct item *item = item_create(i, 0);
+		assert(idr_alloc(&idr, item, i, i + 10, GFP_KERNEL) == i);
+	}
+	assert(idr_alloc(&idr, DUMMY_PTR, i - 2, i, GFP_KERNEL) == -ENOSPC);
+	assert(idr_alloc(&idr, DUMMY_PTR, i - 2, i + 10, GFP_KERNEL) == -ENOSPC);
+
+	idr_for_each(&idr, item_idr_free, &idr);
+	idr_destroy(&idr);
+	idr_destroy(&idr);
+
+	assert(idr_is_empty(&idr));
+
+	idr_set_cursor(&idr, INT_MAX - 3UL);
+	for (i = INT_MAX - 3UL; i < INT_MAX + 3UL; i++) {
+		struct item *item;
+		unsigned int id;
+		if (i <= INT_MAX)
+			item = item_create(i, 0);
+		else
+			item = item_create(i - INT_MAX - 1, 0);
+
+		id = idr_alloc_cyclic(&idr, item, 0, 0, GFP_KERNEL);
+		assert(id == item->index);
+	}
+
+	idr_for_each(&idr, item_idr_free, &idr);
+	idr_destroy(&idr);
+	assert(idr_is_empty(&idr));
+
+	for (i = 1; i < 10000; i++) {
+		struct item *item = item_create(i, 0);
+		assert(idr_alloc(&idr, item, 1, 20000, GFP_KERNEL) == i);
+	}
+
+	idr_for_each(&idr, item_idr_free, &idr);
+	idr_destroy(&idr);
+
+	idr_replace_test();
+	idr_alloc_test();
+	idr_null_test();
+	idr_nowait_test();
+	idr_get_next_test(0);
+	idr_get_next_test(1);
+	idr_get_next_test(4);
+	idr_u32_test(4);
+	idr_u32_test(1);
+	idr_u32_test(0);
+	idr_find_test();
+}
+
+#define module_init(x)
+#define module_exit(x)
+#define MODULE_AUTHOR(x)
+#define MODULE_LICENSE(x)
+#define dump_stack()    assert(0)
+void ida_dump(struct ida *);
+
+#include "../../../lib/test_ida.c"
+
+/*
+ * Check that we get the correct error when we run out of memory doing
+ * allocations.  In userspace, GFP_NOWAIT will always fail an allocation.
+ * The first test is for not having a bitmap available, and the second test
+ * is for not being able to allocate a level of the radix tree.
+ */
+void ida_check_nomem(void)
+{
+	DEFINE_IDA(ida);
+	int id;
+
+	id = ida_alloc_min(&ida, 256, GFP_NOWAIT);
+	IDA_BUG_ON(&ida, id != -ENOMEM);
+	id = ida_alloc_min(&ida, 1UL << 30, GFP_NOWAIT);
+	IDA_BUG_ON(&ida, id != -ENOMEM);
+	IDA_BUG_ON(&ida, !ida_is_empty(&ida));
+}
+
+/*
+ * Check handling of conversions between exceptional entries and full bitmaps.
+ */
+void ida_check_conv_user(void)
+{
+	DEFINE_IDA(ida);
+	unsigned long i;
+
+	radix_tree_cpu_dead(1);
+	for (i = 0; i < 1000000; i++) {
+		int id = ida_alloc(&ida, GFP_NOWAIT);
+		if (id == -ENOMEM) {
+			IDA_BUG_ON(&ida, (i % IDA_BITMAP_BITS) !=
+					BITS_PER_LONG - 2);
+			id = ida_alloc(&ida, GFP_KERNEL);
+		} else {
+			IDA_BUG_ON(&ida, (i % IDA_BITMAP_BITS) ==
+					BITS_PER_LONG - 2);
+		}
+		IDA_BUG_ON(&ida, id != i);
+	}
+	ida_destroy(&ida);
+}
+
+void ida_check_random(void)
+{
+	DEFINE_IDA(ida);
+	DECLARE_BITMAP(bitmap, 2048);
+	unsigned int i;
+	time_t s = time(NULL);
+
+ repeat:
+	memset(bitmap, 0, sizeof(bitmap));
+	for (i = 0; i < 100000; i++) {
+		int i = rand();
+		int bit = i & 2047;
+		if (test_bit(bit, bitmap)) {
+			__clear_bit(bit, bitmap);
+			ida_free(&ida, bit);
+		} else {
+			__set_bit(bit, bitmap);
+			IDA_BUG_ON(&ida, ida_alloc_min(&ida, bit, GFP_KERNEL)
+					!= bit);
+		}
+	}
+	ida_destroy(&ida);
+	if (time(NULL) < s + 10)
+		goto repeat;
+}
+
+void ida_simple_get_remove_test(void)
+{
+	DEFINE_IDA(ida);
+	unsigned long i;
+
+	for (i = 0; i < 10000; i++) {
+		assert(ida_simple_get(&ida, 0, 20000, GFP_KERNEL) == i);
+	}
+	assert(ida_simple_get(&ida, 5, 30, GFP_KERNEL) < 0);
+
+	for (i = 0; i < 10000; i++) {
+		ida_simple_remove(&ida, i);
+	}
+	assert(ida_is_empty(&ida));
+
+	ida_destroy(&ida);
+}
+
+void user_ida_checks(void)
+{
+	radix_tree_cpu_dead(1);
+
+	ida_check_nomem();
+	ida_check_conv_user();
+	ida_check_random();
+	ida_simple_get_remove_test();
+
+	radix_tree_cpu_dead(1);
+}
+
+static void *ida_random_fn(void *arg)
+{
+	rcu_register_thread();
+	ida_check_random();
+	rcu_unregister_thread();
+	return NULL;
+}
+
+void ida_thread_tests(void)
+{
+	pthread_t threads[20];
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(threads); i++)
+		if (pthread_create(&threads[i], NULL, ida_random_fn, NULL)) {
+			perror("creating ida thread");
+			exit(1);
+		}
+
+	while (i--)
+		pthread_join(threads[i], NULL);
+}
+
+void ida_tests(void)
+{
+	user_ida_checks();
+	ida_checks();
+	ida_exit();
+	ida_thread_tests();
+}
+
+int __weak main(void)
+{
+	radix_tree_init();
+	idr_checks();
+	ida_tests();
+	radix_tree_cpu_dead(1);
+	rcu_barrier();
+	if (nr_allocated)
+		printf("nr_allocated = %d\n", nr_allocated);
+	return 0;
+}
diff --git a/tools/testing/radix-tree/iteration_check.c b/tools/testing/radix-tree/iteration_check.c
new file mode 100644
index 000000000..a92bab513
--- /dev/null
+++ b/tools/testing/radix-tree/iteration_check.c
@@ -0,0 +1,221 @@
+/*
+ * iteration_check.c: test races having to do with radix tree iteration
+ * Copyright (c) 2016 Intel Corporation
+ * Author: Ross Zwisler <ross.zwisler@linux.intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+#include <linux/radix-tree.h>
+#include <pthread.h>
+#include "test.h"
+
+#define NUM_THREADS	5
+#define MAX_IDX		100
+#define TAG		0
+#define NEW_TAG		1
+
+static pthread_mutex_t tree_lock = PTHREAD_MUTEX_INITIALIZER;
+static pthread_t threads[NUM_THREADS];
+static unsigned int seeds[3];
+static RADIX_TREE(tree, GFP_KERNEL);
+static bool test_complete;
+static int max_order;
+
+/* relentlessly fill the tree with tagged entries */
+static void *add_entries_fn(void *arg)
+{
+	rcu_register_thread();
+
+	while (!test_complete) {
+		unsigned long pgoff;
+		int order;
+
+		for (pgoff = 0; pgoff < MAX_IDX; pgoff++) {
+			pthread_mutex_lock(&tree_lock);
+			for (order = max_order; order >= 0; order--) {
+				if (item_insert_order(&tree, pgoff, order)
+						== 0) {
+					item_tag_set(&tree, pgoff, TAG);
+					break;
+				}
+			}
+			pthread_mutex_unlock(&tree_lock);
+		}
+	}
+
+	rcu_unregister_thread();
+
+	return NULL;
+}
+
+/*
+ * Iterate over the tagged entries, doing a radix_tree_iter_retry() as we find
+ * things that have been removed and randomly resetting our iteration to the
+ * next chunk with radix_tree_iter_resume().  Both radix_tree_iter_retry() and
+ * radix_tree_iter_resume() cause radix_tree_next_slot() to be called with a
+ * NULL 'slot' variable.
+ */
+static void *tagged_iteration_fn(void *arg)
+{
+	struct radix_tree_iter iter;
+	void **slot;
+
+	rcu_register_thread();
+
+	while (!test_complete) {
+		rcu_read_lock();
+		radix_tree_for_each_tagged(slot, &tree, &iter, 0, TAG) {
+			void *entry = radix_tree_deref_slot(slot);
+			if (unlikely(!entry))
+				continue;
+
+			if (radix_tree_deref_retry(entry)) {
+				slot = radix_tree_iter_retry(&iter);
+				continue;
+			}
+
+			if (rand_r(&seeds[0]) % 50 == 0) {
+				slot = radix_tree_iter_resume(slot, &iter);
+				rcu_read_unlock();
+				rcu_barrier();
+				rcu_read_lock();
+			}
+		}
+		rcu_read_unlock();
+	}
+
+	rcu_unregister_thread();
+
+	return NULL;
+}
+
+/*
+ * Iterate over the entries, doing a radix_tree_iter_retry() as we find things
+ * that have been removed and randomly resetting our iteration to the next
+ * chunk with radix_tree_iter_resume().  Both radix_tree_iter_retry() and
+ * radix_tree_iter_resume() cause radix_tree_next_slot() to be called with a
+ * NULL 'slot' variable.
+ */
+static void *untagged_iteration_fn(void *arg)
+{
+	struct radix_tree_iter iter;
+	void **slot;
+
+	rcu_register_thread();
+
+	while (!test_complete) {
+		rcu_read_lock();
+		radix_tree_for_each_slot(slot, &tree, &iter, 0) {
+			void *entry = radix_tree_deref_slot(slot);
+			if (unlikely(!entry))
+				continue;
+
+			if (radix_tree_deref_retry(entry)) {
+				slot = radix_tree_iter_retry(&iter);
+				continue;
+			}
+
+			if (rand_r(&seeds[1]) % 50 == 0) {
+				slot = radix_tree_iter_resume(slot, &iter);
+				rcu_read_unlock();
+				rcu_barrier();
+				rcu_read_lock();
+			}
+		}
+		rcu_read_unlock();
+	}
+
+	rcu_unregister_thread();
+
+	return NULL;
+}
+
+/*
+ * Randomly remove entries to help induce radix_tree_iter_retry() calls in the
+ * two iteration functions.
+ */
+static void *remove_entries_fn(void *arg)
+{
+	rcu_register_thread();
+
+	while (!test_complete) {
+		int pgoff;
+
+		pgoff = rand_r(&seeds[2]) % MAX_IDX;
+
+		pthread_mutex_lock(&tree_lock);
+		item_delete(&tree, pgoff);
+		pthread_mutex_unlock(&tree_lock);
+	}
+
+	rcu_unregister_thread();
+
+	return NULL;
+}
+
+static void *tag_entries_fn(void *arg)
+{
+	rcu_register_thread();
+
+	while (!test_complete) {
+		tag_tagged_items(&tree, &tree_lock, 0, MAX_IDX, 10, TAG,
+					NEW_TAG);
+	}
+	rcu_unregister_thread();
+	return NULL;
+}
+
+/* This is a unit test for a bug found by the syzkaller tester */
+void iteration_test(unsigned order, unsigned test_duration)
+{
+	int i;
+
+	printv(1, "Running %siteration tests for %d seconds\n",
+			order > 0 ? "multiorder " : "", test_duration);
+
+	max_order = order;
+	test_complete = false;
+
+	for (i = 0; i < 3; i++)
+		seeds[i] = rand();
+
+	if (pthread_create(&threads[0], NULL, tagged_iteration_fn, NULL)) {
+		perror("create tagged iteration thread");
+		exit(1);
+	}
+	if (pthread_create(&threads[1], NULL, untagged_iteration_fn, NULL)) {
+		perror("create untagged iteration thread");
+		exit(1);
+	}
+	if (pthread_create(&threads[2], NULL, add_entries_fn, NULL)) {
+		perror("create add entry thread");
+		exit(1);
+	}
+	if (pthread_create(&threads[3], NULL, remove_entries_fn, NULL)) {
+		perror("create remove entry thread");
+		exit(1);
+	}
+	if (pthread_create(&threads[4], NULL, tag_entries_fn, NULL)) {
+		perror("create tag entry thread");
+		exit(1);
+	}
+
+	sleep(test_duration);
+	test_complete = true;
+
+	for (i = 0; i < NUM_THREADS; i++) {
+		if (pthread_join(threads[i], NULL)) {
+			perror("pthread_join");
+			exit(1);
+		}
+	}
+
+	item_kill_tree(&tree);
+}
diff --git a/tools/testing/radix-tree/linux.c b/tools/testing/radix-tree/linux.c
new file mode 100644
index 000000000..44a0d1ad4
--- /dev/null
+++ b/tools/testing/radix-tree/linux.c
@@ -0,0 +1,112 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdlib.h>
+#include <string.h>
+#include <malloc.h>
+#include <pthread.h>
+#include <unistd.h>
+#include <assert.h>
+
+#include <linux/gfp.h>
+#include <linux/poison.h>
+#include <linux/slab.h>
+#include <linux/radix-tree.h>
+#include <urcu/uatomic.h>
+
+int nr_allocated;
+int preempt_count;
+int kmalloc_verbose;
+int test_verbose;
+
+struct kmem_cache {
+	pthread_mutex_t lock;
+	int size;
+	int nr_objs;
+	void *objs;
+	void (*ctor)(void *);
+};
+
+void *kmem_cache_alloc(struct kmem_cache *cachep, int flags)
+{
+	struct radix_tree_node *node;
+
+	if (!(flags & __GFP_DIRECT_RECLAIM))
+		return NULL;
+
+	pthread_mutex_lock(&cachep->lock);
+	if (cachep->nr_objs) {
+		cachep->nr_objs--;
+		node = cachep->objs;
+		cachep->objs = node->parent;
+		pthread_mutex_unlock(&cachep->lock);
+		node->parent = NULL;
+	} else {
+		pthread_mutex_unlock(&cachep->lock);
+		node = malloc(cachep->size);
+		if (cachep->ctor)
+			cachep->ctor(node);
+	}
+
+	uatomic_inc(&nr_allocated);
+	if (kmalloc_verbose)
+		printf("Allocating %p from slab\n", node);
+	return node;
+}
+
+void kmem_cache_free(struct kmem_cache *cachep, void *objp)
+{
+	assert(objp);
+	uatomic_dec(&nr_allocated);
+	if (kmalloc_verbose)
+		printf("Freeing %p to slab\n", objp);
+	pthread_mutex_lock(&cachep->lock);
+	if (cachep->nr_objs > 10) {
+		memset(objp, POISON_FREE, cachep->size);
+		free(objp);
+	} else {
+		struct radix_tree_node *node = objp;
+		cachep->nr_objs++;
+		node->parent = cachep->objs;
+		cachep->objs = node;
+	}
+	pthread_mutex_unlock(&cachep->lock);
+}
+
+void *kmalloc(size_t size, gfp_t gfp)
+{
+	void *ret;
+
+	if (!(gfp & __GFP_DIRECT_RECLAIM))
+		return NULL;
+
+	ret = malloc(size);
+	uatomic_inc(&nr_allocated);
+	if (kmalloc_verbose)
+		printf("Allocating %p from malloc\n", ret);
+	if (gfp & __GFP_ZERO)
+		memset(ret, 0, size);
+	return ret;
+}
+
+void kfree(void *p)
+{
+	if (!p)
+		return;
+	uatomic_dec(&nr_allocated);
+	if (kmalloc_verbose)
+		printf("Freeing %p to malloc\n", p);
+	free(p);
+}
+
+struct kmem_cache *
+kmem_cache_create(const char *name, size_t size, size_t offset,
+	unsigned long flags, void (*ctor)(void *))
+{
+	struct kmem_cache *ret = malloc(sizeof(*ret));
+
+	pthread_mutex_init(&ret->lock, NULL);
+	ret->size = size;
+	ret->nr_objs = 0;
+	ret->objs = NULL;
+	ret->ctor = ctor;
+	return ret;
+}
diff --git a/tools/testing/radix-tree/linux/bug.h b/tools/testing/radix-tree/linux/bug.h
new file mode 100644
index 000000000..23b8ed52f
--- /dev/null
+++ b/tools/testing/radix-tree/linux/bug.h
@@ -0,0 +1 @@
+#include "asm/bug.h"
diff --git a/tools/testing/radix-tree/linux/compiler_types.h b/tools/testing/radix-tree/linux/compiler_types.h
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/tools/testing/radix-tree/linux/compiler_types.h
diff --git a/tools/testing/radix-tree/linux/cpu.h b/tools/testing/radix-tree/linux/cpu.h
new file mode 100644
index 000000000..a45530d78
--- /dev/null
+++ b/tools/testing/radix-tree/linux/cpu.h
@@ -0,0 +1 @@
+#define cpuhp_setup_state_nocalls(a, b, c, d)	(0)
diff --git a/tools/testing/radix-tree/linux/gfp.h b/tools/testing/radix-tree/linux/gfp.h
new file mode 100644
index 000000000..32159c08a
--- /dev/null
+++ b/tools/testing/radix-tree/linux/gfp.h
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _GFP_H
+#define _GFP_H
+
+#include <linux/types.h>
+
+#define __GFP_BITS_SHIFT 26
+#define __GFP_BITS_MASK ((gfp_t)((1 << __GFP_BITS_SHIFT) - 1))
+
+#define __GFP_HIGH		0x20u
+#define __GFP_IO		0x40u
+#define __GFP_FS		0x80u
+#define __GFP_NOWARN		0x200u
+#define __GFP_ZERO		0x8000u
+#define __GFP_ATOMIC		0x80000u
+#define __GFP_ACCOUNT		0x100000u
+#define __GFP_DIRECT_RECLAIM	0x400000u
+#define __GFP_KSWAPD_RECLAIM	0x2000000u
+
+#define __GFP_RECLAIM	(__GFP_DIRECT_RECLAIM|__GFP_KSWAPD_RECLAIM)
+
+#define GFP_ZONEMASK	0x0fu
+#define GFP_ATOMIC	(__GFP_HIGH|__GFP_ATOMIC|__GFP_KSWAPD_RECLAIM)
+#define GFP_KERNEL	(__GFP_RECLAIM | __GFP_IO | __GFP_FS)
+#define GFP_NOWAIT	(__GFP_KSWAPD_RECLAIM)
+
+
+static inline bool gfpflags_allow_blocking(const gfp_t gfp_flags)
+{
+	return !!(gfp_flags & __GFP_DIRECT_RECLAIM);
+}
+
+#endif
diff --git a/tools/testing/radix-tree/linux/idr.h b/tools/testing/radix-tree/linux/idr.h
new file mode 100644
index 000000000..4e342f2e3
--- /dev/null
+++ b/tools/testing/radix-tree/linux/idr.h
@@ -0,0 +1 @@
+#include "../../../../include/linux/idr.h"
diff --git a/tools/testing/radix-tree/linux/init.h b/tools/testing/radix-tree/linux/init.h
new file mode 100644
index 000000000..1bb0afc21
--- /dev/null
+++ b/tools/testing/radix-tree/linux/init.h
@@ -0,0 +1 @@
+#define __init
diff --git a/tools/testing/radix-tree/linux/kernel.h b/tools/testing/radix-tree/linux/kernel.h
new file mode 100644
index 000000000..426f32f28
--- /dev/null
+++ b/tools/testing/radix-tree/linux/kernel.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _KERNEL_H
+#define _KERNEL_H
+
+#include "../../include/linux/kernel.h"
+#include <string.h>
+#include <stdio.h>
+#include <limits.h>
+
+#include <linux/compiler.h>
+#include <linux/err.h>
+#include <linux/bitops.h>
+#include <linux/log2.h>
+#include "../../../include/linux/kconfig.h"
+
+#define printk printf
+#define pr_debug printk
+#define pr_cont printk
+
+#endif /* _KERNEL_H */
diff --git a/tools/testing/radix-tree/linux/kmemleak.h b/tools/testing/radix-tree/linux/kmemleak.h
new file mode 100644
index 000000000..155f11278
--- /dev/null
+++ b/tools/testing/radix-tree/linux/kmemleak.h
@@ -0,0 +1 @@
+static inline void kmemleak_update_trace(const void *ptr) { }
diff --git a/tools/testing/radix-tree/linux/percpu.h b/tools/testing/radix-tree/linux/percpu.h
new file mode 100644
index 000000000..b2403aa74
--- /dev/null
+++ b/tools/testing/radix-tree/linux/percpu.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#define DECLARE_PER_CPU(type, val) extern type val
+#define DEFINE_PER_CPU(type, val) type val
+
+#define __get_cpu_var(var)	var
+#define this_cpu_ptr(var)	var
+#define this_cpu_read(var)	var
+#define this_cpu_xchg(var, val)		uatomic_xchg(&var, val)
+#define this_cpu_cmpxchg(var, old, new)	uatomic_cmpxchg(&var, old, new)
+#define per_cpu_ptr(ptr, cpu)   ({ (void)(cpu); (ptr); })
+#define per_cpu(var, cpu)	(*per_cpu_ptr(&(var), cpu))
diff --git a/tools/testing/radix-tree/linux/preempt.h b/tools/testing/radix-tree/linux/preempt.h
new file mode 100644
index 000000000..edb10302b
--- /dev/null
+++ b/tools/testing/radix-tree/linux/preempt.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __LINUX_PREEMPT_H
+#define __LINUX_PREEMPT_H
+
+extern int preempt_count;
+
+#define preempt_disable()	uatomic_inc(&preempt_count)
+#define preempt_enable()	uatomic_dec(&preempt_count)
+
+static inline int in_interrupt(void)
+{
+	return 0;
+}
+
+#endif /* __LINUX_PREEMPT_H */
diff --git a/tools/testing/radix-tree/linux/radix-tree.h b/tools/testing/radix-tree/linux/radix-tree.h
new file mode 100644
index 000000000..24f13d27a
--- /dev/null
+++ b/tools/testing/radix-tree/linux/radix-tree.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _TEST_RADIX_TREE_H
+#define _TEST_RADIX_TREE_H
+
+#include "generated/map-shift.h"
+#include "../../../../include/linux/radix-tree.h"
+
+extern int kmalloc_verbose;
+extern int test_verbose;
+
+static inline void trace_call_rcu(struct rcu_head *head,
+		void (*func)(struct rcu_head *head))
+{
+	if (kmalloc_verbose)
+		printf("Delaying free of %p to slab\n", (char *)head -
+				offsetof(struct radix_tree_node, rcu_head));
+	call_rcu(head, func);
+}
+
+#define printv(verbosity_level, fmt, ...) \
+	if(test_verbose >= verbosity_level) \
+		printf(fmt, ##__VA_ARGS__)
+
+#undef call_rcu
+#define call_rcu(x, y) trace_call_rcu(x, y)
+
+#endif /* _TEST_RADIX_TREE_H */
diff --git a/tools/testing/radix-tree/linux/rcupdate.h b/tools/testing/radix-tree/linux/rcupdate.h
new file mode 100644
index 000000000..73ed33658
--- /dev/null
+++ b/tools/testing/radix-tree/linux/rcupdate.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _RCUPDATE_H
+#define _RCUPDATE_H
+
+#include <urcu.h>
+
+#define rcu_dereference_raw(p) rcu_dereference(p)
+#define rcu_dereference_protected(p, cond) rcu_dereference(p)
+
+#endif
diff --git a/tools/testing/radix-tree/linux/slab.h b/tools/testing/radix-tree/linux/slab.h
new file mode 100644
index 000000000..a037def0d
--- /dev/null
+++ b/tools/testing/radix-tree/linux/slab.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef SLAB_H
+#define SLAB_H
+
+#include <linux/types.h>
+#include <linux/gfp.h>
+
+#define SLAB_HWCACHE_ALIGN 1
+#define SLAB_PANIC 2
+#define SLAB_RECLAIM_ACCOUNT    0x00020000UL            /* Objects are reclaimable */
+
+void *kmalloc(size_t size, gfp_t);
+void kfree(void *);
+
+static inline void *kzalloc(size_t size, gfp_t gfp)
+{
+        return kmalloc(size, gfp | __GFP_ZERO);
+}
+
+void *kmem_cache_alloc(struct kmem_cache *cachep, int flags);
+void kmem_cache_free(struct kmem_cache *cachep, void *objp);
+
+struct kmem_cache *
+kmem_cache_create(const char *name, size_t size, size_t offset,
+	unsigned long flags, void (*ctor)(void *));
+
+#endif		/* SLAB_H */
diff --git a/tools/testing/radix-tree/linux/xarray.h b/tools/testing/radix-tree/linux/xarray.h
new file mode 100644
index 000000000..df3812cda
--- /dev/null
+++ b/tools/testing/radix-tree/linux/xarray.h
@@ -0,0 +1,2 @@
+#include "generated/map-shift.h"
+#include "../../../../include/linux/xarray.h"
diff --git a/tools/testing/radix-tree/main.c b/tools/testing/radix-tree/main.c
new file mode 100644
index 000000000..b741686e5
--- /dev/null
+++ b/tools/testing/radix-tree/main.c
@@ -0,0 +1,388 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <time.h>
+#include <assert.h>
+#include <limits.h>
+
+#include <linux/slab.h>
+#include <linux/radix-tree.h>
+
+#include "test.h"
+#include "regression.h"
+
+void __gang_check(unsigned long middle, long down, long up, int chunk, int hop)
+{
+	long idx;
+	RADIX_TREE(tree, GFP_KERNEL);
+
+	middle = 1 << 30;
+
+	for (idx = -down; idx < up; idx++)
+		item_insert(&tree, middle + idx);
+
+	item_check_absent(&tree, middle - down - 1);
+	for (idx = -down; idx < up; idx++)
+		item_check_present(&tree, middle + idx);
+	item_check_absent(&tree, middle + up);
+
+	if (chunk > 0) {
+		item_gang_check_present(&tree, middle - down, up + down,
+				chunk, hop);
+		item_full_scan(&tree, middle - down, down + up, chunk);
+	}
+	item_kill_tree(&tree);
+}
+
+void gang_check(void)
+{
+	__gang_check(1UL << 30, 128, 128, 35, 2);
+	__gang_check(1UL << 31, 128, 128, 32, 32);
+	__gang_check(1UL << 31, 128, 128, 32, 100);
+	__gang_check(1UL << 31, 128, 128, 17, 7);
+	__gang_check(0xffff0000UL, 0, 65536, 17, 7);
+	__gang_check(0xfffffffeUL, 1, 1, 17, 7);
+}
+
+void __big_gang_check(void)
+{
+	unsigned long start;
+	int wrapped = 0;
+
+	start = 0;
+	do {
+		unsigned long old_start;
+
+//		printf("0x%08lx\n", start);
+		__gang_check(start, rand() % 113 + 1, rand() % 71,
+				rand() % 157, rand() % 91 + 1);
+		old_start = start;
+		start += rand() % 1000000;
+		start %= 1ULL << 33;
+		if (start < old_start)
+			wrapped = 1;
+	} while (!wrapped);
+}
+
+void big_gang_check(bool long_run)
+{
+	int i;
+
+	for (i = 0; i < (long_run ? 1000 : 3); i++) {
+		__big_gang_check();
+		printv(2, "%d ", i);
+		fflush(stdout);
+	}
+}
+
+void add_and_check(void)
+{
+	RADIX_TREE(tree, GFP_KERNEL);
+
+	item_insert(&tree, 44);
+	item_check_present(&tree, 44);
+	item_check_absent(&tree, 43);
+	item_kill_tree(&tree);
+}
+
+void dynamic_height_check(void)
+{
+	int i;
+	RADIX_TREE(tree, GFP_KERNEL);
+	tree_verify_min_height(&tree, 0);
+
+	item_insert(&tree, 42);
+	tree_verify_min_height(&tree, 42);
+
+	item_insert(&tree, 1000000);
+	tree_verify_min_height(&tree, 1000000);
+
+	assert(item_delete(&tree, 1000000));
+	tree_verify_min_height(&tree, 42);
+
+	assert(item_delete(&tree, 42));
+	tree_verify_min_height(&tree, 0);
+
+	for (i = 0; i < 1000; i++) {
+		item_insert(&tree, i);
+		tree_verify_min_height(&tree, i);
+	}
+
+	i--;
+	for (;;) {
+		assert(item_delete(&tree, i));
+		if (i == 0) {
+			tree_verify_min_height(&tree, 0);
+			break;
+		}
+		i--;
+		tree_verify_min_height(&tree, i);
+	}
+
+	item_kill_tree(&tree);
+}
+
+void check_copied_tags(struct radix_tree_root *tree, unsigned long start, unsigned long end, unsigned long *idx, int count, int fromtag, int totag)
+{
+	int i;
+
+	for (i = 0; i < count; i++) {
+/*		if (i % 1000 == 0)
+			putchar('.'); */
+		if (idx[i] < start || idx[i] > end) {
+			if (item_tag_get(tree, idx[i], totag)) {
+				printv(2, "%lu-%lu: %lu, tags %d-%d\n", start,
+				       end, idx[i], item_tag_get(tree, idx[i],
+								 fromtag),
+				       item_tag_get(tree, idx[i], totag));
+			}
+			assert(!item_tag_get(tree, idx[i], totag));
+			continue;
+		}
+		if (item_tag_get(tree, idx[i], fromtag) ^
+			item_tag_get(tree, idx[i], totag)) {
+			printv(2, "%lu-%lu: %lu, tags %d-%d\n", start, end,
+			       idx[i], item_tag_get(tree, idx[i], fromtag),
+			       item_tag_get(tree, idx[i], totag));
+		}
+		assert(!(item_tag_get(tree, idx[i], fromtag) ^
+			 item_tag_get(tree, idx[i], totag)));
+	}
+}
+
+#define ITEMS 50000
+
+void copy_tag_check(void)
+{
+	RADIX_TREE(tree, GFP_KERNEL);
+	unsigned long idx[ITEMS];
+	unsigned long start, end, count = 0, tagged, cur, tmp;
+	int i;
+
+//	printf("generating radix tree indices...\n");
+	start = rand();
+	end = rand();
+	if (start > end && (rand() % 10)) {
+		cur = start;
+		start = end;
+		end = cur;
+	}
+	/* Specifically create items around the start and the end of the range
+	 * with high probability to check for off by one errors */
+	cur = rand();
+	if (cur & 1) {
+		item_insert(&tree, start);
+		if (cur & 2) {
+			if (start <= end)
+				count++;
+			item_tag_set(&tree, start, 0);
+		}
+	}
+	if (cur & 4) {
+		item_insert(&tree, start-1);
+		if (cur & 8)
+			item_tag_set(&tree, start-1, 0);
+	}
+	if (cur & 16) {
+		item_insert(&tree, end);
+		if (cur & 32) {
+			if (start <= end)
+				count++;
+			item_tag_set(&tree, end, 0);
+		}
+	}
+	if (cur & 64) {
+		item_insert(&tree, end+1);
+		if (cur & 128)
+			item_tag_set(&tree, end+1, 0);
+	}
+
+	for (i = 0; i < ITEMS; i++) {
+		do {
+			idx[i] = rand();
+		} while (item_lookup(&tree, idx[i]));
+
+		item_insert(&tree, idx[i]);
+		if (rand() & 1) {
+			item_tag_set(&tree, idx[i], 0);
+			if (idx[i] >= start && idx[i] <= end)
+				count++;
+		}
+/*		if (i % 1000 == 0)
+			putchar('.'); */
+	}
+
+//	printf("\ncopying tags...\n");
+	tagged = tag_tagged_items(&tree, NULL, start, end, ITEMS, 0, 1);
+
+//	printf("checking copied tags\n");
+	assert(tagged == count);
+	check_copied_tags(&tree, start, end, idx, ITEMS, 0, 1);
+
+	/* Copy tags in several rounds */
+//	printf("\ncopying tags...\n");
+	tmp = rand() % (count / 10 + 2);
+	tagged = tag_tagged_items(&tree, NULL, start, end, tmp, 0, 2);
+	assert(tagged == count);
+
+//	printf("%lu %lu %lu\n", tagged, tmp, count);
+//	printf("checking copied tags\n");
+	check_copied_tags(&tree, start, end, idx, ITEMS, 0, 2);
+	verify_tag_consistency(&tree, 0);
+	verify_tag_consistency(&tree, 1);
+	verify_tag_consistency(&tree, 2);
+//	printf("\n");
+	item_kill_tree(&tree);
+}
+
+static void __locate_check(struct radix_tree_root *tree, unsigned long index,
+			unsigned order)
+{
+	struct item *item;
+	unsigned long index2;
+
+	item_insert_order(tree, index, order);
+	item = item_lookup(tree, index);
+	index2 = find_item(tree, item);
+	if (index != index2) {
+		printv(2, "index %ld order %d inserted; found %ld\n",
+			index, order, index2);
+		abort();
+	}
+}
+
+static void __order_0_locate_check(void)
+{
+	RADIX_TREE(tree, GFP_KERNEL);
+	int i;
+
+	for (i = 0; i < 50; i++)
+		__locate_check(&tree, rand() % INT_MAX, 0);
+
+	item_kill_tree(&tree);
+}
+
+static void locate_check(void)
+{
+	RADIX_TREE(tree, GFP_KERNEL);
+	unsigned order;
+	unsigned long offset, index;
+
+	__order_0_locate_check();
+
+	for (order = 0; order < 20; order++) {
+		for (offset = 0; offset < (1 << (order + 3));
+		     offset += (1UL << order)) {
+			for (index = 0; index < (1UL << (order + 5));
+			     index += (1UL << order)) {
+				__locate_check(&tree, index + offset, order);
+			}
+			if (find_item(&tree, &tree) != -1)
+				abort();
+
+			item_kill_tree(&tree);
+		}
+	}
+
+	if (find_item(&tree, &tree) != -1)
+		abort();
+	__locate_check(&tree, -1, 0);
+	if (find_item(&tree, &tree) != -1)
+		abort();
+	item_kill_tree(&tree);
+}
+
+static void single_thread_tests(bool long_run)
+{
+	int i;
+
+	printv(1, "starting single_thread_tests: %d allocated, preempt %d\n",
+		nr_allocated, preempt_count);
+	multiorder_checks();
+	rcu_barrier();
+	printv(2, "after multiorder_check: %d allocated, preempt %d\n",
+		nr_allocated, preempt_count);
+	locate_check();
+	rcu_barrier();
+	printv(2, "after locate_check: %d allocated, preempt %d\n",
+		nr_allocated, preempt_count);
+	tag_check();
+	rcu_barrier();
+	printv(2, "after tag_check: %d allocated, preempt %d\n",
+		nr_allocated, preempt_count);
+	gang_check();
+	rcu_barrier();
+	printv(2, "after gang_check: %d allocated, preempt %d\n",
+		nr_allocated, preempt_count);
+	add_and_check();
+	rcu_barrier();
+	printv(2, "after add_and_check: %d allocated, preempt %d\n",
+		nr_allocated, preempt_count);
+	dynamic_height_check();
+	rcu_barrier();
+	printv(2, "after dynamic_height_check: %d allocated, preempt %d\n",
+		nr_allocated, preempt_count);
+	idr_checks();
+	ida_tests();
+	rcu_barrier();
+	printv(2, "after idr_checks: %d allocated, preempt %d\n",
+		nr_allocated, preempt_count);
+	big_gang_check(long_run);
+	rcu_barrier();
+	printv(2, "after big_gang_check: %d allocated, preempt %d\n",
+		nr_allocated, preempt_count);
+	for (i = 0; i < (long_run ? 2000 : 3); i++) {
+		copy_tag_check();
+		printv(2, "%d ", i);
+		fflush(stdout);
+	}
+	rcu_barrier();
+	printv(2, "after copy_tag_check: %d allocated, preempt %d\n",
+		nr_allocated, preempt_count);
+}
+
+int main(int argc, char **argv)
+{
+	bool long_run = false;
+	int opt;
+	unsigned int seed = time(NULL);
+
+	while ((opt = getopt(argc, argv, "ls:v")) != -1) {
+		if (opt == 'l')
+			long_run = true;
+		else if (opt == 's')
+			seed = strtoul(optarg, NULL, 0);
+		else if (opt == 'v')
+			test_verbose++;
+	}
+
+	printf("random seed %u\n", seed);
+	srand(seed);
+
+	printf("running tests\n");
+
+	rcu_register_thread();
+	radix_tree_init();
+
+	regression1_test();
+	regression2_test();
+	regression3_test();
+	iteration_test(0, 10 + 90 * long_run);
+	iteration_test(7, 10 + 90 * long_run);
+	single_thread_tests(long_run);
+
+	/* Free any remaining preallocated nodes */
+	radix_tree_cpu_dead(0);
+
+	benchmark();
+
+	rcu_barrier();
+	printv(2, "after rcu_barrier: %d allocated, preempt %d\n",
+		nr_allocated, preempt_count);
+	rcu_unregister_thread();
+
+	printf("tests completed\n");
+
+	exit(0);
+}
diff --git a/tools/testing/radix-tree/multiorder.c b/tools/testing/radix-tree/multiorder.c
new file mode 100644
index 000000000..7bf405638
--- /dev/null
+++ b/tools/testing/radix-tree/multiorder.c
@@ -0,0 +1,719 @@
+/*
+ * multiorder.c: Multi-order radix tree entry testing
+ * Copyright (c) 2016 Intel Corporation
+ * Author: Ross Zwisler <ross.zwisler@linux.intel.com>
+ * Author: Matthew Wilcox <matthew.r.wilcox@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+#include <linux/radix-tree.h>
+#include <linux/slab.h>
+#include <linux/errno.h>
+#include <pthread.h>
+
+#include "test.h"
+
+#define for_each_index(i, base, order) \
+	for (i = base; i < base + (1 << order); i++)
+
+static void __multiorder_tag_test(int index, int order)
+{
+	RADIX_TREE(tree, GFP_KERNEL);
+	int base, err, i;
+
+	/* our canonical entry */
+	base = index & ~((1 << order) - 1);
+
+	printv(2, "Multiorder tag test with index %d, canonical entry %d\n",
+			index, base);
+
+	err = item_insert_order(&tree, index, order);
+	assert(!err);
+
+	/*
+	 * Verify we get collisions for covered indices.  We try and fail to
+	 * insert an exceptional entry so we don't leak memory via
+	 * item_insert_order().
+	 */
+	for_each_index(i, base, order) {
+		err = __radix_tree_insert(&tree, i, order,
+				(void *)(0xA0 | RADIX_TREE_EXCEPTIONAL_ENTRY));
+		assert(err == -EEXIST);
+	}
+
+	for_each_index(i, base, order) {
+		assert(!radix_tree_tag_get(&tree, i, 0));
+		assert(!radix_tree_tag_get(&tree, i, 1));
+	}
+
+	assert(radix_tree_tag_set(&tree, index, 0));
+
+	for_each_index(i, base, order) {
+		assert(radix_tree_tag_get(&tree, i, 0));
+		assert(!radix_tree_tag_get(&tree, i, 1));
+	}
+
+	assert(tag_tagged_items(&tree, NULL, 0, ~0UL, 10, 0, 1) == 1);
+	assert(radix_tree_tag_clear(&tree, index, 0));
+
+	for_each_index(i, base, order) {
+		assert(!radix_tree_tag_get(&tree, i, 0));
+		assert(radix_tree_tag_get(&tree, i, 1));
+	}
+
+	assert(radix_tree_tag_clear(&tree, index, 1));
+
+	assert(!radix_tree_tagged(&tree, 0));
+	assert(!radix_tree_tagged(&tree, 1));
+
+	item_kill_tree(&tree);
+}
+
+static void __multiorder_tag_test2(unsigned order, unsigned long index2)
+{
+	RADIX_TREE(tree, GFP_KERNEL);
+	unsigned long index = (1 << order);
+	index2 += index;
+
+	assert(item_insert_order(&tree, 0, order) == 0);
+	assert(item_insert(&tree, index2) == 0);
+
+	assert(radix_tree_tag_set(&tree, 0, 0));
+	assert(radix_tree_tag_set(&tree, index2, 0));
+
+	assert(tag_tagged_items(&tree, NULL, 0, ~0UL, 10, 0, 1) == 2);
+
+	item_kill_tree(&tree);
+}
+
+static void multiorder_tag_tests(void)
+{
+	int i, j;
+
+	/* test multi-order entry for indices 0-7 with no sibling pointers */
+	__multiorder_tag_test(0, 3);
+	__multiorder_tag_test(5, 3);
+
+	/* test multi-order entry for indices 8-15 with no sibling pointers */
+	__multiorder_tag_test(8, 3);
+	__multiorder_tag_test(15, 3);
+
+	/*
+	 * Our order 5 entry covers indices 0-31 in a tree with height=2.
+	 * This is broken up as follows:
+	 * 0-7:		canonical entry
+	 * 8-15:	sibling 1
+	 * 16-23:	sibling 2
+	 * 24-31:	sibling 3
+	 */
+	__multiorder_tag_test(0, 5);
+	__multiorder_tag_test(29, 5);
+
+	/* same test, but with indices 32-63 */
+	__multiorder_tag_test(32, 5);
+	__multiorder_tag_test(44, 5);
+
+	/*
+	 * Our order 8 entry covers indices 0-255 in a tree with height=3.
+	 * This is broken up as follows:
+	 * 0-63:	canonical entry
+	 * 64-127:	sibling 1
+	 * 128-191:	sibling 2
+	 * 192-255:	sibling 3
+	 */
+	__multiorder_tag_test(0, 8);
+	__multiorder_tag_test(190, 8);
+
+	/* same test, but with indices 256-511 */
+	__multiorder_tag_test(256, 8);
+	__multiorder_tag_test(300, 8);
+
+	__multiorder_tag_test(0x12345678UL, 8);
+
+	for (i = 1; i < 10; i++)
+		for (j = 0; j < (10 << i); j++)
+			__multiorder_tag_test2(i, j);
+}
+
+static void multiorder_check(unsigned long index, int order)
+{
+	unsigned long i;
+	unsigned long min = index & ~((1UL << order) - 1);
+	unsigned long max = min + (1UL << order);
+	void **slot;
+	struct item *item2 = item_create(min, order);
+	RADIX_TREE(tree, GFP_KERNEL);
+
+	printv(2, "Multiorder index %ld, order %d\n", index, order);
+
+	assert(item_insert_order(&tree, index, order) == 0);
+
+	for (i = min; i < max; i++) {
+		struct item *item = item_lookup(&tree, i);
+		assert(item != 0);
+		assert(item->index == index);
+	}
+	for (i = 0; i < min; i++)
+		item_check_absent(&tree, i);
+	for (i = max; i < 2*max; i++)
+		item_check_absent(&tree, i);
+	for (i = min; i < max; i++)
+		assert(radix_tree_insert(&tree, i, item2) == -EEXIST);
+
+	slot = radix_tree_lookup_slot(&tree, index);
+	free(*slot);
+	radix_tree_replace_slot(&tree, slot, item2);
+	for (i = min; i < max; i++) {
+		struct item *item = item_lookup(&tree, i);
+		assert(item != 0);
+		assert(item->index == min);
+	}
+
+	assert(item_delete(&tree, min) != 0);
+
+	for (i = 0; i < 2*max; i++)
+		item_check_absent(&tree, i);
+}
+
+static void multiorder_shrink(unsigned long index, int order)
+{
+	unsigned long i;
+	unsigned long max = 1 << order;
+	RADIX_TREE(tree, GFP_KERNEL);
+	struct radix_tree_node *node;
+
+	printv(2, "Multiorder shrink index %ld, order %d\n", index, order);
+
+	assert(item_insert_order(&tree, 0, order) == 0);
+
+	node = tree.rnode;
+
+	assert(item_insert(&tree, index) == 0);
+	assert(node != tree.rnode);
+
+	assert(item_delete(&tree, index) != 0);
+	assert(node == tree.rnode);
+
+	for (i = 0; i < max; i++) {
+		struct item *item = item_lookup(&tree, i);
+		assert(item != 0);
+		assert(item->index == 0);
+	}
+	for (i = max; i < 2*max; i++)
+		item_check_absent(&tree, i);
+
+	if (!item_delete(&tree, 0)) {
+		printv(2, "failed to delete index %ld (order %d)\n", index, order);
+		abort();
+	}
+
+	for (i = 0; i < 2*max; i++)
+		item_check_absent(&tree, i);
+}
+
+static void multiorder_insert_bug(void)
+{
+	RADIX_TREE(tree, GFP_KERNEL);
+
+	item_insert(&tree, 0);
+	radix_tree_tag_set(&tree, 0, 0);
+	item_insert_order(&tree, 3 << 6, 6);
+
+	item_kill_tree(&tree);
+}
+
+void multiorder_iteration(void)
+{
+	RADIX_TREE(tree, GFP_KERNEL);
+	struct radix_tree_iter iter;
+	void **slot;
+	int i, j, err;
+
+	printv(1, "Multiorder iteration test\n");
+
+#define NUM_ENTRIES 11
+	int index[NUM_ENTRIES] = {0, 2, 4, 8, 16, 32, 34, 36, 64, 72, 128};
+	int order[NUM_ENTRIES] = {1, 1, 2, 3,  4,  1,  0,  1,  3,  0, 7};
+
+	for (i = 0; i < NUM_ENTRIES; i++) {
+		err = item_insert_order(&tree, index[i], order[i]);
+		assert(!err);
+	}
+
+	for (j = 0; j < 256; j++) {
+		for (i = 0; i < NUM_ENTRIES; i++)
+			if (j <= (index[i] | ((1 << order[i]) - 1)))
+				break;
+
+		radix_tree_for_each_slot(slot, &tree, &iter, j) {
+			int height = order[i] / RADIX_TREE_MAP_SHIFT;
+			int shift = height * RADIX_TREE_MAP_SHIFT;
+			unsigned long mask = (1UL << order[i]) - 1;
+			struct item *item = *slot;
+
+			assert((iter.index | mask) == (index[i] | mask));
+			assert(iter.shift == shift);
+			assert(!radix_tree_is_internal_node(item));
+			assert((item->index | mask) == (index[i] | mask));
+			assert(item->order == order[i]);
+			i++;
+		}
+	}
+
+	item_kill_tree(&tree);
+}
+
+void multiorder_tagged_iteration(void)
+{
+	RADIX_TREE(tree, GFP_KERNEL);
+	struct radix_tree_iter iter;
+	void **slot;
+	int i, j;
+
+	printv(1, "Multiorder tagged iteration test\n");
+
+#define MT_NUM_ENTRIES 9
+	int index[MT_NUM_ENTRIES] = {0, 2, 4, 16, 32, 40, 64, 72, 128};
+	int order[MT_NUM_ENTRIES] = {1, 0, 2, 4,  3,  1,  3,  0,   7};
+
+#define TAG_ENTRIES 7
+	int tag_index[TAG_ENTRIES] = {0, 4, 16, 40, 64, 72, 128};
+
+	for (i = 0; i < MT_NUM_ENTRIES; i++)
+		assert(!item_insert_order(&tree, index[i], order[i]));
+
+	assert(!radix_tree_tagged(&tree, 1));
+
+	for (i = 0; i < TAG_ENTRIES; i++)
+		assert(radix_tree_tag_set(&tree, tag_index[i], 1));
+
+	for (j = 0; j < 256; j++) {
+		int k;
+
+		for (i = 0; i < TAG_ENTRIES; i++) {
+			for (k = i; index[k] < tag_index[i]; k++)
+				;
+			if (j <= (index[k] | ((1 << order[k]) - 1)))
+				break;
+		}
+
+		radix_tree_for_each_tagged(slot, &tree, &iter, j, 1) {
+			unsigned long mask;
+			struct item *item = *slot;
+			for (k = i; index[k] < tag_index[i]; k++)
+				;
+			mask = (1UL << order[k]) - 1;
+
+			assert((iter.index | mask) == (tag_index[i] | mask));
+			assert(!radix_tree_is_internal_node(item));
+			assert((item->index | mask) == (tag_index[i] | mask));
+			assert(item->order == order[k]);
+			i++;
+		}
+	}
+
+	assert(tag_tagged_items(&tree, NULL, 0, ~0UL, TAG_ENTRIES, 1, 2) ==
+				TAG_ENTRIES);
+
+	for (j = 0; j < 256; j++) {
+		int mask, k;
+
+		for (i = 0; i < TAG_ENTRIES; i++) {
+			for (k = i; index[k] < tag_index[i]; k++)
+				;
+			if (j <= (index[k] | ((1 << order[k]) - 1)))
+				break;
+		}
+
+		radix_tree_for_each_tagged(slot, &tree, &iter, j, 2) {
+			struct item *item = *slot;
+			for (k = i; index[k] < tag_index[i]; k++)
+				;
+			mask = (1 << order[k]) - 1;
+
+			assert((iter.index | mask) == (tag_index[i] | mask));
+			assert(!radix_tree_is_internal_node(item));
+			assert((item->index | mask) == (tag_index[i] | mask));
+			assert(item->order == order[k]);
+			i++;
+		}
+	}
+
+	assert(tag_tagged_items(&tree, NULL, 1, ~0UL, MT_NUM_ENTRIES * 2, 1, 0)
+			== TAG_ENTRIES);
+	i = 0;
+	radix_tree_for_each_tagged(slot, &tree, &iter, 0, 0) {
+		assert(iter.index == tag_index[i]);
+		i++;
+	}
+
+	item_kill_tree(&tree);
+}
+
+/*
+ * Basic join checks: make sure we can't find an entry in the tree after
+ * a larger entry has replaced it
+ */
+static void multiorder_join1(unsigned long index,
+				unsigned order1, unsigned order2)
+{
+	unsigned long loc;
+	void *item, *item2 = item_create(index + 1, order1);
+	RADIX_TREE(tree, GFP_KERNEL);
+
+	item_insert_order(&tree, index, order2);
+	item = radix_tree_lookup(&tree, index);
+	radix_tree_join(&tree, index + 1, order1, item2);
+	loc = find_item(&tree, item);
+	if (loc == -1)
+		free(item);
+	item = radix_tree_lookup(&tree, index + 1);
+	assert(item == item2);
+	item_kill_tree(&tree);
+}
+
+/*
+ * Check that the accounting of exceptional entries is handled correctly
+ * by joining an exceptional entry to a normal pointer.
+ */
+static void multiorder_join2(unsigned order1, unsigned order2)
+{
+	RADIX_TREE(tree, GFP_KERNEL);
+	struct radix_tree_node *node;
+	void *item1 = item_create(0, order1);
+	void *item2;
+
+	item_insert_order(&tree, 0, order2);
+	radix_tree_insert(&tree, 1 << order2, (void *)0x12UL);
+	item2 = __radix_tree_lookup(&tree, 1 << order2, &node, NULL);
+	assert(item2 == (void *)0x12UL);
+	assert(node->exceptional == 1);
+
+	item2 = radix_tree_lookup(&tree, 0);
+	free(item2);
+
+	radix_tree_join(&tree, 0, order1, item1);
+	item2 = __radix_tree_lookup(&tree, 1 << order2, &node, NULL);
+	assert(item2 == item1);
+	assert(node->exceptional == 0);
+	item_kill_tree(&tree);
+}
+
+/*
+ * This test revealed an accounting bug for exceptional entries at one point.
+ * Nodes were being freed back into the pool with an elevated exception count
+ * by radix_tree_join() and then radix_tree_split() was failing to zero the
+ * count of exceptional entries.
+ */
+static void multiorder_join3(unsigned int order)
+{
+	RADIX_TREE(tree, GFP_KERNEL);
+	struct radix_tree_node *node;
+	void **slot;
+	struct radix_tree_iter iter;
+	unsigned long i;
+
+	for (i = 0; i < (1 << order); i++) {
+		radix_tree_insert(&tree, i, (void *)0x12UL);
+	}
+
+	radix_tree_join(&tree, 0, order, (void *)0x16UL);
+	rcu_barrier();
+
+	radix_tree_split(&tree, 0, 0);
+
+	radix_tree_for_each_slot(slot, &tree, &iter, 0) {
+		radix_tree_iter_replace(&tree, &iter, slot, (void *)0x12UL);
+	}
+
+	__radix_tree_lookup(&tree, 0, &node, NULL);
+	assert(node->exceptional == node->count);
+
+	item_kill_tree(&tree);
+}
+
+static void multiorder_join(void)
+{
+	int i, j, idx;
+
+	for (idx = 0; idx < 1024; idx = idx * 2 + 3) {
+		for (i = 1; i < 15; i++) {
+			for (j = 0; j < i; j++) {
+				multiorder_join1(idx, i, j);
+			}
+		}
+	}
+
+	for (i = 1; i < 15; i++) {
+		for (j = 0; j < i; j++) {
+			multiorder_join2(i, j);
+		}
+	}
+
+	for (i = 3; i < 10; i++) {
+		multiorder_join3(i);
+	}
+}
+
+static void check_mem(unsigned old_order, unsigned new_order, unsigned alloc)
+{
+	struct radix_tree_preload *rtp = &radix_tree_preloads;
+	if (rtp->nr != 0)
+		printv(2, "split(%u %u) remaining %u\n", old_order, new_order,
+							rtp->nr);
+	/*
+	 * Can't check for equality here as some nodes may have been
+	 * RCU-freed while we ran.  But we should never finish with more
+	 * nodes allocated since they should have all been preloaded.
+	 */
+	if (nr_allocated > alloc)
+		printv(2, "split(%u %u) allocated %u %u\n", old_order, new_order,
+							alloc, nr_allocated);
+}
+
+static void __multiorder_split(int old_order, int new_order)
+{
+	RADIX_TREE(tree, GFP_ATOMIC);
+	void **slot;
+	struct radix_tree_iter iter;
+	unsigned alloc;
+	struct item *item;
+
+	radix_tree_preload(GFP_KERNEL);
+	assert(item_insert_order(&tree, 0, old_order) == 0);
+	radix_tree_preload_end();
+
+	/* Wipe out the preloaded cache or it'll confuse check_mem() */
+	radix_tree_cpu_dead(0);
+
+	item = radix_tree_tag_set(&tree, 0, 2);
+
+	radix_tree_split_preload(old_order, new_order, GFP_KERNEL);
+	alloc = nr_allocated;
+	radix_tree_split(&tree, 0, new_order);
+	check_mem(old_order, new_order, alloc);
+	radix_tree_for_each_slot(slot, &tree, &iter, 0) {
+		radix_tree_iter_replace(&tree, &iter, slot,
+					item_create(iter.index, new_order));
+	}
+	radix_tree_preload_end();
+
+	item_kill_tree(&tree);
+	free(item);
+}
+
+static void __multiorder_split2(int old_order, int new_order)
+{
+	RADIX_TREE(tree, GFP_KERNEL);
+	void **slot;
+	struct radix_tree_iter iter;
+	struct radix_tree_node *node;
+	void *item;
+
+	__radix_tree_insert(&tree, 0, old_order, (void *)0x12);
+
+	item = __radix_tree_lookup(&tree, 0, &node, NULL);
+	assert(item == (void *)0x12);
+	assert(node->exceptional > 0);
+
+	radix_tree_split(&tree, 0, new_order);
+	radix_tree_for_each_slot(slot, &tree, &iter, 0) {
+		radix_tree_iter_replace(&tree, &iter, slot,
+					item_create(iter.index, new_order));
+	}
+
+	item = __radix_tree_lookup(&tree, 0, &node, NULL);
+	assert(item != (void *)0x12);
+	assert(node->exceptional == 0);
+
+	item_kill_tree(&tree);
+}
+
+static void __multiorder_split3(int old_order, int new_order)
+{
+	RADIX_TREE(tree, GFP_KERNEL);
+	void **slot;
+	struct radix_tree_iter iter;
+	struct radix_tree_node *node;
+	void *item;
+
+	__radix_tree_insert(&tree, 0, old_order, (void *)0x12);
+
+	item = __radix_tree_lookup(&tree, 0, &node, NULL);
+	assert(item == (void *)0x12);
+	assert(node->exceptional > 0);
+
+	radix_tree_split(&tree, 0, new_order);
+	radix_tree_for_each_slot(slot, &tree, &iter, 0) {
+		radix_tree_iter_replace(&tree, &iter, slot, (void *)0x16);
+	}
+
+	item = __radix_tree_lookup(&tree, 0, &node, NULL);
+	assert(item == (void *)0x16);
+	assert(node->exceptional > 0);
+
+	item_kill_tree(&tree);
+
+	__radix_tree_insert(&tree, 0, old_order, (void *)0x12);
+
+	item = __radix_tree_lookup(&tree, 0, &node, NULL);
+	assert(item == (void *)0x12);
+	assert(node->exceptional > 0);
+
+	radix_tree_split(&tree, 0, new_order);
+	radix_tree_for_each_slot(slot, &tree, &iter, 0) {
+		if (iter.index == (1 << new_order))
+			radix_tree_iter_replace(&tree, &iter, slot,
+						(void *)0x16);
+		else
+			radix_tree_iter_replace(&tree, &iter, slot, NULL);
+	}
+
+	item = __radix_tree_lookup(&tree, 1 << new_order, &node, NULL);
+	assert(item == (void *)0x16);
+	assert(node->count == node->exceptional);
+	do {
+		node = node->parent;
+		if (!node)
+			break;
+		assert(node->count == 1);
+		assert(node->exceptional == 0);
+	} while (1);
+
+	item_kill_tree(&tree);
+}
+
+static void multiorder_split(void)
+{
+	int i, j;
+
+	for (i = 3; i < 11; i++)
+		for (j = 0; j < i; j++) {
+			__multiorder_split(i, j);
+			__multiorder_split2(i, j);
+			__multiorder_split3(i, j);
+		}
+}
+
+static void multiorder_account(void)
+{
+	RADIX_TREE(tree, GFP_KERNEL);
+	struct radix_tree_node *node;
+	void **slot;
+
+	item_insert_order(&tree, 0, 5);
+
+	__radix_tree_insert(&tree, 1 << 5, 5, (void *)0x12);
+	__radix_tree_lookup(&tree, 0, &node, NULL);
+	assert(node->count == node->exceptional * 2);
+	radix_tree_delete(&tree, 1 << 5);
+	assert(node->exceptional == 0);
+
+	__radix_tree_insert(&tree, 1 << 5, 5, (void *)0x12);
+	__radix_tree_lookup(&tree, 1 << 5, &node, &slot);
+	assert(node->count == node->exceptional * 2);
+	__radix_tree_replace(&tree, node, slot, NULL, NULL);
+	assert(node->exceptional == 0);
+
+	item_kill_tree(&tree);
+}
+
+bool stop_iteration = false;
+
+static void *creator_func(void *ptr)
+{
+	/* 'order' is set up to ensure we have sibling entries */
+	unsigned int order = RADIX_TREE_MAP_SHIFT - 1;
+	struct radix_tree_root *tree = ptr;
+	int i;
+
+	for (i = 0; i < 10000; i++) {
+		item_insert_order(tree, 0, order);
+		item_delete_rcu(tree, 0);
+	}
+
+	stop_iteration = true;
+	return NULL;
+}
+
+static void *iterator_func(void *ptr)
+{
+	struct radix_tree_root *tree = ptr;
+	struct radix_tree_iter iter;
+	struct item *item;
+	void **slot;
+
+	while (!stop_iteration) {
+		rcu_read_lock();
+		radix_tree_for_each_slot(slot, tree, &iter, 0) {
+			item = radix_tree_deref_slot(slot);
+
+			if (!item)
+				continue;
+			if (radix_tree_deref_retry(item)) {
+				slot = radix_tree_iter_retry(&iter);
+				continue;
+			}
+
+			item_sanity(item, iter.index);
+		}
+		rcu_read_unlock();
+	}
+	return NULL;
+}
+
+static void multiorder_iteration_race(void)
+{
+	const int num_threads = sysconf(_SC_NPROCESSORS_ONLN);
+	pthread_t worker_thread[num_threads];
+	RADIX_TREE(tree, GFP_KERNEL);
+	int i;
+
+	pthread_create(&worker_thread[0], NULL, &creator_func, &tree);
+	for (i = 1; i < num_threads; i++)
+		pthread_create(&worker_thread[i], NULL, &iterator_func, &tree);
+
+	for (i = 0; i < num_threads; i++)
+		pthread_join(worker_thread[i], NULL);
+
+	item_kill_tree(&tree);
+}
+
+void multiorder_checks(void)
+{
+	int i;
+
+	for (i = 0; i < 20; i++) {
+		multiorder_check(200, i);
+		multiorder_check(0, i);
+		multiorder_check((1UL << i) + 1, i);
+	}
+
+	for (i = 0; i < 15; i++)
+		multiorder_shrink((1UL << (i + RADIX_TREE_MAP_SHIFT)), i);
+
+	multiorder_insert_bug();
+	multiorder_tag_tests();
+	multiorder_iteration();
+	multiorder_tagged_iteration();
+	multiorder_join();
+	multiorder_split();
+	multiorder_account();
+	multiorder_iteration_race();
+
+	radix_tree_cpu_dead(0);
+}
+
+int __weak main(void)
+{
+	radix_tree_init();
+	multiorder_checks();
+	return 0;
+}
diff --git a/tools/testing/radix-tree/regression.h b/tools/testing/radix-tree/regression.h
new file mode 100644
index 000000000..3c8a1584e
--- /dev/null
+++ b/tools/testing/radix-tree/regression.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __REGRESSION_H__
+#define __REGRESSION_H__
+
+void regression1_test(void);
+void regression2_test(void);
+void regression3_test(void);
+
+#endif
diff --git a/tools/testing/radix-tree/regression1.c b/tools/testing/radix-tree/regression1.c
new file mode 100644
index 000000000..0aece092f
--- /dev/null
+++ b/tools/testing/radix-tree/regression1.c
@@ -0,0 +1,221 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Regression1
+ * Description:
+ * Salman Qazi describes the following radix-tree bug:
+ *
+ * In the following case, we get can get a deadlock:
+ *
+ * 0.  The radix tree contains two items, one has the index 0.
+ * 1.  The reader (in this case find_get_pages) takes the rcu_read_lock.
+ * 2.  The reader acquires slot(s) for item(s) including the index 0 item.
+ * 3.  The non-zero index item is deleted, and as a consequence the other item
+ *     is moved to the root of the tree. The place where it used to be is queued
+ *     for deletion after the readers finish.
+ * 3b. The zero item is deleted, removing it from the direct slot, it remains in
+ *     the rcu-delayed indirect node.
+ * 4.  The reader looks at the index 0 slot, and finds that the page has 0 ref
+ *     count
+ * 5.  The reader looks at it again, hoping that the item will either be freed
+ *     or the ref count will increase. This never happens, as the slot it is
+ *     looking at will never be updated. Also, this slot can never be reclaimed
+ *     because the reader is holding rcu_read_lock and is in an infinite loop.
+ *
+ * The fix is to re-use the same "indirect" pointer case that requires a slot
+ * lookup retry into a general "retry the lookup" bit.
+ *
+ * Running:
+ * This test should run to completion in a few seconds. The above bug would
+ * cause it to hang indefinitely.
+ *
+ * Upstream commit:
+ * Not yet
+ */
+#include <linux/kernel.h>
+#include <linux/gfp.h>
+#include <linux/slab.h>
+#include <linux/radix-tree.h>
+#include <linux/rcupdate.h>
+#include <stdlib.h>
+#include <pthread.h>
+#include <stdio.h>
+#include <assert.h>
+
+#include "regression.h"
+
+static RADIX_TREE(mt_tree, GFP_KERNEL);
+static pthread_mutex_t mt_lock = PTHREAD_MUTEX_INITIALIZER;
+
+struct page {
+	pthread_mutex_t lock;
+	struct rcu_head rcu;
+	int count;
+	unsigned long index;
+};
+
+static struct page *page_alloc(void)
+{
+	struct page *p;
+	p = malloc(sizeof(struct page));
+	p->count = 1;
+	p->index = 1;
+	pthread_mutex_init(&p->lock, NULL);
+
+	return p;
+}
+
+static void page_rcu_free(struct rcu_head *rcu)
+{
+	struct page *p = container_of(rcu, struct page, rcu);
+	assert(!p->count);
+	pthread_mutex_destroy(&p->lock);
+	free(p);
+}
+
+static void page_free(struct page *p)
+{
+	call_rcu(&p->rcu, page_rcu_free);
+}
+
+static unsigned find_get_pages(unsigned long start,
+			    unsigned int nr_pages, struct page **pages)
+{
+	unsigned int i;
+	unsigned int ret;
+	unsigned int nr_found;
+
+	rcu_read_lock();
+restart:
+	nr_found = radix_tree_gang_lookup_slot(&mt_tree,
+				(void ***)pages, NULL, start, nr_pages);
+	ret = 0;
+	for (i = 0; i < nr_found; i++) {
+		struct page *page;
+repeat:
+		page = radix_tree_deref_slot((void **)pages[i]);
+		if (unlikely(!page))
+			continue;
+
+		if (radix_tree_exception(page)) {
+			if (radix_tree_deref_retry(page)) {
+				/*
+				 * Transient condition which can only trigger
+				 * when entry at index 0 moves out of or back
+				 * to root: none yet gotten, safe to restart.
+				 */
+				assert((start | i) == 0);
+				goto restart;
+			}
+			/*
+			 * No exceptional entries are inserted in this test.
+			 */
+			assert(0);
+		}
+
+		pthread_mutex_lock(&page->lock);
+		if (!page->count) {
+			pthread_mutex_unlock(&page->lock);
+			goto repeat;
+		}
+		/* don't actually update page refcount */
+		pthread_mutex_unlock(&page->lock);
+
+		/* Has the page moved? */
+		if (unlikely(page != *((void **)pages[i]))) {
+			goto repeat;
+		}
+
+		pages[ret] = page;
+		ret++;
+	}
+	rcu_read_unlock();
+	return ret;
+}
+
+static pthread_barrier_t worker_barrier;
+
+static void *regression1_fn(void *arg)
+{
+	rcu_register_thread();
+
+	if (pthread_barrier_wait(&worker_barrier) ==
+			PTHREAD_BARRIER_SERIAL_THREAD) {
+		int j;
+
+		for (j = 0; j < 1000000; j++) {
+			struct page *p;
+
+			p = page_alloc();
+			pthread_mutex_lock(&mt_lock);
+			radix_tree_insert(&mt_tree, 0, p);
+			pthread_mutex_unlock(&mt_lock);
+
+			p = page_alloc();
+			pthread_mutex_lock(&mt_lock);
+			radix_tree_insert(&mt_tree, 1, p);
+			pthread_mutex_unlock(&mt_lock);
+
+			pthread_mutex_lock(&mt_lock);
+			p = radix_tree_delete(&mt_tree, 1);
+			pthread_mutex_lock(&p->lock);
+			p->count--;
+			pthread_mutex_unlock(&p->lock);
+			pthread_mutex_unlock(&mt_lock);
+			page_free(p);
+
+			pthread_mutex_lock(&mt_lock);
+			p = radix_tree_delete(&mt_tree, 0);
+			pthread_mutex_lock(&p->lock);
+			p->count--;
+			pthread_mutex_unlock(&p->lock);
+			pthread_mutex_unlock(&mt_lock);
+			page_free(p);
+		}
+	} else {
+		int j;
+
+		for (j = 0; j < 100000000; j++) {
+			struct page *pages[10];
+
+			find_get_pages(0, 10, pages);
+		}
+	}
+
+	rcu_unregister_thread();
+
+	return NULL;
+}
+
+static pthread_t *threads;
+void regression1_test(void)
+{
+	int nr_threads;
+	int i;
+	long arg;
+
+	/* Regression #1 */
+	printv(1, "running regression test 1, should finish in under a minute\n");
+	nr_threads = 2;
+	pthread_barrier_init(&worker_barrier, NULL, nr_threads);
+
+	threads = malloc(nr_threads * sizeof(pthread_t *));
+
+	for (i = 0; i < nr_threads; i++) {
+		arg = i;
+		if (pthread_create(&threads[i], NULL, regression1_fn, (void *)arg)) {
+			perror("pthread_create");
+			exit(1);
+		}
+	}
+
+	for (i = 0; i < nr_threads; i++) {
+		if (pthread_join(threads[i], NULL)) {
+			perror("pthread_join");
+			exit(1);
+		}
+	}
+
+	free(threads);
+
+	printv(1, "regression test 1, done\n");
+}
diff --git a/tools/testing/radix-tree/regression2.c b/tools/testing/radix-tree/regression2.c
new file mode 100644
index 000000000..424b91c77
--- /dev/null
+++ b/tools/testing/radix-tree/regression2.c
@@ -0,0 +1,123 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Regression2
+ * Description:
+ * Toshiyuki Okajima describes the following radix-tree bug:
+ *
+ * In the following case, we can get a hangup on
+ *   radix_radix_tree_gang_lookup_tag_slot.
+ *
+ * 0.  The radix tree contains RADIX_TREE_MAP_SIZE items. And the tag of
+ *     a certain item has PAGECACHE_TAG_DIRTY.
+ * 1.  radix_tree_range_tag_if_tagged(, start, end, , PAGECACHE_TAG_DIRTY,
+ *     PAGECACHE_TAG_TOWRITE) is called to add PAGECACHE_TAG_TOWRITE tag
+ *     for the tag which has PAGECACHE_TAG_DIRTY. However, there is no tag with
+ *     PAGECACHE_TAG_DIRTY within the range from start to end. As the result,
+ *     There is no tag with PAGECACHE_TAG_TOWRITE but the root tag has
+ *     PAGECACHE_TAG_TOWRITE.
+ * 2.  An item is added into the radix tree and then the level of it is
+ *     extended into 2 from 1. At that time, the new radix tree node succeeds
+ *     the tag status of the root tag. Therefore the tag of the new radix tree
+ *     node has PAGECACHE_TAG_TOWRITE but there is not slot with
+ *     PAGECACHE_TAG_TOWRITE tag in the child node of the new radix tree node.
+ * 3.  The tag of a certain item is cleared with PAGECACHE_TAG_DIRTY.
+ * 4.  All items within the index range from 0 to RADIX_TREE_MAP_SIZE - 1 are
+ *     released. (Only the item which index is RADIX_TREE_MAP_SIZE exist in the
+ *     radix tree.) As the result, the slot of the radix tree node is NULL but
+ *     the tag which corresponds to the slot has PAGECACHE_TAG_TOWRITE.
+ * 5.  radix_tree_gang_lookup_tag_slot(PAGECACHE_TAG_TOWRITE) calls
+ *     __lookup_tag. __lookup_tag returns with 0. And __lookup_tag doesn't
+ *     change the index that is the input and output parameter. Because the 1st
+ *     slot of the radix tree node is NULL, but the tag which corresponds to
+ *     the slot has PAGECACHE_TAG_TOWRITE.
+ *     Therefore radix_tree_gang_lookup_tag_slot tries to get some items by
+ *     calling __lookup_tag, but it cannot get any items forever.
+ *
+ * The fix is to change that radix_tree_tag_if_tagged doesn't tag the root tag
+ * if it doesn't set any tags within the specified range.
+ *
+ * Running:
+ * This test should run to completion immediately. The above bug would cause it
+ * to hang indefinitely.
+ *
+ * Upstream commit:
+ * Not yet
+ */
+#include <linux/kernel.h>
+#include <linux/gfp.h>
+#include <linux/slab.h>
+#include <linux/radix-tree.h>
+#include <stdlib.h>
+#include <stdio.h>
+
+#include "regression.h"
+#include "test.h"
+
+#define PAGECACHE_TAG_DIRTY     0
+#define PAGECACHE_TAG_WRITEBACK 1
+#define PAGECACHE_TAG_TOWRITE   2
+
+static RADIX_TREE(mt_tree, GFP_KERNEL);
+unsigned long page_count = 0;
+
+struct page {
+	unsigned long index;
+};
+
+static struct page *page_alloc(void)
+{
+	struct page *p;
+	p = malloc(sizeof(struct page));
+	p->index = page_count++;
+
+	return p;
+}
+
+void regression2_test(void)
+{
+	int i;
+	struct page *p;
+	int max_slots = RADIX_TREE_MAP_SIZE;
+	unsigned long int start, end;
+	struct page *pages[1];
+
+	printv(1, "running regression test 2 (should take milliseconds)\n");
+	/* 0. */
+	for (i = 0; i <= max_slots - 1; i++) {
+		p = page_alloc();
+		radix_tree_insert(&mt_tree, i, p);
+	}
+	radix_tree_tag_set(&mt_tree, max_slots - 1, PAGECACHE_TAG_DIRTY);
+
+	/* 1. */
+	start = 0;
+	end = max_slots - 2;
+	tag_tagged_items(&mt_tree, NULL, start, end, 1,
+				PAGECACHE_TAG_DIRTY, PAGECACHE_TAG_TOWRITE);
+
+	/* 2. */
+	p = page_alloc();
+	radix_tree_insert(&mt_tree, max_slots, p);
+
+	/* 3. */
+	radix_tree_tag_clear(&mt_tree, max_slots - 1, PAGECACHE_TAG_DIRTY);
+
+	/* 4. */
+	for (i = max_slots - 1; i >= 0; i--)
+		free(radix_tree_delete(&mt_tree, i));
+
+	/* 5. */
+	// NOTE: start should not be 0 because radix_tree_gang_lookup_tag_slot
+	//       can return.
+	start = 1;
+	end = max_slots - 2;
+	radix_tree_gang_lookup_tag_slot(&mt_tree, (void ***)pages, start, end,
+		PAGECACHE_TAG_TOWRITE);
+
+	/* We remove all the remained nodes */
+	free(radix_tree_delete(&mt_tree, max_slots));
+
+	BUG_ON(!radix_tree_empty(&mt_tree));
+
+	printv(1, "regression test 2, done\n");
+}
diff --git a/tools/testing/radix-tree/regression3.c b/tools/testing/radix-tree/regression3.c
new file mode 100644
index 000000000..ace2543c3
--- /dev/null
+++ b/tools/testing/radix-tree/regression3.c
@@ -0,0 +1,118 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Regression3
+ * Description:
+ * Helper radix_tree_iter_retry resets next_index to the current index.
+ * In following radix_tree_next_slot current chunk size becomes zero.
+ * This isn't checked and it tries to dereference null pointer in slot.
+ *
+ * Helper radix_tree_iter_resume reset slot to NULL and next_index to index + 1,
+ * for tagger iteraction it also must reset cached tags in iterator to abort
+ * next radix_tree_next_slot and go to slow-path into radix_tree_next_chunk.
+ *
+ * Running:
+ * This test should run to completion immediately. The above bug would
+ * cause it to segfault.
+ *
+ * Upstream commit:
+ * Not yet
+ */
+#include <linux/kernel.h>
+#include <linux/gfp.h>
+#include <linux/slab.h>
+#include <linux/radix-tree.h>
+#include <stdlib.h>
+#include <stdio.h>
+
+#include "regression.h"
+
+void regression3_test(void)
+{
+	RADIX_TREE(root, GFP_KERNEL);
+	void *ptr0 = (void *)4ul;
+	void *ptr = (void *)8ul;
+	struct radix_tree_iter iter;
+	void **slot;
+	bool first;
+
+	printv(1, "running regression test 3 (should take milliseconds)\n");
+
+	radix_tree_insert(&root, 0, ptr0);
+	radix_tree_tag_set(&root, 0, 0);
+
+	first = true;
+	radix_tree_for_each_tagged(slot, &root, &iter, 0, 0) {
+		printv(2, "tagged %ld %p\n", iter.index, *slot);
+		if (first) {
+			radix_tree_insert(&root, 1, ptr);
+			radix_tree_tag_set(&root, 1, 0);
+			first = false;
+		}
+		if (radix_tree_deref_retry(*slot)) {
+			printv(2, "retry at %ld\n", iter.index);
+			slot = radix_tree_iter_retry(&iter);
+			continue;
+		}
+	}
+	radix_tree_delete(&root, 1);
+
+	first = true;
+	radix_tree_for_each_slot(slot, &root, &iter, 0) {
+		printv(2, "slot %ld %p\n", iter.index, *slot);
+		if (first) {
+			radix_tree_insert(&root, 1, ptr);
+			first = false;
+		}
+		if (radix_tree_deref_retry(*slot)) {
+			printv(2, "retry at %ld\n", iter.index);
+			slot = radix_tree_iter_retry(&iter);
+			continue;
+		}
+	}
+	radix_tree_delete(&root, 1);
+
+	first = true;
+	radix_tree_for_each_contig(slot, &root, &iter, 0) {
+		printv(2, "contig %ld %p\n", iter.index, *slot);
+		if (first) {
+			radix_tree_insert(&root, 1, ptr);
+			first = false;
+		}
+		if (radix_tree_deref_retry(*slot)) {
+			printv(2, "retry at %ld\n", iter.index);
+			slot = radix_tree_iter_retry(&iter);
+			continue;
+		}
+	}
+
+	radix_tree_for_each_slot(slot, &root, &iter, 0) {
+		printv(2, "slot %ld %p\n", iter.index, *slot);
+		if (!iter.index) {
+			printv(2, "next at %ld\n", iter.index);
+			slot = radix_tree_iter_resume(slot, &iter);
+		}
+	}
+
+	radix_tree_for_each_contig(slot, &root, &iter, 0) {
+		printv(2, "contig %ld %p\n", iter.index, *slot);
+		if (!iter.index) {
+			printv(2, "next at %ld\n", iter.index);
+			slot = radix_tree_iter_resume(slot, &iter);
+		}
+	}
+
+	radix_tree_tag_set(&root, 0, 0);
+	radix_tree_tag_set(&root, 1, 0);
+	radix_tree_for_each_tagged(slot, &root, &iter, 0, 0) {
+		printv(2, "tagged %ld %p\n", iter.index, *slot);
+		if (!iter.index) {
+			printv(2, "next at %ld\n", iter.index);
+			slot = radix_tree_iter_resume(slot, &iter);
+		}
+	}
+
+	radix_tree_delete(&root, 0);
+	radix_tree_delete(&root, 1);
+
+	printv(1, "regression test 3 passed\n");
+}
diff --git a/tools/testing/radix-tree/tag_check.c b/tools/testing/radix-tree/tag_check.c
new file mode 100644
index 000000000..543181e48
--- /dev/null
+++ b/tools/testing/radix-tree/tag_check.c
@@ -0,0 +1,380 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdlib.h>
+#include <assert.h>
+#include <stdio.h>
+#include <string.h>
+
+#include <linux/slab.h>
+#include <linux/radix-tree.h>
+
+#include "test.h"
+
+
+static void
+__simple_checks(struct radix_tree_root *tree, unsigned long index, int tag)
+{
+	unsigned long first = 0;
+	int ret;
+
+	item_check_absent(tree, index);
+	assert(item_tag_get(tree, index, tag) == 0);
+
+	item_insert(tree, index);
+	assert(item_tag_get(tree, index, tag) == 0);
+	item_tag_set(tree, index, tag);
+	ret = item_tag_get(tree, index, tag);
+	assert(ret != 0);
+	ret = tag_tagged_items(tree, NULL, first, ~0UL, 10, tag, !tag);
+	assert(ret == 1);
+	ret = item_tag_get(tree, index, !tag);
+	assert(ret != 0);
+	ret = item_delete(tree, index);
+	assert(ret != 0);
+	item_insert(tree, index);
+	ret = item_tag_get(tree, index, tag);
+	assert(ret == 0);
+	ret = item_delete(tree, index);
+	assert(ret != 0);
+	ret = item_delete(tree, index);
+	assert(ret == 0);
+}
+
+void simple_checks(void)
+{
+	unsigned long index;
+	RADIX_TREE(tree, GFP_KERNEL);
+
+	for (index = 0; index < 10000; index++) {
+		__simple_checks(&tree, index, 0);
+		__simple_checks(&tree, index, 1);
+	}
+	verify_tag_consistency(&tree, 0);
+	verify_tag_consistency(&tree, 1);
+	printv(2, "before item_kill_tree: %d allocated\n", nr_allocated);
+	item_kill_tree(&tree);
+	rcu_barrier();
+	printv(2, "after item_kill_tree: %d allocated\n", nr_allocated);
+}
+
+/*
+ * Check that tags propagate correctly when extending a tree.
+ */
+static void extend_checks(void)
+{
+	RADIX_TREE(tree, GFP_KERNEL);
+
+	item_insert(&tree, 43);
+	assert(item_tag_get(&tree, 43, 0) == 0);
+	item_tag_set(&tree, 43, 0);
+	assert(item_tag_get(&tree, 43, 0) == 1);
+	item_insert(&tree, 1000000);
+	assert(item_tag_get(&tree, 43, 0) == 1);
+
+	item_insert(&tree, 0);
+	item_tag_set(&tree, 0, 0);
+	item_delete(&tree, 1000000);
+	assert(item_tag_get(&tree, 43, 0) != 0);
+	item_delete(&tree, 43);
+	assert(item_tag_get(&tree, 43, 0) == 0);	/* crash */
+	assert(item_tag_get(&tree, 0, 0) == 1);
+
+	verify_tag_consistency(&tree, 0);
+
+	item_kill_tree(&tree);
+}
+
+/*
+ * Check that tags propagate correctly when contracting a tree.
+ */
+static void contract_checks(void)
+{
+	struct item *item;
+	int tmp;
+	RADIX_TREE(tree, GFP_KERNEL);
+
+	tmp = 1<<RADIX_TREE_MAP_SHIFT;
+	item_insert(&tree, tmp);
+	item_insert(&tree, tmp+1);
+	item_tag_set(&tree, tmp, 0);
+	item_tag_set(&tree, tmp, 1);
+	item_tag_set(&tree, tmp+1, 0);
+	item_delete(&tree, tmp+1);
+	item_tag_clear(&tree, tmp, 1);
+
+	assert(radix_tree_gang_lookup_tag(&tree, (void **)&item, 0, 1, 0) == 1);
+	assert(radix_tree_gang_lookup_tag(&tree, (void **)&item, 0, 1, 1) == 0);
+
+	assert(item_tag_get(&tree, tmp, 0) == 1);
+	assert(item_tag_get(&tree, tmp, 1) == 0);
+
+	verify_tag_consistency(&tree, 0);
+	item_kill_tree(&tree);
+}
+
+/*
+ * Stupid tag thrasher
+ *
+ * Create a large linear array corresponding to the tree.   Each element in
+ * the array is coherent with each node in the tree
+ */
+
+enum {
+	NODE_ABSENT = 0,
+	NODE_PRESENT = 1,
+	NODE_TAGGED = 2,
+};
+
+#define THRASH_SIZE		(1000 * 1000)
+#define N 127
+#define BATCH	33
+
+static void gang_check(struct radix_tree_root *tree,
+			char *thrash_state, int tag)
+{
+	struct item *items[BATCH];
+	int nr_found;
+	unsigned long index = 0;
+	unsigned long last_index = 0;
+
+	while ((nr_found = radix_tree_gang_lookup_tag(tree, (void **)items,
+					index, BATCH, tag))) {
+		int i;
+
+		for (i = 0; i < nr_found; i++) {
+			struct item *item = items[i];
+
+			while (last_index < item->index) {
+				assert(thrash_state[last_index] != NODE_TAGGED);
+				last_index++;
+			}
+			assert(thrash_state[last_index] == NODE_TAGGED);
+			last_index++;
+		}
+		index = items[nr_found - 1]->index + 1;
+	}
+}
+
+static void do_thrash(struct radix_tree_root *tree, char *thrash_state, int tag)
+{
+	int insert_chunk;
+	int delete_chunk;
+	int tag_chunk;
+	int untag_chunk;
+	int total_tagged = 0;
+	int total_present = 0;
+
+	for (insert_chunk = 1; insert_chunk < THRASH_SIZE; insert_chunk *= N)
+	for (delete_chunk = 1; delete_chunk < THRASH_SIZE; delete_chunk *= N)
+	for (tag_chunk = 1; tag_chunk < THRASH_SIZE; tag_chunk *= N)
+	for (untag_chunk = 1; untag_chunk < THRASH_SIZE; untag_chunk *= N) {
+		int i;
+		unsigned long index;
+		int nr_inserted = 0;
+		int nr_deleted = 0;
+		int nr_tagged = 0;
+		int nr_untagged = 0;
+		int actual_total_tagged;
+		int actual_total_present;
+
+		for (i = 0; i < insert_chunk; i++) {
+			index = rand() % THRASH_SIZE;
+			if (thrash_state[index] != NODE_ABSENT)
+				continue;
+			item_check_absent(tree, index);
+			item_insert(tree, index);
+			assert(thrash_state[index] != NODE_PRESENT);
+			thrash_state[index] = NODE_PRESENT;
+			nr_inserted++;
+			total_present++;
+		}
+
+		for (i = 0; i < delete_chunk; i++) {
+			index = rand() % THRASH_SIZE;
+			if (thrash_state[index] == NODE_ABSENT)
+				continue;
+			item_check_present(tree, index);
+			if (item_tag_get(tree, index, tag)) {
+				assert(thrash_state[index] == NODE_TAGGED);
+				total_tagged--;
+			} else {
+				assert(thrash_state[index] == NODE_PRESENT);
+			}
+			item_delete(tree, index);
+			assert(thrash_state[index] != NODE_ABSENT);
+			thrash_state[index] = NODE_ABSENT;
+			nr_deleted++;
+			total_present--;
+		}
+
+		for (i = 0; i < tag_chunk; i++) {
+			index = rand() % THRASH_SIZE;
+			if (thrash_state[index] != NODE_PRESENT) {
+				if (item_lookup(tree, index))
+					assert(item_tag_get(tree, index, tag));
+				continue;
+			}
+			item_tag_set(tree, index, tag);
+			item_tag_set(tree, index, tag);
+			assert(thrash_state[index] != NODE_TAGGED);
+			thrash_state[index] = NODE_TAGGED;
+			nr_tagged++;
+			total_tagged++;
+		}
+
+		for (i = 0; i < untag_chunk; i++) {
+			index = rand() % THRASH_SIZE;
+			if (thrash_state[index] != NODE_TAGGED)
+				continue;
+			item_check_present(tree, index);
+			assert(item_tag_get(tree, index, tag));
+			item_tag_clear(tree, index, tag);
+			item_tag_clear(tree, index, tag);
+			assert(thrash_state[index] != NODE_PRESENT);
+			thrash_state[index] = NODE_PRESENT;
+			nr_untagged++;
+			total_tagged--;
+		}
+
+		actual_total_tagged = 0;
+		actual_total_present = 0;
+		for (index = 0; index < THRASH_SIZE; index++) {
+			switch (thrash_state[index]) {
+			case NODE_ABSENT:
+				item_check_absent(tree, index);
+				break;
+			case NODE_PRESENT:
+				item_check_present(tree, index);
+				assert(!item_tag_get(tree, index, tag));
+				actual_total_present++;
+				break;
+			case NODE_TAGGED:
+				item_check_present(tree, index);
+				assert(item_tag_get(tree, index, tag));
+				actual_total_present++;
+				actual_total_tagged++;
+				break;
+			}
+		}
+
+		gang_check(tree, thrash_state, tag);
+
+		printv(2, "%d(%d) %d(%d) %d(%d) %d(%d) / "
+				"%d(%d) present, %d(%d) tagged\n",
+			insert_chunk, nr_inserted,
+			delete_chunk, nr_deleted,
+			tag_chunk, nr_tagged,
+			untag_chunk, nr_untagged,
+			total_present, actual_total_present,
+			total_tagged, actual_total_tagged);
+	}
+}
+
+static void thrash_tags(void)
+{
+	RADIX_TREE(tree, GFP_KERNEL);
+	char *thrash_state;
+
+	thrash_state = malloc(THRASH_SIZE);
+	memset(thrash_state, 0, THRASH_SIZE);
+
+	do_thrash(&tree, thrash_state, 0);
+
+	verify_tag_consistency(&tree, 0);
+	item_kill_tree(&tree);
+	free(thrash_state);
+}
+
+static void leak_check(void)
+{
+	RADIX_TREE(tree, GFP_KERNEL);
+
+	item_insert(&tree, 1000000);
+	item_delete(&tree, 1000000);
+	item_kill_tree(&tree);
+}
+
+static void __leak_check(void)
+{
+	RADIX_TREE(tree, GFP_KERNEL);
+
+	printv(2, "%d: nr_allocated=%d\n", __LINE__, nr_allocated);
+	item_insert(&tree, 1000000);
+	printv(2, "%d: nr_allocated=%d\n", __LINE__, nr_allocated);
+	item_delete(&tree, 1000000);
+	printv(2, "%d: nr_allocated=%d\n", __LINE__, nr_allocated);
+	item_kill_tree(&tree);
+	printv(2, "%d: nr_allocated=%d\n", __LINE__, nr_allocated);
+}
+
+static void single_check(void)
+{
+	struct item *items[BATCH];
+	RADIX_TREE(tree, GFP_KERNEL);
+	int ret;
+	unsigned long first = 0;
+
+	item_insert(&tree, 0);
+	item_tag_set(&tree, 0, 0);
+	ret = radix_tree_gang_lookup_tag(&tree, (void **)items, 0, BATCH, 0);
+	assert(ret == 1);
+	ret = radix_tree_gang_lookup_tag(&tree, (void **)items, 1, BATCH, 0);
+	assert(ret == 0);
+	verify_tag_consistency(&tree, 0);
+	verify_tag_consistency(&tree, 1);
+	ret = tag_tagged_items(&tree, NULL, first, 10, 10, 0, 1);
+	assert(ret == 1);
+	ret = radix_tree_gang_lookup_tag(&tree, (void **)items, 0, BATCH, 1);
+	assert(ret == 1);
+	item_tag_clear(&tree, 0, 0);
+	ret = radix_tree_gang_lookup_tag(&tree, (void **)items, 0, BATCH, 0);
+	assert(ret == 0);
+	item_kill_tree(&tree);
+}
+
+void radix_tree_clear_tags_test(void)
+{
+	unsigned long index;
+	struct radix_tree_node *node;
+	struct radix_tree_iter iter;
+	void **slot;
+
+	RADIX_TREE(tree, GFP_KERNEL);
+
+	item_insert(&tree, 0);
+	item_tag_set(&tree, 0, 0);
+	__radix_tree_lookup(&tree, 0, &node, &slot);
+	radix_tree_clear_tags(&tree, node, slot);
+	assert(item_tag_get(&tree, 0, 0) == 0);
+
+	for (index = 0; index < 1000; index++) {
+		item_insert(&tree, index);
+		item_tag_set(&tree, index, 0);
+	}
+
+	radix_tree_for_each_slot(slot, &tree, &iter, 0) {
+		radix_tree_clear_tags(&tree, iter.node, slot);
+		assert(item_tag_get(&tree, iter.index, 0) == 0);
+	}
+
+	item_kill_tree(&tree);
+}
+
+void tag_check(void)
+{
+	single_check();
+	extend_checks();
+	contract_checks();
+	rcu_barrier();
+	printv(2, "after extend_checks: %d allocated\n", nr_allocated);
+	__leak_check();
+	leak_check();
+	rcu_barrier();
+	printv(2, "after leak_check: %d allocated\n", nr_allocated);
+	simple_checks();
+	rcu_barrier();
+	printv(2, "after simple_checks: %d allocated\n", nr_allocated);
+	thrash_tags();
+	rcu_barrier();
+	printv(2, "after thrash_tags: %d allocated\n", nr_allocated);
+	radix_tree_clear_tags_test();
+}
diff --git a/tools/testing/radix-tree/test.c b/tools/testing/radix-tree/test.c
new file mode 100644
index 000000000..def601557
--- /dev/null
+++ b/tools/testing/radix-tree/test.c
@@ -0,0 +1,334 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdlib.h>
+#include <assert.h>
+#include <stdio.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/bitops.h>
+
+#include "test.h"
+
+struct item *
+item_tag_set(struct radix_tree_root *root, unsigned long index, int tag)
+{
+	return radix_tree_tag_set(root, index, tag);
+}
+
+struct item *
+item_tag_clear(struct radix_tree_root *root, unsigned long index, int tag)
+{
+	return radix_tree_tag_clear(root, index, tag);
+}
+
+int item_tag_get(struct radix_tree_root *root, unsigned long index, int tag)
+{
+	return radix_tree_tag_get(root, index, tag);
+}
+
+int __item_insert(struct radix_tree_root *root, struct item *item)
+{
+	return __radix_tree_insert(root, item->index, item->order, item);
+}
+
+struct item *item_create(unsigned long index, unsigned int order)
+{
+	struct item *ret = malloc(sizeof(*ret));
+
+	ret->index = index;
+	ret->order = order;
+	return ret;
+}
+
+int item_insert_order(struct radix_tree_root *root, unsigned long index,
+			unsigned order)
+{
+	struct item *item = item_create(index, order);
+	int err = __item_insert(root, item);
+	if (err)
+		free(item);
+	return err;
+}
+
+int item_insert(struct radix_tree_root *root, unsigned long index)
+{
+	return item_insert_order(root, index, 0);
+}
+
+void item_sanity(struct item *item, unsigned long index)
+{
+	unsigned long mask;
+	assert(!radix_tree_is_internal_node(item));
+	assert(item->order < BITS_PER_LONG);
+	mask = (1UL << item->order) - 1;
+	assert((item->index | mask) == (index | mask));
+}
+
+int item_delete(struct radix_tree_root *root, unsigned long index)
+{
+	struct item *item = radix_tree_delete(root, index);
+
+	if (item) {
+		item_sanity(item, index);
+		free(item);
+		return 1;
+	}
+	return 0;
+}
+
+static void item_free_rcu(struct rcu_head *head)
+{
+	struct item *item = container_of(head, struct item, rcu_head);
+
+	free(item);
+}
+
+int item_delete_rcu(struct radix_tree_root *root, unsigned long index)
+{
+	struct item *item = radix_tree_delete(root, index);
+
+	if (item) {
+		item_sanity(item, index);
+		call_rcu(&item->rcu_head, item_free_rcu);
+		return 1;
+	}
+	return 0;
+}
+
+void item_check_present(struct radix_tree_root *root, unsigned long index)
+{
+	struct item *item;
+
+	item = radix_tree_lookup(root, index);
+	assert(item != NULL);
+	item_sanity(item, index);
+}
+
+struct item *item_lookup(struct radix_tree_root *root, unsigned long index)
+{
+	return radix_tree_lookup(root, index);
+}
+
+void item_check_absent(struct radix_tree_root *root, unsigned long index)
+{
+	struct item *item;
+
+	item = radix_tree_lookup(root, index);
+	assert(item == NULL);
+}
+
+/*
+ * Scan only the passed (start, start+nr] for present items
+ */
+void item_gang_check_present(struct radix_tree_root *root,
+			unsigned long start, unsigned long nr,
+			int chunk, int hop)
+{
+	struct item *items[chunk];
+	unsigned long into;
+
+	for (into = 0; into < nr; ) {
+		int nfound;
+		int nr_to_find = chunk;
+		int i;
+
+		if (nr_to_find > (nr - into))
+			nr_to_find = nr - into;
+
+		nfound = radix_tree_gang_lookup(root, (void **)items,
+						start + into, nr_to_find);
+		assert(nfound == nr_to_find);
+		for (i = 0; i < nfound; i++)
+			assert(items[i]->index == start + into + i);
+		into += hop;
+	}
+}
+
+/*
+ * Scan the entire tree, only expecting present items (start, start+nr]
+ */
+void item_full_scan(struct radix_tree_root *root, unsigned long start,
+			unsigned long nr, int chunk)
+{
+	struct item *items[chunk];
+	unsigned long into = 0;
+	unsigned long this_index = start;
+	int nfound;
+	int i;
+
+//	printf("%s(0x%08lx, 0x%08lx, %d)\n", __FUNCTION__, start, nr, chunk);
+
+	while ((nfound = radix_tree_gang_lookup(root, (void **)items, into,
+					chunk))) {
+//		printf("At 0x%08lx, nfound=%d\n", into, nfound);
+		for (i = 0; i < nfound; i++) {
+			assert(items[i]->index == this_index);
+			this_index++;
+		}
+//		printf("Found 0x%08lx->0x%08lx\n",
+//			items[0]->index, items[nfound-1]->index);
+		into = this_index;
+	}
+	if (chunk)
+		assert(this_index == start + nr);
+	nfound = radix_tree_gang_lookup(root, (void **)items,
+					this_index, chunk);
+	assert(nfound == 0);
+}
+
+/* Use the same pattern as tag_pages_for_writeback() in mm/page-writeback.c */
+int tag_tagged_items(struct radix_tree_root *root, pthread_mutex_t *lock,
+			unsigned long start, unsigned long end, unsigned batch,
+			unsigned iftag, unsigned thentag)
+{
+	unsigned long tagged = 0;
+	struct radix_tree_iter iter;
+	void **slot;
+
+	if (batch == 0)
+		batch = 1;
+
+	if (lock)
+		pthread_mutex_lock(lock);
+	radix_tree_for_each_tagged(slot, root, &iter, start, iftag) {
+		if (iter.index > end)
+			break;
+		radix_tree_iter_tag_set(root, &iter, thentag);
+		tagged++;
+		if ((tagged % batch) != 0)
+			continue;
+		slot = radix_tree_iter_resume(slot, &iter);
+		if (lock) {
+			pthread_mutex_unlock(lock);
+			rcu_barrier();
+			pthread_mutex_lock(lock);
+		}
+	}
+	if (lock)
+		pthread_mutex_unlock(lock);
+
+	return tagged;
+}
+
+/* Use the same pattern as find_swap_entry() in mm/shmem.c */
+unsigned long find_item(struct radix_tree_root *root, void *item)
+{
+	struct radix_tree_iter iter;
+	void **slot;
+	unsigned long found = -1;
+	unsigned long checked = 0;
+
+	radix_tree_for_each_slot(slot, root, &iter, 0) {
+		if (*slot == item) {
+			found = iter.index;
+			break;
+		}
+		checked++;
+		if ((checked % 4) != 0)
+			continue;
+		slot = radix_tree_iter_resume(slot, &iter);
+	}
+
+	return found;
+}
+
+static int verify_node(struct radix_tree_node *slot, unsigned int tag,
+			int tagged)
+{
+	int anyset = 0;
+	int i;
+	int j;
+
+	slot = entry_to_node(slot);
+
+	/* Verify consistency at this level */
+	for (i = 0; i < RADIX_TREE_TAG_LONGS; i++) {
+		if (slot->tags[tag][i]) {
+			anyset = 1;
+			break;
+		}
+	}
+	if (tagged != anyset) {
+		printf("tag: %u, shift %u, tagged: %d, anyset: %d\n",
+			tag, slot->shift, tagged, anyset);
+		for (j = 0; j < RADIX_TREE_MAX_TAGS; j++) {
+			printf("tag %d: ", j);
+			for (i = 0; i < RADIX_TREE_TAG_LONGS; i++)
+				printf("%016lx ", slot->tags[j][i]);
+			printf("\n");
+		}
+		return 1;
+	}
+	assert(tagged == anyset);
+
+	/* Go for next level */
+	if (slot->shift > 0) {
+		for (i = 0; i < RADIX_TREE_MAP_SIZE; i++)
+			if (slot->slots[i])
+				if (verify_node(slot->slots[i], tag,
+					    !!test_bit(i, slot->tags[tag]))) {
+					printf("Failure at off %d\n", i);
+					for (j = 0; j < RADIX_TREE_MAX_TAGS; j++) {
+						printf("tag %d: ", j);
+						for (i = 0; i < RADIX_TREE_TAG_LONGS; i++)
+							printf("%016lx ", slot->tags[j][i]);
+						printf("\n");
+					}
+					return 1;
+				}
+	}
+	return 0;
+}
+
+void verify_tag_consistency(struct radix_tree_root *root, unsigned int tag)
+{
+	struct radix_tree_node *node = root->rnode;
+	if (!radix_tree_is_internal_node(node))
+		return;
+	verify_node(node, tag, !!root_tag_get(root, tag));
+}
+
+void item_kill_tree(struct radix_tree_root *root)
+{
+	struct radix_tree_iter iter;
+	void **slot;
+	struct item *items[32];
+	int nfound;
+
+	radix_tree_for_each_slot(slot, root, &iter, 0) {
+		if (radix_tree_exceptional_entry(*slot))
+			radix_tree_delete(root, iter.index);
+	}
+
+	while ((nfound = radix_tree_gang_lookup(root, (void **)items, 0, 32))) {
+		int i;
+
+		for (i = 0; i < nfound; i++) {
+			void *ret;
+
+			ret = radix_tree_delete(root, items[i]->index);
+			assert(ret == items[i]);
+			free(items[i]);
+		}
+	}
+	assert(radix_tree_gang_lookup(root, (void **)items, 0, 32) == 0);
+	assert(root->rnode == NULL);
+}
+
+void tree_verify_min_height(struct radix_tree_root *root, int maxindex)
+{
+	unsigned shift;
+	struct radix_tree_node *node = root->rnode;
+	if (!radix_tree_is_internal_node(node)) {
+		assert(maxindex == 0);
+		return;
+	}
+
+	node = entry_to_node(node);
+	assert(maxindex <= node_maxindex(node));
+
+	shift = node->shift;
+	if (shift > 0)
+		assert(maxindex > shift_maxindex(shift - RADIX_TREE_MAP_SHIFT));
+	else
+		assert(maxindex > 0);
+}
diff --git a/tools/testing/radix-tree/test.h b/tools/testing/radix-tree/test.h
new file mode 100644
index 000000000..92d901eac
--- /dev/null
+++ b/tools/testing/radix-tree/test.h
@@ -0,0 +1,65 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/gfp.h>
+#include <linux/types.h>
+#include <linux/radix-tree.h>
+#include <linux/rcupdate.h>
+
+struct item {
+	struct rcu_head	rcu_head;
+	unsigned long index;
+	unsigned int order;
+};
+
+struct item *item_create(unsigned long index, unsigned int order);
+int __item_insert(struct radix_tree_root *root, struct item *item);
+int item_insert(struct radix_tree_root *root, unsigned long index);
+void item_sanity(struct item *item, unsigned long index);
+int item_insert_order(struct radix_tree_root *root, unsigned long index,
+			unsigned order);
+int item_delete(struct radix_tree_root *root, unsigned long index);
+int item_delete_rcu(struct radix_tree_root *root, unsigned long index);
+struct item *item_lookup(struct radix_tree_root *root, unsigned long index);
+
+void item_check_present(struct radix_tree_root *root, unsigned long index);
+void item_check_absent(struct radix_tree_root *root, unsigned long index);
+void item_gang_check_present(struct radix_tree_root *root,
+			unsigned long start, unsigned long nr,
+			int chunk, int hop);
+void item_full_scan(struct radix_tree_root *root, unsigned long start,
+			unsigned long nr, int chunk);
+void item_kill_tree(struct radix_tree_root *root);
+
+int tag_tagged_items(struct radix_tree_root *, pthread_mutex_t *,
+			unsigned long start, unsigned long end, unsigned batch,
+			unsigned iftag, unsigned thentag);
+unsigned long find_item(struct radix_tree_root *, void *item);
+
+void tag_check(void);
+void multiorder_checks(void);
+void iteration_test(unsigned order, unsigned duration);
+void benchmark(void);
+void idr_checks(void);
+void ida_tests(void);
+
+struct item *
+item_tag_set(struct radix_tree_root *root, unsigned long index, int tag);
+struct item *
+item_tag_clear(struct radix_tree_root *root, unsigned long index, int tag);
+int item_tag_get(struct radix_tree_root *root, unsigned long index, int tag);
+void tree_verify_min_height(struct radix_tree_root *root, int maxindex);
+void verify_tag_consistency(struct radix_tree_root *root, unsigned int tag);
+
+extern int nr_allocated;
+
+/* Normally private parts of lib/radix-tree.c */
+struct radix_tree_node *entry_to_node(void *ptr);
+void radix_tree_dump(struct radix_tree_root *root);
+int root_tag_get(struct radix_tree_root *root, unsigned int tag);
+unsigned long node_maxindex(struct radix_tree_node *);
+unsigned long shift_maxindex(unsigned int shift);
+int radix_tree_cpu_dead(unsigned int cpu);
+struct radix_tree_preload {
+	unsigned nr;
+	struct radix_tree_node *nodes;
+};
+extern struct radix_tree_preload radix_tree_preloads;
diff --git a/tools/testing/scatterlist/Makefile b/tools/testing/scatterlist/Makefile
new file mode 100644
index 000000000..933c3a6e4
--- /dev/null
+++ b/tools/testing/scatterlist/Makefile
@@ -0,0 +1,30 @@
+CFLAGS += -I. -I../../include -g -O2 -Wall -fsanitize=address
+LDFLAGS += -fsanitize=address -fsanitize=undefined
+TARGETS = main
+OFILES = main.o scatterlist.o
+
+ifeq ($(BUILD), 32)
+        CFLAGS += -m32
+        LDFLAGS += -m32
+endif
+
+targets: include $(TARGETS)
+
+main: $(OFILES)
+
+clean:
+	$(RM) $(TARGETS) $(OFILES) scatterlist.c linux/scatterlist.h linux/highmem.h linux/kmemleak.h asm/io.h
+	@rmdir asm
+
+scatterlist.c: ../../../lib/scatterlist.c
+	@sed -e 's/^static //' -e 's/__always_inline //' -e 's/inline //' < $< > $@
+
+.PHONY: include
+
+include: ../../../include/linux/scatterlist.h
+	@mkdir -p linux
+	@mkdir -p asm
+	@touch asm/io.h
+	@touch linux/highmem.h
+	@touch linux/kmemleak.h
+	@cp $< linux/scatterlist.h
diff --git a/tools/testing/scatterlist/linux/mm.h b/tools/testing/scatterlist/linux/mm.h
new file mode 100644
index 000000000..6f9ac14aa
--- /dev/null
+++ b/tools/testing/scatterlist/linux/mm.h
@@ -0,0 +1,125 @@
+#ifndef _LINUX_MM_H
+#define _LINUX_MM_H
+
+#include <assert.h>
+#include <string.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <limits.h>
+#include <stdio.h>
+
+typedef unsigned long dma_addr_t;
+
+#define unlikely
+
+#define BUG_ON(x) assert(!(x))
+
+#define WARN_ON(condition) ({                                           \
+	int __ret_warn_on = !!(condition);                              \
+	unlikely(__ret_warn_on);                                        \
+})
+
+#define WARN_ON_ONCE(condition) ({                              \
+	int __ret_warn_on = !!(condition);                      \
+	if (unlikely(__ret_warn_on))                            \
+		assert(0);                                      \
+	unlikely(__ret_warn_on);                                \
+})
+
+#define PAGE_SIZE	(4096)
+#define PAGE_SHIFT	(12)
+#define PAGE_MASK	(~(PAGE_SIZE-1))
+
+#define __ALIGN_KERNEL(x, a)		__ALIGN_KERNEL_MASK(x, (typeof(x))(a) - 1)
+#define __ALIGN_KERNEL_MASK(x, mask)	(((x) + (mask)) & ~(mask))
+#define ALIGN(x, a)			__ALIGN_KERNEL((x), (a))
+
+#define PAGE_ALIGN(addr) ALIGN(addr, PAGE_SIZE)
+
+#define offset_in_page(p)	((unsigned long)(p) & ~PAGE_MASK)
+
+#define virt_to_page(x)	((void *)x)
+#define page_address(x)	((void *)x)
+
+static inline unsigned long page_to_phys(struct page *page)
+{
+	assert(0);
+
+	return 0;
+}
+
+#define page_to_pfn(page) ((unsigned long)(page) / PAGE_SIZE)
+#define pfn_to_page(pfn) (void *)((pfn) * PAGE_SIZE)
+#define nth_page(page,n) pfn_to_page(page_to_pfn((page)) + (n))
+
+#define __min(t1, t2, min1, min2, x, y) ({              \
+	t1 min1 = (x);                                  \
+	t2 min2 = (y);                                  \
+	(void) (&min1 == &min2);                        \
+	min1 < min2 ? min1 : min2; })
+
+#define ___PASTE(a,b) a##b
+#define __PASTE(a,b) ___PASTE(a,b)
+
+#define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __COUNTER__)
+
+#define min(x, y)                                       \
+	__min(typeof(x), typeof(y),                     \
+	      __UNIQUE_ID(min1_), __UNIQUE_ID(min2_),   \
+	      x, y)
+
+#define min_t(type, x, y)                               \
+	__min(type, type,                               \
+	      __UNIQUE_ID(min1_), __UNIQUE_ID(min2_),   \
+	      x, y)
+
+#define preemptible() (1)
+
+static inline void *kmap(struct page *page)
+{
+	assert(0);
+
+	return NULL;
+}
+
+static inline void *kmap_atomic(struct page *page)
+{
+	assert(0);
+
+	return NULL;
+}
+
+static inline void kunmap(void *addr)
+{
+	assert(0);
+}
+
+static inline void kunmap_atomic(void *addr)
+{
+	assert(0);
+}
+
+static inline unsigned long __get_free_page(unsigned int flags)
+{
+	return (unsigned long)malloc(PAGE_SIZE);
+}
+
+static inline void free_page(unsigned long page)
+{
+	free((void *)page);
+}
+
+static inline void *kmalloc(unsigned int size, unsigned int flags)
+{
+	return malloc(size);
+}
+
+#define kfree(x) free(x)
+
+#define kmemleak_alloc(a, b, c, d)
+#define kmemleak_free(a)
+
+#define PageSlab(p) (0)
+#define flush_kernel_dcache_page(p)
+
+#endif
diff --git a/tools/testing/scatterlist/main.c b/tools/testing/scatterlist/main.c
new file mode 100644
index 000000000..0a1464181
--- /dev/null
+++ b/tools/testing/scatterlist/main.c
@@ -0,0 +1,79 @@
+#include <stdio.h>
+#include <assert.h>
+
+#include <linux/scatterlist.h>
+
+#define MAX_PAGES (64)
+
+static void set_pages(struct page **pages, const unsigned *array, unsigned num)
+{
+	unsigned int i;
+
+	assert(num < MAX_PAGES);
+	for (i = 0; i < num; i++)
+		pages[i] = (struct page *)(unsigned long)
+			   ((1 + array[i]) * PAGE_SIZE);
+}
+
+#define pfn(...) (unsigned []){ __VA_ARGS__ }
+
+int main(void)
+{
+	const unsigned int sgmax = SCATTERLIST_MAX_SEGMENT;
+	struct test {
+		int alloc_ret;
+		unsigned num_pages;
+		unsigned *pfn;
+		unsigned size;
+		unsigned int max_seg;
+		unsigned int expected_segments;
+	} *test, tests[] = {
+		{ -EINVAL, 1, pfn(0), PAGE_SIZE, PAGE_SIZE + 1, 1 },
+		{ -EINVAL, 1, pfn(0), PAGE_SIZE, 0, 1 },
+		{ -EINVAL, 1, pfn(0), PAGE_SIZE, sgmax + 1, 1 },
+		{ 0, 1, pfn(0), PAGE_SIZE, sgmax, 1 },
+		{ 0, 1, pfn(0), 1, sgmax, 1 },
+		{ 0, 2, pfn(0, 1), 2 * PAGE_SIZE, sgmax, 1 },
+		{ 0, 2, pfn(1, 0), 2 * PAGE_SIZE, sgmax, 2 },
+		{ 0, 3, pfn(0, 1, 2), 3 * PAGE_SIZE, sgmax, 1 },
+		{ 0, 3, pfn(0, 2, 1), 3 * PAGE_SIZE, sgmax, 3 },
+		{ 0, 3, pfn(0, 1, 3), 3 * PAGE_SIZE, sgmax, 2 },
+		{ 0, 3, pfn(1, 2, 4), 3 * PAGE_SIZE, sgmax, 2 },
+		{ 0, 3, pfn(1, 3, 4), 3 * PAGE_SIZE, sgmax, 2 },
+		{ 0, 4, pfn(0, 1, 3, 4), 4 * PAGE_SIZE, sgmax, 2 },
+		{ 0, 5, pfn(0, 1, 3, 4, 5), 5 * PAGE_SIZE, sgmax, 2 },
+		{ 0, 5, pfn(0, 1, 3, 4, 6), 5 * PAGE_SIZE, sgmax, 3 },
+		{ 0, 5, pfn(0, 1, 2, 3, 4), 5 * PAGE_SIZE, sgmax, 1 },
+		{ 0, 5, pfn(0, 1, 2, 3, 4), 5 * PAGE_SIZE, 2 * PAGE_SIZE, 3 },
+		{ 0, 6, pfn(0, 1, 2, 3, 4, 5), 6 * PAGE_SIZE, 2 * PAGE_SIZE, 3 },
+		{ 0, 6, pfn(0, 2, 3, 4, 5, 6), 6 * PAGE_SIZE, 2 * PAGE_SIZE, 4 },
+		{ 0, 6, pfn(0, 1, 3, 4, 5, 6), 6 * PAGE_SIZE, 2 * PAGE_SIZE, 3 },
+		{ 0, 0, NULL, 0, 0, 0 },
+	};
+	unsigned int i;
+
+	for (i = 0, test = tests; test->expected_segments; test++, i++) {
+		struct page *pages[MAX_PAGES];
+		struct sg_table st;
+		int ret;
+
+		set_pages(pages, test->pfn, test->num_pages);
+
+		ret = __sg_alloc_table_from_pages(&st, pages, test->num_pages,
+						  0, test->size, test->max_seg,
+						  GFP_KERNEL);
+		assert(ret == test->alloc_ret);
+
+		if (test->alloc_ret)
+			continue;
+
+		assert(st.nents == test->expected_segments);
+		assert(st.orig_nents == test->expected_segments);
+
+		sg_free_table(&st);
+	}
+
+	assert(i == (sizeof(tests) / sizeof(tests[0])) - 1);
+
+	return 0;
+}
diff --git a/tools/testing/selftests/.gitignore b/tools/testing/selftests/.gitignore
new file mode 100644
index 000000000..917503524
--- /dev/null
+++ b/tools/testing/selftests/.gitignore
@@ -0,0 +1,5 @@
+kselftest
+gpiogpio-event-mon
+gpiogpio-hammer
+gpioinclude/
+gpiolsgpio
diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
new file mode 100644
index 000000000..f0017c831
--- /dev/null
+++ b/tools/testing/selftests/Makefile
@@ -0,0 +1,167 @@
+# SPDX-License-Identifier: GPL-2.0
+TARGETS = android
+TARGETS += bpf
+TARGETS += breakpoints
+TARGETS += capabilities
+TARGETS += cgroup
+TARGETS += cpufreq
+TARGETS += cpu-hotplug
+TARGETS += efivarfs
+TARGETS += exec
+TARGETS += filesystems
+TARGETS += firmware
+TARGETS += ftrace
+TARGETS += futex
+TARGETS += gpio
+TARGETS += intel_pstate
+TARGETS += ipc
+TARGETS += kcmp
+TARGETS += kvm
+TARGETS += lib
+TARGETS += membarrier
+TARGETS += memfd
+TARGETS += memory-hotplug
+TARGETS += mount
+TARGETS += mqueue
+TARGETS += net
+TARGETS += netfilter
+TARGETS += nsfs
+TARGETS += powerpc
+TARGETS += proc
+TARGETS += pstore
+TARGETS += ptrace
+TARGETS += rseq
+TARGETS += rtc
+TARGETS += seccomp
+TARGETS += sigaltstack
+TARGETS += size
+TARGETS += sparc64
+TARGETS += splice
+TARGETS += static_keys
+TARGETS += sync
+TARGETS += sysctl
+ifneq (1, $(quicktest))
+TARGETS += timers
+endif
+TARGETS += user
+TARGETS += vm
+TARGETS += x86
+TARGETS += zram
+#Please keep the TARGETS list alphabetically sorted
+# Run "make quicktest=1 run_tests" or
+# "make quicktest=1 kselftest" from top level Makefile
+
+TARGETS_HOTPLUG = cpu-hotplug
+TARGETS_HOTPLUG += memory-hotplug
+
+# Clear LDFLAGS and MAKEFLAGS if called from main
+# Makefile to avoid test build failures when test
+# Makefile doesn't have explicit build rules.
+ifeq (1,$(MAKELEVEL))
+override LDFLAGS =
+override MAKEFLAGS =
+endif
+
+ifneq ($(KBUILD_SRC),)
+override LDFLAGS =
+endif
+
+BUILD := $(O)
+ifndef BUILD
+  BUILD := $(KBUILD_OUTPUT)
+endif
+ifndef BUILD
+  BUILD := $(shell pwd)
+endif
+
+# KSFT_TAP_LEVEL is used from KSFT framework to prevent nested TAP header
+# printing from tests. Applicable to run_tests case where run_tests adds
+# TAP header prior running tests and when a test program invokes another
+# with system() call. Export it here to cover override RUN_TESTS defines.
+export KSFT_TAP_LEVEL=`echo 1`
+
+export BUILD
+all:
+	@for TARGET in $(TARGETS); do		\
+		BUILD_TARGET=$$BUILD/$$TARGET;	\
+		mkdir $$BUILD_TARGET  -p;	\
+		make OUTPUT=$$BUILD_TARGET -C $$TARGET;\
+	done;
+
+run_tests: all
+	@for TARGET in $(TARGETS); do \
+		BUILD_TARGET=$$BUILD/$$TARGET;	\
+		make OUTPUT=$$BUILD_TARGET -C $$TARGET run_tests;\
+	done;
+
+hotplug:
+	@for TARGET in $(TARGETS_HOTPLUG); do \
+		BUILD_TARGET=$$BUILD/$$TARGET;	\
+		make OUTPUT=$$BUILD_TARGET -C $$TARGET;\
+	done;
+
+run_hotplug: hotplug
+	@for TARGET in $(TARGETS_HOTPLUG); do \
+		BUILD_TARGET=$$BUILD/$$TARGET;	\
+		make OUTPUT=$$BUILD_TARGET -C $$TARGET run_full_test;\
+	done;
+
+clean_hotplug:
+	@for TARGET in $(TARGETS_HOTPLUG); do \
+		BUILD_TARGET=$$BUILD/$$TARGET;	\
+		make OUTPUT=$$BUILD_TARGET -C $$TARGET clean;\
+	done;
+
+run_pstore_crash:
+	make -C pstore run_crash
+
+INSTALL_PATH ?= install
+INSTALL_PATH := $(abspath $(INSTALL_PATH))
+ALL_SCRIPT := $(INSTALL_PATH)/run_kselftest.sh
+
+install:
+ifdef INSTALL_PATH
+	@# Ask all targets to install their files
+	mkdir -p $(INSTALL_PATH)
+	@for TARGET in $(TARGETS); do \
+		BUILD_TARGET=$$BUILD/$$TARGET;	\
+		make OUTPUT=$$BUILD_TARGET -C $$TARGET INSTALL_PATH=$(INSTALL_PATH)/$$TARGET install; \
+	done;
+
+	@# Ask all targets to emit their test scripts
+	echo "#!/bin/sh" > $(ALL_SCRIPT)
+	echo "BASE_DIR=\$$(realpath \$$(dirname \$$0))" >> $(ALL_SCRIPT)
+	echo "cd \$$BASE_DIR" >> $(ALL_SCRIPT)
+	echo "ROOT=\$$PWD" >> $(ALL_SCRIPT)
+	echo "if [ \"\$$1\" = \"--summary\" ]; then" >> $(ALL_SCRIPT)
+	echo "  OUTPUT=\$$BASE_DIR/output.log" >> $(ALL_SCRIPT)
+	echo "  cat /dev/null > \$$OUTPUT" >> $(ALL_SCRIPT)
+	echo "else" >> $(ALL_SCRIPT)
+	echo "  OUTPUT=/dev/stdout" >> $(ALL_SCRIPT)
+	echo "fi" >> $(ALL_SCRIPT)
+	echo "export KSFT_TAP_LEVEL=1" >> $(ALL_SCRIPT)
+	echo "export skip=4" >> $(ALL_SCRIPT)
+
+	for TARGET in $(TARGETS); do \
+		BUILD_TARGET=$$BUILD/$$TARGET;	\
+		echo "echo ; echo TAP version 13" >> $(ALL_SCRIPT);	\
+		echo "echo Running tests in $$TARGET" >> $(ALL_SCRIPT); \
+		echo "echo ========================================" >> $(ALL_SCRIPT); \
+		echo "[ -w /dev/kmsg ] && echo \"kselftest: Running tests in $$TARGET\" >> /dev/kmsg" >> $(ALL_SCRIPT); \
+		echo "cd $$TARGET" >> $(ALL_SCRIPT); \
+		make -s --no-print-directory OUTPUT=$$BUILD_TARGET -C $$TARGET emit_tests >> $(ALL_SCRIPT); \
+		echo "cd \$$ROOT" >> $(ALL_SCRIPT); \
+	done;
+
+	chmod u+x $(ALL_SCRIPT)
+else
+	$(error Error: set INSTALL_PATH to use install)
+endif
+
+clean:
+	@for TARGET in $(TARGETS); do \
+		BUILD_TARGET=$$BUILD/$$TARGET;	\
+		make OUTPUT=$$BUILD_TARGET -C $$TARGET clean;\
+	done;
+
+.PHONY: all run_tests hotplug run_hotplug clean_hotplug run_pstore_crash install clean
diff --git a/tools/testing/selftests/android/Makefile b/tools/testing/selftests/android/Makefile
new file mode 100644
index 000000000..72c25a3cb
--- /dev/null
+++ b/tools/testing/selftests/android/Makefile
@@ -0,0 +1,38 @@
+SUBDIRS := ion
+
+TEST_PROGS := run.sh
+
+.PHONY: all clean
+
+include ../lib.mk
+
+all:
+	@for DIR in $(SUBDIRS); do		\
+		BUILD_TARGET=$(OUTPUT)/$$DIR;	\
+		mkdir $$BUILD_TARGET  -p;	\
+		make OUTPUT=$$BUILD_TARGET -C $$DIR $@;\
+		#SUBDIR test prog name should be in the form: SUBDIR_test.sh \
+		TEST=$$DIR"_test.sh"; \
+		if [ -e $$DIR/$$TEST ]; then \
+			rsync -a $$DIR/$$TEST $$BUILD_TARGET/; \
+		fi \
+	done
+
+override define INSTALL_RULE
+	mkdir -p $(INSTALL_PATH)
+	install -t $(INSTALL_PATH) $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES)
+
+	@for SUBDIR in $(SUBDIRS); do \
+		BUILD_TARGET=$(OUTPUT)/$$SUBDIR;	\
+		mkdir $$BUILD_TARGET  -p;	\
+		$(MAKE) OUTPUT=$$BUILD_TARGET -C $$SUBDIR INSTALL_PATH=$(INSTALL_PATH)/$$SUBDIR install; \
+	done;
+endef
+
+override define CLEAN
+	@for DIR in $(SUBDIRS); do		\
+		BUILD_TARGET=$(OUTPUT)/$$DIR;	\
+		mkdir $$BUILD_TARGET  -p;	\
+		make OUTPUT=$$BUILD_TARGET -C $$DIR $@;\
+	done
+endef
diff --git a/tools/testing/selftests/android/config b/tools/testing/selftests/android/config
new file mode 100644
index 000000000..b4ad748a9
--- /dev/null
+++ b/tools/testing/selftests/android/config
@@ -0,0 +1,5 @@
+CONFIG_ANDROID=y
+CONFIG_STAGING=y
+CONFIG_ION=y
+CONFIG_ION_SYSTEM_HEAP=y
+CONFIG_DRM_VGEM=y
diff --git a/tools/testing/selftests/android/ion/.gitignore b/tools/testing/selftests/android/ion/.gitignore
new file mode 100644
index 000000000..95e8f4561
--- /dev/null
+++ b/tools/testing/selftests/android/ion/.gitignore
@@ -0,0 +1,3 @@
+ionapp_export
+ionapp_import
+ionmap_test
diff --git a/tools/testing/selftests/android/ion/Makefile b/tools/testing/selftests/android/ion/Makefile
new file mode 100644
index 000000000..88cfe88e4
--- /dev/null
+++ b/tools/testing/selftests/android/ion/Makefile
@@ -0,0 +1,19 @@
+
+INCLUDEDIR := -I. -I../../../../../drivers/staging/android/uapi/ -I../../../../../usr/include/
+CFLAGS := $(CFLAGS) $(INCLUDEDIR) -Wall -O2 -g
+
+TEST_GEN_FILES := ionapp_export ionapp_import ionmap_test
+
+all: $(TEST_GEN_FILES)
+
+$(TEST_GEN_FILES): ipcsocket.c ionutils.c
+
+TEST_PROGS := ion_test.sh
+
+KSFT_KHDR_INSTALL := 1
+top_srcdir = ../../../../..
+include ../../lib.mk
+
+$(OUTPUT)/ionapp_export: ionapp_export.c ipcsocket.c ionutils.c
+$(OUTPUT)/ionapp_import: ionapp_import.c ipcsocket.c ionutils.c
+$(OUTPUT)/ionmap_test: ionmap_test.c ionutils.c
diff --git a/tools/testing/selftests/android/ion/README b/tools/testing/selftests/android/ion/README
new file mode 100644
index 000000000..21783e9c4
--- /dev/null
+++ b/tools/testing/selftests/android/ion/README
@@ -0,0 +1,101 @@
+ION BUFFER SHARING UTILITY
+==========================
+File: ion_test.sh : Utility to test ION driver buffer sharing mechanism.
+Author: Pintu Kumar <pintu.ping@gmail.com>
+
+Introduction:
+-------------
+This is a test utility to verify ION buffer sharing in user space
+between 2 independent processes.
+It uses unix domain socket (with SCM_RIGHTS) as IPC to transfer an FD to
+another process to share the same buffer.
+This utility demonstrates how ION buffer sharing can be implemented between
+two user space processes, using various heap types.
+The following heap types are supported by ION driver.
+ION_HEAP_TYPE_SYSTEM (0)
+ION_HEAP_TYPE_SYSTEM_CONTIG (1)
+ION_HEAP_TYPE_CARVEOUT (2)
+ION_HEAP_TYPE_CHUNK (3)
+ION_HEAP_TYPE_DMA (4)
+
+By default only the SYSTEM and SYSTEM_CONTIG heaps are supported.
+Each heap is associated with the respective heap id.
+This utility is designed in the form of client/server program.
+The server part (ionapp_export) is the exporter of the buffer.
+It is responsible for creating an ION client, allocating the buffer based on
+the heap id, writing some data to this buffer and then exporting the FD
+(associated with this buffer) to another process using socket IPC.
+This FD is called as buffer FD (which is different than the ION client FD).
+
+The client part (ionapp_import) is the importer of the buffer.
+It retrives the FD from the socket data and installs into its address space.
+This new FD internally points to the same kernel buffer.
+So first it reads the data that is stored in this buffer and prints it.
+Then it writes the different size of data (it could be different data) to the
+same buffer.
+Finally the buffer FD must be closed by both the exporter and importer.
+Thus the same kernel buffer is shared among two user space processes using
+ION driver and only one time allocation.
+
+Prerequisite:
+-------------
+This utility works only if /dev/ion interface is present.
+The following configs needs to be enabled in kernel to include ion driver.
+CONFIG_ANDROID=y
+CONFIG_STAGING=y
+CONFIG_ION=y
+CONFIG_ION_SYSTEM_HEAP=y
+
+This utility requires to be run as root user.
+
+
+Compile and test:
+-----------------
+This utility is made to be run as part of kselftest framework in kernel.
+To compile and run using kselftest you can simply do the following from the
+kernel top directory.
+linux$ make TARGETS=android kselftest
+Or you can also use:
+linux$ make -C tools/testing/selftests TARGETS=android run_tests
+Using the selftest it can directly execute the ion_test.sh script to test the
+buffer sharing using ion system heap.
+Currently the heap size is hard coded as just 10 bytes inside this script.
+You need to be a root user to run under selftest.
+
+You can also compile and test manually using the following steps:
+ion$ make
+These will generate 2 executable: ionapp_export, ionapp_import
+Now you can run the export and import manually by specifying the heap type
+and the heap size.
+You can also directly execute the shell script to run the test automatically.
+Simply use the following command to run the test.
+ion$ sudo ./ion_test.sh
+
+Test Results:
+-------------
+The utility is verified on Ubuntu-32 bit system with Linux Kernel 4.14.
+Here is the snapshot of the test result using kselftest.
+
+linux# make TARGETS=android kselftest
+heap_type: 0, heap_size: 10
+--------------------------------------
+heap type: 0
+  heap id: 1
+heap name: ion_system_heap
+--------------------------------------
+Fill buffer content:
+0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd
+Sharing fd: 6, Client fd: 5
+<ion_close_buffer_fd>: buffer release successfully....
+Received buffer fd: 4
+Read buffer content:
+0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0x0 0x0 0x0 0x0 0x0 0x0
+0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0
+Fill buffer content:
+0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd
+0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd
+0xfd 0xfd
+<ion_close_buffer_fd>: buffer release successfully....
+ion_test.sh: heap_type: 0 - [PASS]
+
+ion_test.sh: done
diff --git a/tools/testing/selftests/android/ion/ion.h b/tools/testing/selftests/android/ion/ion.h
new file mode 100644
index 000000000..f7021ac51
--- /dev/null
+++ b/tools/testing/selftests/android/ion/ion.h
@@ -0,0 +1,143 @@
+/*
+ * ion.h
+ *
+ * Copyright (C) 2011 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+/* This file is copied from drivers/staging/android/uapi/ion.h
+ * This local copy is required for the selftest to pass, when build
+ * outside the kernel source tree.
+ * Please keep this file in sync with its original file until the
+ * ion driver is moved outside the staging tree.
+ */
+
+#ifndef _UAPI_LINUX_ION_H
+#define _UAPI_LINUX_ION_H
+
+#include <linux/ioctl.h>
+#include <linux/types.h>
+
+/**
+ * enum ion_heap_types - list of all possible types of heaps
+ * @ION_HEAP_TYPE_SYSTEM:	 memory allocated via vmalloc
+ * @ION_HEAP_TYPE_SYSTEM_CONTIG: memory allocated via kmalloc
+ * @ION_HEAP_TYPE_CARVEOUT:	 memory allocated from a prereserved
+ *				 carveout heap, allocations are physically
+ *				 contiguous
+ * @ION_HEAP_TYPE_DMA:		 memory allocated via DMA API
+ * @ION_NUM_HEAPS:		 helper for iterating over heaps, a bit mask
+ *				 is used to identify the heaps, so only 32
+ *				 total heap types are supported
+ */
+enum ion_heap_type {
+	ION_HEAP_TYPE_SYSTEM,
+	ION_HEAP_TYPE_SYSTEM_CONTIG,
+	ION_HEAP_TYPE_CARVEOUT,
+	ION_HEAP_TYPE_CHUNK,
+	ION_HEAP_TYPE_DMA,
+	ION_HEAP_TYPE_CUSTOM, /*
+			       * must be last so device specific heaps always
+			       * are at the end of this enum
+			       */
+};
+
+#define ION_NUM_HEAP_IDS		(sizeof(unsigned int) * 8)
+
+/**
+ * allocation flags - the lower 16 bits are used by core ion, the upper 16
+ * bits are reserved for use by the heaps themselves.
+ */
+
+/*
+ * mappings of this buffer should be cached, ion will do cache maintenance
+ * when the buffer is mapped for dma
+ */
+#define ION_FLAG_CACHED 1
+
+/**
+ * DOC: Ion Userspace API
+ *
+ * create a client by opening /dev/ion
+ * most operations handled via following ioctls
+ *
+ */
+
+/**
+ * struct ion_allocation_data - metadata passed from userspace for allocations
+ * @len:		size of the allocation
+ * @heap_id_mask:	mask of heap ids to allocate from
+ * @flags:		flags passed to heap
+ * @handle:		pointer that will be populated with a cookie to use to
+ *			refer to this allocation
+ *
+ * Provided by userspace as an argument to the ioctl
+ */
+struct ion_allocation_data {
+	__u64 len;
+	__u32 heap_id_mask;
+	__u32 flags;
+	__u32 fd;
+	__u32 unused;
+};
+
+#define MAX_HEAP_NAME			32
+
+/**
+ * struct ion_heap_data - data about a heap
+ * @name - first 32 characters of the heap name
+ * @type - heap type
+ * @heap_id - heap id for the heap
+ */
+struct ion_heap_data {
+	char name[MAX_HEAP_NAME];
+	__u32 type;
+	__u32 heap_id;
+	__u32 reserved0;
+	__u32 reserved1;
+	__u32 reserved2;
+};
+
+/**
+ * struct ion_heap_query - collection of data about all heaps
+ * @cnt - total number of heaps to be copied
+ * @heaps - buffer to copy heap data
+ */
+struct ion_heap_query {
+	__u32 cnt; /* Total number of heaps to be copied */
+	__u32 reserved0; /* align to 64bits */
+	__u64 heaps; /* buffer to be populated */
+	__u32 reserved1;
+	__u32 reserved2;
+};
+
+#define ION_IOC_MAGIC		'I'
+
+/**
+ * DOC: ION_IOC_ALLOC - allocate memory
+ *
+ * Takes an ion_allocation_data struct and returns it with the handle field
+ * populated with the opaque handle for the allocation.
+ */
+#define ION_IOC_ALLOC		_IOWR(ION_IOC_MAGIC, 0, \
+				      struct ion_allocation_data)
+
+/**
+ * DOC: ION_IOC_HEAP_QUERY - information about available heaps
+ *
+ * Takes an ion_heap_query structure and populates information about
+ * available Ion heaps.
+ */
+#define ION_IOC_HEAP_QUERY     _IOWR(ION_IOC_MAGIC, 8, \
+					struct ion_heap_query)
+
+#endif /* _UAPI_LINUX_ION_H */
diff --git a/tools/testing/selftests/android/ion/ion_test.sh b/tools/testing/selftests/android/ion/ion_test.sh
new file mode 100755
index 000000000..69e676cfc
--- /dev/null
+++ b/tools/testing/selftests/android/ion/ion_test.sh
@@ -0,0 +1,58 @@
+#!/bin/bash
+
+heapsize=4096
+TCID="ion_test.sh"
+errcode=0
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+run_test()
+{
+	heaptype=$1
+	./ionapp_export -i $heaptype -s $heapsize &
+	sleep 1
+	./ionapp_import
+	if [ $? -ne 0 ]; then
+		echo "$TCID: heap_type: $heaptype - [FAIL]"
+		errcode=1
+	else
+		echo "$TCID: heap_type: $heaptype - [PASS]"
+	fi
+	sleep 1
+	echo ""
+}
+
+check_root()
+{
+	uid=$(id -u)
+	if [ $uid -ne 0 ]; then
+		echo $TCID: must be run as root >&2
+		exit $ksft_skip
+	fi
+}
+
+check_device()
+{
+	DEVICE=/dev/ion
+	if [ ! -e $DEVICE ]; then
+		echo $TCID: No $DEVICE device found >&2
+		echo $TCID: May be CONFIG_ION is not set >&2
+		exit $ksft_skip
+	fi
+}
+
+main_function()
+{
+	check_device
+	check_root
+
+	# ION_SYSTEM_HEAP TEST
+	run_test 0
+	# ION_SYSTEM_CONTIG_HEAP TEST
+	run_test 1
+}
+
+main_function
+echo "$TCID: done"
+exit $errcode
diff --git a/tools/testing/selftests/android/ion/ionapp_export.c b/tools/testing/selftests/android/ion/ionapp_export.c
new file mode 100644
index 000000000..b5fa0a2dc
--- /dev/null
+++ b/tools/testing/selftests/android/ion/ionapp_export.c
@@ -0,0 +1,136 @@
+/*
+ * ionapp_export.c
+ *
+ * It is a user space utility to create and export android
+ * ion memory buffer fd to another process using unix domain socket as IPC.
+ * This acts like a server for ionapp_import(client).
+ * So, this server has to be started first before the client.
+ *
+ * Copyright (C) 2017 Pintu Kumar <pintu.ping@gmail.com>
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <errno.h>
+#include <sys/time.h>
+#include "ionutils.h"
+#include "ipcsocket.h"
+
+
+void print_usage(int argc, char *argv[])
+{
+	printf("Usage: %s [-h <help>] [-i <heap id>] [-s <size in bytes>]\n",
+		argv[0]);
+}
+
+int main(int argc, char *argv[])
+{
+	int opt, ret, status, heapid;
+	int sockfd, client_fd, shared_fd;
+	unsigned char *map_buf;
+	unsigned long map_len, heap_type, heap_size, flags;
+	struct ion_buffer_info info;
+	struct socket_info skinfo;
+
+	if (argc < 2) {
+		print_usage(argc, argv);
+		return -1;
+	}
+
+	heap_size = 0;
+	flags = 0;
+	heap_type = ION_HEAP_TYPE_SYSTEM;
+
+	while ((opt = getopt(argc, argv, "hi:s:")) != -1) {
+		switch (opt) {
+		case 'h':
+			print_usage(argc, argv);
+			exit(0);
+			break;
+		case 'i':
+			heapid = atoi(optarg);
+			switch (heapid) {
+			case 0:
+				heap_type = ION_HEAP_TYPE_SYSTEM;
+				break;
+			case 1:
+				heap_type = ION_HEAP_TYPE_SYSTEM_CONTIG;
+				break;
+			default:
+				printf("ERROR: heap type not supported\n");
+				exit(1);
+			}
+			break;
+		case 's':
+			heap_size = atoi(optarg);
+			break;
+		default:
+			print_usage(argc, argv);
+			exit(1);
+			break;
+		}
+	}
+
+	if (heap_size <= 0) {
+		printf("heap_size cannot be 0\n");
+		print_usage(argc, argv);
+		exit(1);
+	}
+
+	printf("heap_type: %ld, heap_size: %ld\n", heap_type, heap_size);
+	info.heap_type = heap_type;
+	info.heap_size = heap_size;
+	info.flag_type = flags;
+
+	/* This is server: open the socket connection first */
+	/* Here; 1 indicates server or exporter */
+	status = opensocket(&sockfd, SOCKET_NAME, 1);
+	if (status < 0) {
+		fprintf(stderr, "<%s>: Failed opensocket.\n", __func__);
+		goto err_socket;
+	}
+	skinfo.sockfd = sockfd;
+
+	ret = ion_export_buffer_fd(&info);
+	if (ret < 0) {
+		fprintf(stderr, "FAILED: ion_get_buffer_fd\n");
+		goto err_export;
+	}
+	client_fd = info.ionfd;
+	shared_fd = info.buffd;
+	map_buf = info.buffer;
+	map_len = info.buflen;
+	write_buffer(map_buf, map_len);
+
+	/* share ion buf fd with other user process */
+	printf("Sharing fd: %d, Client fd: %d\n", shared_fd, client_fd);
+	skinfo.datafd = shared_fd;
+	skinfo.buflen = map_len;
+
+	ret = socket_send_fd(&skinfo);
+	if (ret < 0) {
+		fprintf(stderr, "FAILED: socket_send_fd\n");
+		goto err_send;
+	}
+
+err_send:
+err_export:
+	ion_close_buffer_fd(&info);
+
+err_socket:
+	closesocket(sockfd, SOCKET_NAME);
+
+	return 0;
+}
diff --git a/tools/testing/selftests/android/ion/ionapp_import.c b/tools/testing/selftests/android/ion/ionapp_import.c
new file mode 100644
index 000000000..ae2d704cf
--- /dev/null
+++ b/tools/testing/selftests/android/ion/ionapp_import.c
@@ -0,0 +1,88 @@
+/*
+ * ionapp_import.c
+ *
+ * It is a user space utility to receive android ion memory buffer fd
+ * over unix domain socket IPC that can be exported by ionapp_export.
+ * This acts like a client for ionapp_export.
+ *
+ * Copyright (C) 2017 Pintu Kumar <pintu.ping@gmail.com>
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+#include "ionutils.h"
+#include "ipcsocket.h"
+
+
+int main(void)
+{
+	int ret, status;
+	int sockfd, shared_fd;
+	unsigned char *map_buf;
+	unsigned long map_len;
+	struct ion_buffer_info info;
+	struct socket_info skinfo;
+
+	/* This is the client part. Here 0 means client or importer */
+	status = opensocket(&sockfd, SOCKET_NAME, 0);
+	if (status < 0) {
+		fprintf(stderr, "No exporter exists...\n");
+		ret = status;
+		goto err_socket;
+	}
+
+	skinfo.sockfd = sockfd;
+
+	ret = socket_receive_fd(&skinfo);
+	if (ret < 0) {
+		fprintf(stderr, "Failed: socket_receive_fd\n");
+		goto err_recv;
+	}
+
+	shared_fd = skinfo.datafd;
+	printf("Received buffer fd: %d\n", shared_fd);
+	if (shared_fd <= 0) {
+		fprintf(stderr, "ERROR: improper buf fd\n");
+		ret = -1;
+		goto err_fd;
+	}
+
+	memset(&info, 0, sizeof(info));
+	info.buffd = shared_fd;
+	info.buflen = ION_BUFFER_LEN;
+
+	ret = ion_import_buffer_fd(&info);
+	if (ret < 0) {
+		fprintf(stderr, "Failed: ion_use_buffer_fd\n");
+		goto err_import;
+	}
+
+	map_buf = info.buffer;
+	map_len = info.buflen;
+	read_buffer(map_buf, map_len);
+
+	/* Write probably new data to the same buffer again */
+	map_len = ION_BUFFER_LEN;
+	write_buffer(map_buf, map_len);
+
+err_import:
+	ion_close_buffer_fd(&info);
+err_fd:
+err_recv:
+err_socket:
+	closesocket(sockfd, SOCKET_NAME);
+
+	return ret;
+}
diff --git a/tools/testing/selftests/android/ion/ionmap_test.c b/tools/testing/selftests/android/ion/ionmap_test.c
new file mode 100644
index 000000000..dab36b06b
--- /dev/null
+++ b/tools/testing/selftests/android/ion/ionmap_test.c
@@ -0,0 +1,136 @@
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <sys/ioctl.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include <linux/dma-buf.h>
+
+#include <drm/drm.h>
+
+#include "ion.h"
+#include "ionutils.h"
+
+int check_vgem(int fd)
+{
+	drm_version_t version = { 0 };
+	char name[5];
+	int ret;
+
+	version.name_len = 4;
+	version.name = name;
+
+	ret = ioctl(fd, DRM_IOCTL_VERSION, &version);
+	if (ret)
+		return 1;
+
+	return strcmp(name, "vgem");
+}
+
+int open_vgem(void)
+{
+	int i, fd;
+	const char *drmstr = "/dev/dri/card";
+
+	fd = -1;
+	for (i = 0; i < 16; i++) {
+		char name[80];
+
+		sprintf(name, "%s%u", drmstr, i);
+
+		fd = open(name, O_RDWR);
+		if (fd < 0)
+			continue;
+
+		if (check_vgem(fd)) {
+			close(fd);
+			continue;
+		} else {
+			break;
+		}
+
+	}
+	return fd;
+}
+
+int import_vgem_fd(int vgem_fd, int dma_buf_fd, uint32_t *handle)
+{
+	struct drm_prime_handle import_handle = { 0 };
+	int ret;
+
+	import_handle.fd = dma_buf_fd;
+	import_handle.flags = 0;
+	import_handle.handle = 0;
+
+	ret = ioctl(vgem_fd, DRM_IOCTL_PRIME_FD_TO_HANDLE, &import_handle);
+	if (ret == 0)
+		*handle = import_handle.handle;
+	return ret;
+}
+
+void close_handle(int vgem_fd, uint32_t handle)
+{
+	struct drm_gem_close close = { 0 };
+
+	close.handle = handle;
+	ioctl(vgem_fd, DRM_IOCTL_GEM_CLOSE, &close);
+}
+
+int main()
+{
+	int ret, vgem_fd;
+	struct ion_buffer_info info;
+	uint32_t handle = 0;
+	struct dma_buf_sync sync = { 0 };
+
+	info.heap_type = ION_HEAP_TYPE_SYSTEM;
+	info.heap_size = 4096;
+	info.flag_type = ION_FLAG_CACHED;
+
+	ret = ion_export_buffer_fd(&info);
+	if (ret < 0) {
+		printf("ion buffer alloc failed\n");
+		return -1;
+	}
+
+	vgem_fd = open_vgem();
+	if (vgem_fd < 0) {
+		ret = vgem_fd;
+		printf("Failed to open vgem\n");
+		goto out_ion;
+	}
+
+	ret = import_vgem_fd(vgem_fd, info.buffd, &handle);
+
+	if (ret < 0) {
+		printf("Failed to import buffer\n");
+		goto out_vgem;
+	}
+
+	sync.flags = DMA_BUF_SYNC_START | DMA_BUF_SYNC_RW;
+	ret = ioctl(info.buffd, DMA_BUF_IOCTL_SYNC, &sync);
+	if (ret)
+		printf("sync start failed %d\n", errno);
+
+	memset(info.buffer, 0xff, 4096);
+
+	sync.flags = DMA_BUF_SYNC_END | DMA_BUF_SYNC_RW;
+	ret = ioctl(info.buffd, DMA_BUF_IOCTL_SYNC, &sync);
+	if (ret)
+		printf("sync end failed %d\n", errno);
+
+	close_handle(vgem_fd, handle);
+	ret = 0;
+
+out_vgem:
+	close(vgem_fd);
+out_ion:
+	ion_close_buffer_fd(&info);
+	printf("done.\n");
+	return ret;
+}
diff --git a/tools/testing/selftests/android/ion/ionutils.c b/tools/testing/selftests/android/ion/ionutils.c
new file mode 100644
index 000000000..7d1d37c4e
--- /dev/null
+++ b/tools/testing/selftests/android/ion/ionutils.c
@@ -0,0 +1,253 @@
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <errno.h>
+//#include <stdint.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include "ionutils.h"
+#include "ipcsocket.h"
+
+
+void write_buffer(void *buffer, unsigned long len)
+{
+	int i;
+	unsigned char *ptr = (unsigned char *)buffer;
+
+	if (!ptr) {
+		fprintf(stderr, "<%s>: Invalid buffer...\n", __func__);
+		return;
+	}
+
+	printf("Fill buffer content:\n");
+	memset(ptr, 0xfd, len);
+	for (i = 0; i < len; i++)
+		printf("0x%x ", ptr[i]);
+	printf("\n");
+}
+
+void read_buffer(void *buffer, unsigned long len)
+{
+	int i;
+	unsigned char *ptr = (unsigned char *)buffer;
+
+	if (!ptr) {
+		fprintf(stderr, "<%s>: Invalid buffer...\n", __func__);
+		return;
+	}
+
+	printf("Read buffer content:\n");
+	for (i = 0; i < len; i++)
+		printf("0x%x ", ptr[i]);
+	printf("\n");
+}
+
+int ion_export_buffer_fd(struct ion_buffer_info *ion_info)
+{
+	int i, ret, ionfd, buffer_fd;
+	unsigned int heap_id;
+	unsigned long maplen;
+	unsigned char *map_buffer;
+	struct ion_allocation_data alloc_data;
+	struct ion_heap_query query;
+	struct ion_heap_data heap_data[MAX_HEAP_COUNT];
+
+	if (!ion_info) {
+		fprintf(stderr, "<%s>: Invalid ion info\n", __func__);
+		return -1;
+	}
+
+	/* Create an ION client */
+	ionfd = open(ION_DEVICE, O_RDWR);
+	if (ionfd < 0) {
+		fprintf(stderr, "<%s>: Failed to open ion client: %s\n",
+			__func__, strerror(errno));
+		return -1;
+	}
+
+	memset(&query, 0, sizeof(query));
+	query.cnt = MAX_HEAP_COUNT;
+	query.heaps = (unsigned long int)&heap_data[0];
+	/* Query ION heap_id_mask from ION heap */
+	ret = ioctl(ionfd, ION_IOC_HEAP_QUERY, &query);
+	if (ret < 0) {
+		fprintf(stderr, "<%s>: Failed: ION_IOC_HEAP_QUERY: %s\n",
+			__func__, strerror(errno));
+		goto err_query;
+	}
+
+	heap_id = MAX_HEAP_COUNT + 1;
+	for (i = 0; i < query.cnt; i++) {
+		if (heap_data[i].type == ion_info->heap_type) {
+			heap_id = heap_data[i].heap_id;
+			break;
+		}
+	}
+
+	if (heap_id > MAX_HEAP_COUNT) {
+		fprintf(stderr, "<%s>: ERROR: heap type does not exists\n",
+			__func__);
+		goto err_heap;
+	}
+
+	alloc_data.len = ion_info->heap_size;
+	alloc_data.heap_id_mask = 1 << heap_id;
+	alloc_data.flags = ion_info->flag_type;
+
+	/* Allocate memory for this ION client as per heap_type */
+	ret = ioctl(ionfd, ION_IOC_ALLOC, &alloc_data);
+	if (ret < 0) {
+		fprintf(stderr, "<%s>: Failed: ION_IOC_ALLOC: %s\n",
+			__func__, strerror(errno));
+		goto err_alloc;
+	}
+
+	/* This will return a valid buffer fd */
+	buffer_fd = alloc_data.fd;
+	maplen = alloc_data.len;
+
+	if (buffer_fd < 0 || maplen <= 0) {
+		fprintf(stderr, "<%s>: Invalid map data, fd: %d, len: %ld\n",
+			__func__, buffer_fd, maplen);
+		goto err_fd_data;
+	}
+
+	/* Create memory mapped buffer for the buffer fd */
+	map_buffer = (unsigned char *)mmap(NULL, maplen, PROT_READ|PROT_WRITE,
+			MAP_SHARED, buffer_fd, 0);
+	if (map_buffer == MAP_FAILED) {
+		fprintf(stderr, "<%s>: Failed: mmap: %s\n",
+			__func__, strerror(errno));
+		goto err_mmap;
+	}
+
+	ion_info->ionfd = ionfd;
+	ion_info->buffd = buffer_fd;
+	ion_info->buffer = map_buffer;
+	ion_info->buflen = maplen;
+
+	return 0;
+
+	munmap(map_buffer, maplen);
+
+err_fd_data:
+err_mmap:
+	/* in case of error: close the buffer fd */
+	if (buffer_fd)
+		close(buffer_fd);
+
+err_query:
+err_heap:
+err_alloc:
+	/* In case of error: close the ion client fd */
+	if (ionfd)
+		close(ionfd);
+
+	return -1;
+}
+
+int ion_import_buffer_fd(struct ion_buffer_info *ion_info)
+{
+	int buffd;
+	unsigned char *map_buf;
+	unsigned long map_len;
+
+	if (!ion_info) {
+		fprintf(stderr, "<%s>: Invalid ion info\n", __func__);
+		return -1;
+	}
+
+	map_len = ion_info->buflen;
+	buffd = ion_info->buffd;
+
+	if (buffd < 0 || map_len <= 0) {
+		fprintf(stderr, "<%s>: Invalid map data, fd: %d, len: %ld\n",
+			__func__, buffd, map_len);
+		goto err_buffd;
+	}
+
+	map_buf = (unsigned char *)mmap(NULL, map_len, PROT_READ|PROT_WRITE,
+			MAP_SHARED, buffd, 0);
+	if (map_buf == MAP_FAILED) {
+		printf("<%s>: Failed - mmap: %s\n",
+			__func__, strerror(errno));
+		goto err_mmap;
+	}
+
+	ion_info->buffer = map_buf;
+	ion_info->buflen = map_len;
+
+	return 0;
+
+err_mmap:
+	if (buffd)
+		close(buffd);
+
+err_buffd:
+	return -1;
+}
+
+void ion_close_buffer_fd(struct ion_buffer_info *ion_info)
+{
+	if (ion_info) {
+		/* unmap the buffer properly in the end */
+		munmap(ion_info->buffer, ion_info->buflen);
+		/* close the buffer fd */
+		if (ion_info->buffd > 0)
+			close(ion_info->buffd);
+		/* Finally, close the client fd */
+		if (ion_info->ionfd > 0)
+			close(ion_info->ionfd);
+	}
+}
+
+int socket_send_fd(struct socket_info *info)
+{
+	int status;
+	int fd, sockfd;
+	struct socketdata skdata;
+
+	if (!info) {
+		fprintf(stderr, "<%s>: Invalid socket info\n", __func__);
+		return -1;
+	}
+
+	sockfd = info->sockfd;
+	fd = info->datafd;
+	memset(&skdata, 0, sizeof(skdata));
+	skdata.data = fd;
+	skdata.len = sizeof(skdata.data);
+	status = sendtosocket(sockfd, &skdata);
+	if (status < 0) {
+		fprintf(stderr, "<%s>: Failed: sendtosocket\n", __func__);
+		return -1;
+	}
+
+	return 0;
+}
+
+int socket_receive_fd(struct socket_info *info)
+{
+	int status;
+	int fd, sockfd;
+	struct socketdata skdata;
+
+	if (!info) {
+		fprintf(stderr, "<%s>: Invalid socket info\n", __func__);
+		return -1;
+	}
+
+	sockfd = info->sockfd;
+	memset(&skdata, 0, sizeof(skdata));
+	status = receivefromsocket(sockfd, &skdata);
+	if (status < 0) {
+		fprintf(stderr, "<%s>: Failed: receivefromsocket\n", __func__);
+		return -1;
+	}
+
+	fd = (int)skdata.data;
+	info->datafd = fd;
+
+	return status;
+}
diff --git a/tools/testing/selftests/android/ion/ionutils.h b/tools/testing/selftests/android/ion/ionutils.h
new file mode 100644
index 000000000..9941eb858
--- /dev/null
+++ b/tools/testing/selftests/android/ion/ionutils.h
@@ -0,0 +1,55 @@
+#ifndef __ION_UTILS_H
+#define __ION_UTILS_H
+
+#include "ion.h"
+
+#define SOCKET_NAME "ion_socket"
+#define ION_DEVICE "/dev/ion"
+
+#define ION_BUFFER_LEN	4096
+#define MAX_HEAP_COUNT	ION_HEAP_TYPE_CUSTOM
+
+struct socket_info {
+	int sockfd;
+	int datafd;
+	unsigned long buflen;
+};
+
+struct ion_buffer_info {
+	int ionfd;
+	int buffd;
+	unsigned int heap_type;
+	unsigned int flag_type;
+	unsigned long heap_size;
+	unsigned long buflen;
+	unsigned char *buffer;
+};
+
+
+/* This is used to fill the data into the mapped buffer */
+void write_buffer(void *buffer, unsigned long len);
+
+/* This is used to read the data from the exported buffer */
+void read_buffer(void *buffer, unsigned long len);
+
+/* This is used to create an ION buffer FD for the kernel buffer
+ * So you can export this same buffer to others in the form of FD
+ */
+int ion_export_buffer_fd(struct ion_buffer_info *ion_info);
+
+/* This is used to import or map an exported FD.
+ * So we point to same buffer without making a copy. Hence zero-copy.
+ */
+int ion_import_buffer_fd(struct ion_buffer_info *ion_info);
+
+/* This is used to close all references for the ION client */
+void ion_close_buffer_fd(struct ion_buffer_info *ion_info);
+
+/* This is used to send FD to another process using socket IPC */
+int socket_send_fd(struct socket_info *skinfo);
+
+/* This is used to receive FD from another process using socket IPC */
+int socket_receive_fd(struct socket_info *skinfo);
+
+
+#endif
diff --git a/tools/testing/selftests/android/ion/ipcsocket.c b/tools/testing/selftests/android/ion/ipcsocket.c
new file mode 100644
index 000000000..7dc521002
--- /dev/null
+++ b/tools/testing/selftests/android/ion/ipcsocket.c
@@ -0,0 +1,227 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/time.h>
+#include <sys/un.h>
+#include <errno.h>
+
+#include "ipcsocket.h"
+
+
+int opensocket(int *sockfd, const char *name, int connecttype)
+{
+	int ret, temp = 1;
+
+	if (!name || strlen(name) > MAX_SOCK_NAME_LEN) {
+		fprintf(stderr, "<%s>: Invalid socket name.\n", __func__);
+		return -1;
+	}
+
+	ret = socket(PF_LOCAL, SOCK_STREAM, 0);
+	if (ret < 0) {
+		fprintf(stderr, "<%s>: Failed socket: <%s>\n",
+			__func__, strerror(errno));
+		return ret;
+	}
+
+	*sockfd = ret;
+	if (setsockopt(*sockfd, SOL_SOCKET, SO_REUSEADDR,
+		(char *)&temp, sizeof(int)) < 0) {
+		fprintf(stderr, "<%s>: Failed setsockopt: <%s>\n",
+		__func__, strerror(errno));
+		goto err;
+	}
+
+	sprintf(sock_name, "/tmp/%s", name);
+
+	if (connecttype == 1) {
+		/* This is for Server connection */
+		struct sockaddr_un skaddr;
+		int clientfd;
+		socklen_t sklen;
+
+		unlink(sock_name);
+		memset(&skaddr, 0, sizeof(skaddr));
+		skaddr.sun_family = AF_LOCAL;
+		strcpy(skaddr.sun_path, sock_name);
+
+		ret = bind(*sockfd, (struct sockaddr *)&skaddr,
+			SUN_LEN(&skaddr));
+		if (ret < 0) {
+			fprintf(stderr, "<%s>: Failed bind: <%s>\n",
+			__func__, strerror(errno));
+			goto err;
+		}
+
+		ret = listen(*sockfd, 5);
+		if (ret < 0) {
+			fprintf(stderr, "<%s>: Failed listen: <%s>\n",
+			__func__, strerror(errno));
+			goto err;
+		}
+
+		memset(&skaddr, 0, sizeof(skaddr));
+		sklen = sizeof(skaddr);
+
+		ret = accept(*sockfd, (struct sockaddr *)&skaddr,
+			(socklen_t *)&sklen);
+		if (ret < 0) {
+			fprintf(stderr, "<%s>: Failed accept: <%s>\n",
+			__func__, strerror(errno));
+			goto err;
+		}
+
+		clientfd = ret;
+		*sockfd = clientfd;
+	} else {
+		/* This is for client connection */
+		struct sockaddr_un skaddr;
+
+		memset(&skaddr, 0, sizeof(skaddr));
+		skaddr.sun_family = AF_LOCAL;
+		strcpy(skaddr.sun_path, sock_name);
+
+		ret = connect(*sockfd, (struct sockaddr *)&skaddr,
+			SUN_LEN(&skaddr));
+		if (ret < 0) {
+			fprintf(stderr, "<%s>: Failed connect: <%s>\n",
+			__func__, strerror(errno));
+			goto err;
+		}
+	}
+
+	return 0;
+
+err:
+	if (*sockfd)
+		close(*sockfd);
+
+	return ret;
+}
+
+int sendtosocket(int sockfd, struct socketdata *skdata)
+{
+	int ret, buffd;
+	unsigned int len;
+	char cmsg_b[CMSG_SPACE(sizeof(int))];
+	struct cmsghdr *cmsg;
+	struct msghdr msgh;
+	struct iovec iov;
+	struct timeval timeout;
+	fd_set selFDs;
+
+	if (!skdata) {
+		fprintf(stderr, "<%s>: socketdata is NULL\n", __func__);
+		return -1;
+	}
+
+	FD_ZERO(&selFDs);
+	FD_SET(0, &selFDs);
+	FD_SET(sockfd, &selFDs);
+	timeout.tv_sec = 20;
+	timeout.tv_usec = 0;
+
+	ret = select(sockfd+1, NULL, &selFDs, NULL, &timeout);
+	if (ret < 0) {
+		fprintf(stderr, "<%s>: Failed select: <%s>\n",
+		__func__, strerror(errno));
+		return -1;
+	}
+
+	if (FD_ISSET(sockfd, &selFDs)) {
+		buffd = skdata->data;
+		len = skdata->len;
+		memset(&msgh, 0, sizeof(msgh));
+		msgh.msg_control = &cmsg_b;
+		msgh.msg_controllen = CMSG_LEN(len);
+		iov.iov_base = "OK";
+		iov.iov_len = 2;
+		msgh.msg_iov = &iov;
+		msgh.msg_iovlen = 1;
+		cmsg = CMSG_FIRSTHDR(&msgh);
+		cmsg->cmsg_level = SOL_SOCKET;
+		cmsg->cmsg_type = SCM_RIGHTS;
+		cmsg->cmsg_len = CMSG_LEN(len);
+		memcpy(CMSG_DATA(cmsg), &buffd, len);
+
+		ret = sendmsg(sockfd, &msgh, MSG_DONTWAIT);
+		if (ret < 0) {
+			fprintf(stderr, "<%s>: Failed sendmsg: <%s>\n",
+			__func__, strerror(errno));
+			return -1;
+		}
+	}
+
+	return 0;
+}
+
+int receivefromsocket(int sockfd, struct socketdata *skdata)
+{
+	int ret, buffd;
+	unsigned int len = 0;
+	char cmsg_b[CMSG_SPACE(sizeof(int))];
+	struct cmsghdr *cmsg;
+	struct msghdr msgh;
+	struct iovec iov;
+	fd_set recvFDs;
+	char data[32];
+
+	if (!skdata) {
+		fprintf(stderr, "<%s>: socketdata is NULL\n", __func__);
+		return -1;
+	}
+
+	FD_ZERO(&recvFDs);
+	FD_SET(0, &recvFDs);
+	FD_SET(sockfd, &recvFDs);
+
+	ret = select(sockfd+1, &recvFDs, NULL, NULL, NULL);
+	if (ret < 0) {
+		fprintf(stderr, "<%s>: Failed select: <%s>\n",
+		__func__, strerror(errno));
+		return -1;
+	}
+
+	if (FD_ISSET(sockfd, &recvFDs)) {
+		len = sizeof(buffd);
+		memset(&msgh, 0, sizeof(msgh));
+		msgh.msg_control = &cmsg_b;
+		msgh.msg_controllen = CMSG_LEN(len);
+		iov.iov_base = data;
+		iov.iov_len = sizeof(data)-1;
+		msgh.msg_iov = &iov;
+		msgh.msg_iovlen = 1;
+		cmsg = CMSG_FIRSTHDR(&msgh);
+		cmsg->cmsg_level = SOL_SOCKET;
+		cmsg->cmsg_type = SCM_RIGHTS;
+		cmsg->cmsg_len = CMSG_LEN(len);
+
+		ret = recvmsg(sockfd, &msgh, MSG_DONTWAIT);
+		if (ret < 0) {
+			fprintf(stderr, "<%s>: Failed recvmsg: <%s>\n",
+			__func__, strerror(errno));
+			return -1;
+		}
+
+		memcpy(&buffd, CMSG_DATA(cmsg), len);
+		skdata->data = buffd;
+		skdata->len = len;
+	}
+	return 0;
+}
+
+int closesocket(int sockfd, char *name)
+{
+	char sockname[MAX_SOCK_NAME_LEN];
+
+	if (sockfd)
+		close(sockfd);
+	sprintf(sockname, "/tmp/%s", name);
+	unlink(sockname);
+	shutdown(sockfd, 2);
+
+	return 0;
+}
diff --git a/tools/testing/selftests/android/ion/ipcsocket.h b/tools/testing/selftests/android/ion/ipcsocket.h
new file mode 100644
index 000000000..b3e84498a
--- /dev/null
+++ b/tools/testing/selftests/android/ion/ipcsocket.h
@@ -0,0 +1,35 @@
+
+#ifndef _IPCSOCKET_H
+#define _IPCSOCKET_H
+
+
+#define MAX_SOCK_NAME_LEN	64
+
+char sock_name[MAX_SOCK_NAME_LEN];
+
+/* This structure is responsible for holding the IPC data
+ * data: hold the buffer fd
+ * len: just the length of 32-bit integer fd
+ */
+struct socketdata {
+	int data;
+	unsigned int len;
+};
+
+/* This API is used to open the IPC socket connection
+ * name: implies a unique socket name in the system
+ * connecttype: implies server(0) or client(1)
+ */
+int opensocket(int *sockfd, const char *name, int connecttype);
+
+/* This is the API to send socket data over IPC socket */
+int sendtosocket(int sockfd, struct socketdata *data);
+
+/* This is the API to receive socket data over IPC socket */
+int receivefromsocket(int sockfd, struct socketdata *data);
+
+/* This is the API to close the socket connection */
+int closesocket(int sockfd, char *name);
+
+
+#endif
diff --git a/tools/testing/selftests/android/run.sh b/tools/testing/selftests/android/run.sh
new file mode 100755
index 000000000..dd8edf291
--- /dev/null
+++ b/tools/testing/selftests/android/run.sh
@@ -0,0 +1,3 @@
+#!/bin/sh
+
+(cd ion; ./ion_test.sh)
diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore
new file mode 100644
index 000000000..49938d72c
--- /dev/null
+++ b/tools/testing/selftests/bpf/.gitignore
@@ -0,0 +1,21 @@
+test_verifier
+test_maps
+test_lru_map
+test_lpm_map
+test_tag
+FEATURE-DUMP.libbpf
+fixdep
+test_align
+test_dev_cgroup
+test_progs
+test_tcpbpf_user
+test_verifier_log
+feature
+test_libbpf_open
+test_sock
+test_sock_addr
+urandom_read
+test_btf
+test_sockmap
+test_lirc_mode2_user
+get_cgroup_id_user
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
new file mode 100644
index 000000000..f3f874ba1
--- /dev/null
+++ b/tools/testing/selftests/bpf/Makefile
@@ -0,0 +1,145 @@
+# SPDX-License-Identifier: GPL-2.0
+
+LIBDIR := ../../../lib
+BPFDIR := $(LIBDIR)/bpf
+APIDIR := ../../../include/uapi
+GENDIR := ../../../../include/generated
+GENHDR := $(GENDIR)/autoconf.h
+
+ifneq ($(wildcard $(GENHDR)),)
+  GENFLAGS := -DHAVE_GENHDR
+endif
+
+CFLAGS += -Wall -O2 -I$(APIDIR) -I$(LIBDIR) -I$(BPFDIR) -I$(GENDIR) $(GENFLAGS) -I../../../include
+LDLIBS += -lcap -lelf -lrt -lpthread
+
+TEST_CUSTOM_PROGS = $(OUTPUT)/urandom_read
+all: $(TEST_CUSTOM_PROGS)
+
+$(TEST_CUSTOM_PROGS): $(OUTPUT)/%: %.c
+	$(CC) -o $(TEST_CUSTOM_PROGS) -static $< -Wl,--build-id
+
+# Order correspond to 'make run_tests' order
+TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs \
+	test_align test_verifier_log test_dev_cgroup test_tcpbpf_user \
+	test_sock test_btf test_sockmap test_lirc_mode2_user get_cgroup_id_user \
+	test_socket_cookie test_cgroup_storage test_select_reuseport
+
+TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test_obj_id.o \
+	test_pkt_md_access.o test_xdp_redirect.o test_xdp_meta.o sockmap_parse_prog.o     \
+	sockmap_verdict_prog.o dev_cgroup.o sample_ret0.o test_tracepoint.o \
+	test_l4lb_noinline.o test_xdp_noinline.o test_stacktrace_map.o \
+	sample_map_ret0.o test_tcpbpf_kern.o test_stacktrace_build_id.o \
+	sockmap_tcp_msg_prog.o connect4_prog.o connect6_prog.o test_adjust_tail.o \
+	test_btf_haskv.o test_btf_nokv.o test_sockmap_kern.o test_tunnel_kern.o \
+	test_get_stack_rawtp.o test_sockmap_kern.o test_sockhash_kern.o \
+	test_lwt_seg6local.o sendmsg4_prog.o sendmsg6_prog.o test_lirc_mode2_kern.o \
+	get_cgroup_id_kern.o socket_cookie_prog.o test_select_reuseport_kern.o \
+	test_skb_cgroup_id_kern.o
+
+# Order correspond to 'make run_tests' order
+TEST_PROGS := test_kmod.sh \
+	test_libbpf.sh \
+	test_xdp_redirect.sh \
+	test_xdp_meta.sh \
+	test_offload.py \
+	test_sock_addr.sh \
+	test_tunnel.sh \
+	test_lwt_seg6local.sh \
+	test_lirc_mode2.sh \
+	test_skb_cgroup_id.sh
+
+# Compile but not part of 'make run_tests'
+TEST_GEN_PROGS_EXTENDED = test_libbpf_open test_sock_addr test_skb_cgroup_id_user
+
+include ../lib.mk
+
+BPFOBJ := $(OUTPUT)/libbpf.a
+
+$(TEST_GEN_PROGS): $(BPFOBJ)
+
+$(TEST_GEN_PROGS_EXTENDED): $(OUTPUT)/libbpf.a
+
+$(OUTPUT)/test_dev_cgroup: cgroup_helpers.c
+$(OUTPUT)/test_skb_cgroup_id_user: cgroup_helpers.c
+$(OUTPUT)/test_sock: cgroup_helpers.c
+$(OUTPUT)/test_sock_addr: cgroup_helpers.c
+$(OUTPUT)/test_socket_cookie: cgroup_helpers.c
+$(OUTPUT)/test_sockmap: cgroup_helpers.c
+$(OUTPUT)/test_tcpbpf_user: cgroup_helpers.c
+$(OUTPUT)/test_progs: trace_helpers.c
+$(OUTPUT)/get_cgroup_id_user: cgroup_helpers.c
+$(OUTPUT)/test_cgroup_storage: cgroup_helpers.c
+
+.PHONY: force
+
+# force a rebuild of BPFOBJ when its dependencies are updated
+force:
+
+$(BPFOBJ): force
+	$(MAKE) -C $(BPFDIR) OUTPUT=$(OUTPUT)/
+
+CLANG ?= clang
+LLC   ?= llc
+LLVM_OBJCOPY ?= llvm-objcopy
+BTF_PAHOLE ?= pahole
+
+PROBE := $(shell $(LLC) -march=bpf -mcpu=probe -filetype=null /dev/null 2>&1)
+
+# Let newer LLVM versions transparently probe the kernel for availability
+# of full BPF instruction set.
+ifeq ($(PROBE),)
+  CPU ?= probe
+else
+  CPU ?= generic
+endif
+
+# Get Clang's default includes on this system, as opposed to those seen by
+# '-target bpf'. This fixes "missing" files on some architectures/distros,
+# such as asm/byteorder.h, asm/socket.h, asm/sockios.h, sys/cdefs.h etc.
+#
+# Use '-idirafter': Don't interfere with include mechanics except where the
+# build would have failed anyways.
+CLANG_SYS_INCLUDES := $(shell $(CLANG) -v -E - </dev/null 2>&1 \
+	| sed -n '/<...> search starts here:/,/End of search list./{ s| \(/.*\)|-idirafter \1|p }')
+
+CLANG_FLAGS = -I. -I./include/uapi -I../../../include/uapi \
+	      $(CLANG_SYS_INCLUDES) \
+	      -Wno-compare-distinct-pointer-types
+
+$(OUTPUT)/test_l4lb_noinline.o: CLANG_FLAGS += -fno-inline
+$(OUTPUT)/test_xdp_noinline.o: CLANG_FLAGS += -fno-inline
+
+BTF_LLC_PROBE := $(shell $(LLC) -march=bpf -mattr=help 2>&1 | grep dwarfris)
+BTF_PAHOLE_PROBE := $(shell $(BTF_PAHOLE) --help 2>&1 | grep BTF)
+BTF_OBJCOPY_PROBE := $(shell $(LLVM_OBJCOPY) --help 2>&1 | grep -i 'usage.*llvm')
+
+ifneq ($(BTF_LLC_PROBE),)
+ifneq ($(BTF_PAHOLE_PROBE),)
+ifneq ($(BTF_OBJCOPY_PROBE),)
+	CLANG_FLAGS += -g
+	LLC_FLAGS += -mattr=dwarfris
+	DWARF2BTF = y
+endif
+endif
+endif
+
+# Have one program compiled without "-target bpf" to test whether libbpf loads
+# it successfully
+$(OUTPUT)/test_xdp.o: test_xdp.c
+	$(CLANG) $(CLANG_FLAGS) \
+		-O2 -emit-llvm -c $< -o - | \
+	$(LLC) -march=bpf -mcpu=$(CPU) $(LLC_FLAGS) -filetype=obj -o $@
+ifeq ($(DWARF2BTF),y)
+	$(BTF_PAHOLE) -J $@
+endif
+
+$(OUTPUT)/%.o: %.c
+	$(CLANG) $(CLANG_FLAGS) \
+		 -O2 -target bpf -emit-llvm -c $< -o - |      \
+	$(LLC) -march=bpf -mcpu=$(CPU) $(LLC_FLAGS) -filetype=obj -o $@
+ifeq ($(DWARF2BTF),y)
+	$(BTF_PAHOLE) -J $@
+endif
+
+EXTRA_CLEAN := $(TEST_CUSTOM_PROGS)
diff --git a/tools/testing/selftests/bpf/bpf_endian.h b/tools/testing/selftests/bpf/bpf_endian.h
new file mode 100644
index 000000000..b25595ea4
--- /dev/null
+++ b/tools/testing/selftests/bpf/bpf_endian.h
@@ -0,0 +1,57 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __BPF_ENDIAN__
+#define __BPF_ENDIAN__
+
+#include <linux/swab.h>
+
+/* LLVM's BPF target selects the endianness of the CPU
+ * it compiles on, or the user specifies (bpfel/bpfeb),
+ * respectively. The used __BYTE_ORDER__ is defined by
+ * the compiler, we cannot rely on __BYTE_ORDER from
+ * libc headers, since it doesn't reflect the actual
+ * requested byte order.
+ *
+ * Note, LLVM's BPF target has different __builtin_bswapX()
+ * semantics. It does map to BPF_ALU | BPF_END | BPF_TO_BE
+ * in bpfel and bpfeb case, which means below, that we map
+ * to cpu_to_be16(). We could use it unconditionally in BPF
+ * case, but better not rely on it, so that this header here
+ * can be used from application and BPF program side, which
+ * use different targets.
+ */
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+# define __bpf_ntohs(x)			__builtin_bswap16(x)
+# define __bpf_htons(x)			__builtin_bswap16(x)
+# define __bpf_constant_ntohs(x)	___constant_swab16(x)
+# define __bpf_constant_htons(x)	___constant_swab16(x)
+# define __bpf_ntohl(x)			__builtin_bswap32(x)
+# define __bpf_htonl(x)			__builtin_bswap32(x)
+# define __bpf_constant_ntohl(x)	___constant_swab32(x)
+# define __bpf_constant_htonl(x)	___constant_swab32(x)
+#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+# define __bpf_ntohs(x)			(x)
+# define __bpf_htons(x)			(x)
+# define __bpf_constant_ntohs(x)	(x)
+# define __bpf_constant_htons(x)	(x)
+# define __bpf_ntohl(x)			(x)
+# define __bpf_htonl(x)			(x)
+# define __bpf_constant_ntohl(x)	(x)
+# define __bpf_constant_htonl(x)	(x)
+#else
+# error "Fix your compiler's __BYTE_ORDER__?!"
+#endif
+
+#define bpf_htons(x)				\
+	(__builtin_constant_p(x) ?		\
+	 __bpf_constant_htons(x) : __bpf_htons(x))
+#define bpf_ntohs(x)				\
+	(__builtin_constant_p(x) ?		\
+	 __bpf_constant_ntohs(x) : __bpf_ntohs(x))
+#define bpf_htonl(x)				\
+	(__builtin_constant_p(x) ?		\
+	 __bpf_constant_htonl(x) : __bpf_htonl(x))
+#define bpf_ntohl(x)				\
+	(__builtin_constant_p(x) ?		\
+	 __bpf_constant_ntohl(x) : __bpf_ntohl(x))
+
+#endif /* __BPF_ENDIAN__ */
diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h
new file mode 100644
index 000000000..e4be77302
--- /dev/null
+++ b/tools/testing/selftests/bpf/bpf_helpers.h
@@ -0,0 +1,336 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __BPF_HELPERS_H
+#define __BPF_HELPERS_H
+
+/* helper macro to place programs, maps, license in
+ * different sections in elf_bpf file. Section names
+ * are interpreted by elf_bpf loader
+ */
+#define SEC(NAME) __attribute__((section(NAME), used))
+
+/* helper functions called from eBPF programs written in C */
+static void *(*bpf_map_lookup_elem)(void *map, void *key) =
+	(void *) BPF_FUNC_map_lookup_elem;
+static int (*bpf_map_update_elem)(void *map, void *key, void *value,
+				  unsigned long long flags) =
+	(void *) BPF_FUNC_map_update_elem;
+static int (*bpf_map_delete_elem)(void *map, void *key) =
+	(void *) BPF_FUNC_map_delete_elem;
+static int (*bpf_probe_read)(void *dst, int size, void *unsafe_ptr) =
+	(void *) BPF_FUNC_probe_read;
+static unsigned long long (*bpf_ktime_get_ns)(void) =
+	(void *) BPF_FUNC_ktime_get_ns;
+static int (*bpf_trace_printk)(const char *fmt, int fmt_size, ...) =
+	(void *) BPF_FUNC_trace_printk;
+static void (*bpf_tail_call)(void *ctx, void *map, int index) =
+	(void *) BPF_FUNC_tail_call;
+static unsigned long long (*bpf_get_smp_processor_id)(void) =
+	(void *) BPF_FUNC_get_smp_processor_id;
+static unsigned long long (*bpf_get_current_pid_tgid)(void) =
+	(void *) BPF_FUNC_get_current_pid_tgid;
+static unsigned long long (*bpf_get_current_uid_gid)(void) =
+	(void *) BPF_FUNC_get_current_uid_gid;
+static int (*bpf_get_current_comm)(void *buf, int buf_size) =
+	(void *) BPF_FUNC_get_current_comm;
+static unsigned long long (*bpf_perf_event_read)(void *map,
+						 unsigned long long flags) =
+	(void *) BPF_FUNC_perf_event_read;
+static int (*bpf_clone_redirect)(void *ctx, int ifindex, int flags) =
+	(void *) BPF_FUNC_clone_redirect;
+static int (*bpf_redirect)(int ifindex, int flags) =
+	(void *) BPF_FUNC_redirect;
+static int (*bpf_redirect_map)(void *map, int key, int flags) =
+	(void *) BPF_FUNC_redirect_map;
+static int (*bpf_perf_event_output)(void *ctx, void *map,
+				    unsigned long long flags, void *data,
+				    int size) =
+	(void *) BPF_FUNC_perf_event_output;
+static int (*bpf_get_stackid)(void *ctx, void *map, int flags) =
+	(void *) BPF_FUNC_get_stackid;
+static int (*bpf_probe_write_user)(void *dst, void *src, int size) =
+	(void *) BPF_FUNC_probe_write_user;
+static int (*bpf_current_task_under_cgroup)(void *map, int index) =
+	(void *) BPF_FUNC_current_task_under_cgroup;
+static int (*bpf_skb_get_tunnel_key)(void *ctx, void *key, int size, int flags) =
+	(void *) BPF_FUNC_skb_get_tunnel_key;
+static int (*bpf_skb_set_tunnel_key)(void *ctx, void *key, int size, int flags) =
+	(void *) BPF_FUNC_skb_set_tunnel_key;
+static int (*bpf_skb_get_tunnel_opt)(void *ctx, void *md, int size) =
+	(void *) BPF_FUNC_skb_get_tunnel_opt;
+static int (*bpf_skb_set_tunnel_opt)(void *ctx, void *md, int size) =
+	(void *) BPF_FUNC_skb_set_tunnel_opt;
+static unsigned long long (*bpf_get_prandom_u32)(void) =
+	(void *) BPF_FUNC_get_prandom_u32;
+static int (*bpf_xdp_adjust_head)(void *ctx, int offset) =
+	(void *) BPF_FUNC_xdp_adjust_head;
+static int (*bpf_xdp_adjust_meta)(void *ctx, int offset) =
+	(void *) BPF_FUNC_xdp_adjust_meta;
+static int (*bpf_get_socket_cookie)(void *ctx) =
+	(void *) BPF_FUNC_get_socket_cookie;
+static int (*bpf_setsockopt)(void *ctx, int level, int optname, void *optval,
+			     int optlen) =
+	(void *) BPF_FUNC_setsockopt;
+static int (*bpf_getsockopt)(void *ctx, int level, int optname, void *optval,
+			     int optlen) =
+	(void *) BPF_FUNC_getsockopt;
+static int (*bpf_sock_ops_cb_flags_set)(void *ctx, int flags) =
+	(void *) BPF_FUNC_sock_ops_cb_flags_set;
+static int (*bpf_sk_redirect_map)(void *ctx, void *map, int key, int flags) =
+	(void *) BPF_FUNC_sk_redirect_map;
+static int (*bpf_sk_redirect_hash)(void *ctx, void *map, void *key, int flags) =
+	(void *) BPF_FUNC_sk_redirect_hash;
+static int (*bpf_sock_map_update)(void *map, void *key, void *value,
+				  unsigned long long flags) =
+	(void *) BPF_FUNC_sock_map_update;
+static int (*bpf_sock_hash_update)(void *map, void *key, void *value,
+				   unsigned long long flags) =
+	(void *) BPF_FUNC_sock_hash_update;
+static int (*bpf_perf_event_read_value)(void *map, unsigned long long flags,
+					void *buf, unsigned int buf_size) =
+	(void *) BPF_FUNC_perf_event_read_value;
+static int (*bpf_perf_prog_read_value)(void *ctx, void *buf,
+				       unsigned int buf_size) =
+	(void *) BPF_FUNC_perf_prog_read_value;
+static int (*bpf_override_return)(void *ctx, unsigned long rc) =
+	(void *) BPF_FUNC_override_return;
+static int (*bpf_msg_redirect_map)(void *ctx, void *map, int key, int flags) =
+	(void *) BPF_FUNC_msg_redirect_map;
+static int (*bpf_msg_redirect_hash)(void *ctx,
+				    void *map, void *key, int flags) =
+	(void *) BPF_FUNC_msg_redirect_hash;
+static int (*bpf_msg_apply_bytes)(void *ctx, int len) =
+	(void *) BPF_FUNC_msg_apply_bytes;
+static int (*bpf_msg_cork_bytes)(void *ctx, int len) =
+	(void *) BPF_FUNC_msg_cork_bytes;
+static int (*bpf_msg_pull_data)(void *ctx, int start, int end, int flags) =
+	(void *) BPF_FUNC_msg_pull_data;
+static int (*bpf_bind)(void *ctx, void *addr, int addr_len) =
+	(void *) BPF_FUNC_bind;
+static int (*bpf_xdp_adjust_tail)(void *ctx, int offset) =
+	(void *) BPF_FUNC_xdp_adjust_tail;
+static int (*bpf_skb_get_xfrm_state)(void *ctx, int index, void *state,
+				     int size, int flags) =
+	(void *) BPF_FUNC_skb_get_xfrm_state;
+static int (*bpf_sk_select_reuseport)(void *ctx, void *map, void *key, __u32 flags) =
+	(void *) BPF_FUNC_sk_select_reuseport;
+static int (*bpf_get_stack)(void *ctx, void *buf, int size, int flags) =
+	(void *) BPF_FUNC_get_stack;
+static int (*bpf_fib_lookup)(void *ctx, struct bpf_fib_lookup *params,
+			     int plen, __u32 flags) =
+	(void *) BPF_FUNC_fib_lookup;
+static int (*bpf_lwt_push_encap)(void *ctx, unsigned int type, void *hdr,
+				 unsigned int len) =
+	(void *) BPF_FUNC_lwt_push_encap;
+static int (*bpf_lwt_seg6_store_bytes)(void *ctx, unsigned int offset,
+				       void *from, unsigned int len) =
+	(void *) BPF_FUNC_lwt_seg6_store_bytes;
+static int (*bpf_lwt_seg6_action)(void *ctx, unsigned int action, void *param,
+				  unsigned int param_len) =
+	(void *) BPF_FUNC_lwt_seg6_action;
+static int (*bpf_lwt_seg6_adjust_srh)(void *ctx, unsigned int offset,
+				      unsigned int len) =
+	(void *) BPF_FUNC_lwt_seg6_adjust_srh;
+static int (*bpf_rc_repeat)(void *ctx) =
+	(void *) BPF_FUNC_rc_repeat;
+static int (*bpf_rc_keydown)(void *ctx, unsigned int protocol,
+			     unsigned long long scancode, unsigned int toggle) =
+	(void *) BPF_FUNC_rc_keydown;
+static unsigned long long (*bpf_get_current_cgroup_id)(void) =
+	(void *) BPF_FUNC_get_current_cgroup_id;
+static void *(*bpf_get_local_storage)(void *map, unsigned long long flags) =
+	(void *) BPF_FUNC_get_local_storage;
+static unsigned long long (*bpf_skb_cgroup_id)(void *ctx) =
+	(void *) BPF_FUNC_skb_cgroup_id;
+static unsigned long long (*bpf_skb_ancestor_cgroup_id)(void *ctx, int level) =
+	(void *) BPF_FUNC_skb_ancestor_cgroup_id;
+
+/* llvm builtin functions that eBPF C program may use to
+ * emit BPF_LD_ABS and BPF_LD_IND instructions
+ */
+struct sk_buff;
+unsigned long long load_byte(void *skb,
+			     unsigned long long off) asm("llvm.bpf.load.byte");
+unsigned long long load_half(void *skb,
+			     unsigned long long off) asm("llvm.bpf.load.half");
+unsigned long long load_word(void *skb,
+			     unsigned long long off) asm("llvm.bpf.load.word");
+
+/* a helper structure used by eBPF C program
+ * to describe map attributes to elf_bpf loader
+ */
+struct bpf_map_def {
+	unsigned int type;
+	unsigned int key_size;
+	unsigned int value_size;
+	unsigned int max_entries;
+	unsigned int map_flags;
+	unsigned int inner_map_idx;
+	unsigned int numa_node;
+};
+
+#define BPF_ANNOTATE_KV_PAIR(name, type_key, type_val)		\
+	struct ____btf_map_##name {				\
+		type_key key;					\
+		type_val value;					\
+	};							\
+	struct ____btf_map_##name				\
+	__attribute__ ((section(".maps." #name), used))		\
+		____btf_map_##name = { }
+
+static int (*bpf_skb_load_bytes)(void *ctx, int off, void *to, int len) =
+	(void *) BPF_FUNC_skb_load_bytes;
+static int (*bpf_skb_load_bytes_relative)(void *ctx, int off, void *to, int len, __u32 start_header) =
+	(void *) BPF_FUNC_skb_load_bytes_relative;
+static int (*bpf_skb_store_bytes)(void *ctx, int off, void *from, int len, int flags) =
+	(void *) BPF_FUNC_skb_store_bytes;
+static int (*bpf_l3_csum_replace)(void *ctx, int off, int from, int to, int flags) =
+	(void *) BPF_FUNC_l3_csum_replace;
+static int (*bpf_l4_csum_replace)(void *ctx, int off, int from, int to, int flags) =
+	(void *) BPF_FUNC_l4_csum_replace;
+static int (*bpf_csum_diff)(void *from, int from_size, void *to, int to_size, int seed) =
+	(void *) BPF_FUNC_csum_diff;
+static int (*bpf_skb_under_cgroup)(void *ctx, void *map, int index) =
+	(void *) BPF_FUNC_skb_under_cgroup;
+static int (*bpf_skb_change_head)(void *, int len, int flags) =
+	(void *) BPF_FUNC_skb_change_head;
+static int (*bpf_skb_pull_data)(void *, int len) =
+	(void *) BPF_FUNC_skb_pull_data;
+
+/* Scan the ARCH passed in from ARCH env variable (see Makefile) */
+#if defined(__TARGET_ARCH_x86)
+	#define bpf_target_x86
+	#define bpf_target_defined
+#elif defined(__TARGET_ARCH_s930x)
+	#define bpf_target_s930x
+	#define bpf_target_defined
+#elif defined(__TARGET_ARCH_arm64)
+	#define bpf_target_arm64
+	#define bpf_target_defined
+#elif defined(__TARGET_ARCH_mips)
+	#define bpf_target_mips
+	#define bpf_target_defined
+#elif defined(__TARGET_ARCH_powerpc)
+	#define bpf_target_powerpc
+	#define bpf_target_defined
+#elif defined(__TARGET_ARCH_sparc)
+	#define bpf_target_sparc
+	#define bpf_target_defined
+#else
+	#undef bpf_target_defined
+#endif
+
+/* Fall back to what the compiler says */
+#ifndef bpf_target_defined
+#if defined(__x86_64__)
+	#define bpf_target_x86
+#elif defined(__s390x__)
+	#define bpf_target_s930x
+#elif defined(__aarch64__)
+	#define bpf_target_arm64
+#elif defined(__mips__)
+	#define bpf_target_mips
+#elif defined(__powerpc__)
+	#define bpf_target_powerpc
+#elif defined(__sparc__)
+	#define bpf_target_sparc
+#endif
+#endif
+
+#if defined(bpf_target_x86)
+
+#define PT_REGS_PARM1(x) ((x)->di)
+#define PT_REGS_PARM2(x) ((x)->si)
+#define PT_REGS_PARM3(x) ((x)->dx)
+#define PT_REGS_PARM4(x) ((x)->cx)
+#define PT_REGS_PARM5(x) ((x)->r8)
+#define PT_REGS_RET(x) ((x)->sp)
+#define PT_REGS_FP(x) ((x)->bp)
+#define PT_REGS_RC(x) ((x)->ax)
+#define PT_REGS_SP(x) ((x)->sp)
+#define PT_REGS_IP(x) ((x)->ip)
+
+#elif defined(bpf_target_s390x)
+
+#define PT_REGS_PARM1(x) ((x)->gprs[2])
+#define PT_REGS_PARM2(x) ((x)->gprs[3])
+#define PT_REGS_PARM3(x) ((x)->gprs[4])
+#define PT_REGS_PARM4(x) ((x)->gprs[5])
+#define PT_REGS_PARM5(x) ((x)->gprs[6])
+#define PT_REGS_RET(x) ((x)->gprs[14])
+#define PT_REGS_FP(x) ((x)->gprs[11]) /* Works only with CONFIG_FRAME_POINTER */
+#define PT_REGS_RC(x) ((x)->gprs[2])
+#define PT_REGS_SP(x) ((x)->gprs[15])
+#define PT_REGS_IP(x) ((x)->psw.addr)
+
+#elif defined(bpf_target_arm64)
+
+#define PT_REGS_PARM1(x) ((x)->regs[0])
+#define PT_REGS_PARM2(x) ((x)->regs[1])
+#define PT_REGS_PARM3(x) ((x)->regs[2])
+#define PT_REGS_PARM4(x) ((x)->regs[3])
+#define PT_REGS_PARM5(x) ((x)->regs[4])
+#define PT_REGS_RET(x) ((x)->regs[30])
+#define PT_REGS_FP(x) ((x)->regs[29]) /* Works only with CONFIG_FRAME_POINTER */
+#define PT_REGS_RC(x) ((x)->regs[0])
+#define PT_REGS_SP(x) ((x)->sp)
+#define PT_REGS_IP(x) ((x)->pc)
+
+#elif defined(bpf_target_mips)
+
+#define PT_REGS_PARM1(x) ((x)->regs[4])
+#define PT_REGS_PARM2(x) ((x)->regs[5])
+#define PT_REGS_PARM3(x) ((x)->regs[6])
+#define PT_REGS_PARM4(x) ((x)->regs[7])
+#define PT_REGS_PARM5(x) ((x)->regs[8])
+#define PT_REGS_RET(x) ((x)->regs[31])
+#define PT_REGS_FP(x) ((x)->regs[30]) /* Works only with CONFIG_FRAME_POINTER */
+#define PT_REGS_RC(x) ((x)->regs[1])
+#define PT_REGS_SP(x) ((x)->regs[29])
+#define PT_REGS_IP(x) ((x)->cp0_epc)
+
+#elif defined(bpf_target_powerpc)
+
+#define PT_REGS_PARM1(x) ((x)->gpr[3])
+#define PT_REGS_PARM2(x) ((x)->gpr[4])
+#define PT_REGS_PARM3(x) ((x)->gpr[5])
+#define PT_REGS_PARM4(x) ((x)->gpr[6])
+#define PT_REGS_PARM5(x) ((x)->gpr[7])
+#define PT_REGS_RC(x) ((x)->gpr[3])
+#define PT_REGS_SP(x) ((x)->sp)
+#define PT_REGS_IP(x) ((x)->nip)
+
+#elif defined(bpf_target_sparc)
+
+#define PT_REGS_PARM1(x) ((x)->u_regs[UREG_I0])
+#define PT_REGS_PARM2(x) ((x)->u_regs[UREG_I1])
+#define PT_REGS_PARM3(x) ((x)->u_regs[UREG_I2])
+#define PT_REGS_PARM4(x) ((x)->u_regs[UREG_I3])
+#define PT_REGS_PARM5(x) ((x)->u_regs[UREG_I4])
+#define PT_REGS_RET(x) ((x)->u_regs[UREG_I7])
+#define PT_REGS_RC(x) ((x)->u_regs[UREG_I0])
+#define PT_REGS_SP(x) ((x)->u_regs[UREG_FP])
+
+/* Should this also be a bpf_target check for the sparc case? */
+#if defined(__arch64__)
+#define PT_REGS_IP(x) ((x)->tpc)
+#else
+#define PT_REGS_IP(x) ((x)->pc)
+#endif
+
+#endif
+
+#ifdef bpf_target_powerpc
+#define BPF_KPROBE_READ_RET_IP(ip, ctx)		({ (ip) = (ctx)->link; })
+#define BPF_KRETPROBE_READ_RET_IP		BPF_KPROBE_READ_RET_IP
+#elif bpf_target_sparc
+#define BPF_KPROBE_READ_RET_IP(ip, ctx)		({ (ip) = PT_REGS_RET(ctx); })
+#define BPF_KRETPROBE_READ_RET_IP		BPF_KPROBE_READ_RET_IP
+#else
+#define BPF_KPROBE_READ_RET_IP(ip, ctx)		({				\
+		bpf_probe_read(&(ip), sizeof(ip), (void *)PT_REGS_RET(ctx)); })
+#define BPF_KRETPROBE_READ_RET_IP(ip, ctx)	({				\
+		bpf_probe_read(&(ip), sizeof(ip),				\
+				(void *)(PT_REGS_FP(ctx) + sizeof(ip))); })
+#endif
+
+#endif
diff --git a/tools/testing/selftests/bpf/bpf_rand.h b/tools/testing/selftests/bpf/bpf_rand.h
new file mode 100644
index 000000000..59bf3e1a9
--- /dev/null
+++ b/tools/testing/selftests/bpf/bpf_rand.h
@@ -0,0 +1,80 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __BPF_RAND__
+#define __BPF_RAND__
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <time.h>
+
+static inline uint64_t bpf_rand_mask(uint64_t mask)
+{
+	return (((uint64_t)(uint32_t)rand()) |
+	        ((uint64_t)(uint32_t)rand() << 32)) & mask;
+}
+
+#define bpf_rand_ux(x, m)			\
+static inline uint64_t bpf_rand_u##x(int shift)	\
+{						\
+	return bpf_rand_mask((m)) << shift;	\
+}
+
+bpf_rand_ux( 8,               0xffULL)
+bpf_rand_ux(16,             0xffffULL)
+bpf_rand_ux(24,           0xffffffULL)
+bpf_rand_ux(32,         0xffffffffULL)
+bpf_rand_ux(40,       0xffffffffffULL)
+bpf_rand_ux(48,     0xffffffffffffULL)
+bpf_rand_ux(56,   0xffffffffffffffULL)
+bpf_rand_ux(64, 0xffffffffffffffffULL)
+
+static inline void bpf_semi_rand_init(void)
+{
+	srand(time(NULL));
+}
+
+static inline uint64_t bpf_semi_rand_get(void)
+{
+	switch (rand() % 39) {
+	case  0: return 0x000000ff00000000ULL | bpf_rand_u8(0);
+	case  1: return 0xffffffff00000000ULL | bpf_rand_u16(0);
+	case  2: return 0x00000000ffff0000ULL | bpf_rand_u16(0);
+	case  3: return 0x8000000000000000ULL | bpf_rand_u32(0);
+	case  4: return 0x00000000f0000000ULL | bpf_rand_u32(0);
+	case  5: return 0x0000000100000000ULL | bpf_rand_u24(0);
+	case  6: return 0x800ff00000000000ULL | bpf_rand_u32(0);
+	case  7: return 0x7fffffff00000000ULL | bpf_rand_u32(0);
+	case  8: return 0xffffffffffffff00ULL ^ bpf_rand_u32(24);
+	case  9: return 0xffffffffffffff00ULL | bpf_rand_u8(0);
+	case 10: return 0x0000000010000000ULL | bpf_rand_u32(0);
+	case 11: return 0xf000000000000000ULL | bpf_rand_u8(0);
+	case 12: return 0x0000f00000000000ULL | bpf_rand_u8(8);
+	case 13: return 0x000000000f000000ULL | bpf_rand_u8(16);
+	case 14: return 0x0000000000000f00ULL | bpf_rand_u8(32);
+	case 15: return 0x00fff00000000f00ULL | bpf_rand_u8(48);
+	case 16: return 0x00007fffffffffffULL ^ bpf_rand_u32(1);
+	case 17: return 0xffff800000000000ULL | bpf_rand_u8(4);
+	case 18: return 0xffff800000000000ULL | bpf_rand_u8(20);
+	case 19: return (0xffffffc000000000ULL + 0x80000ULL) | bpf_rand_u32(0);
+	case 20: return (0xffffffc000000000ULL - 0x04000000ULL) | bpf_rand_u32(0);
+	case 21: return 0x0000000000000000ULL | bpf_rand_u8(55) | bpf_rand_u32(20);
+	case 22: return 0xffffffffffffffffULL ^ bpf_rand_u8(3) ^ bpf_rand_u32(40);
+	case 23: return 0x0000000000000000ULL | bpf_rand_u8(bpf_rand_u8(0) % 64);
+	case 24: return 0x0000000000000000ULL | bpf_rand_u16(bpf_rand_u8(0) % 64);
+	case 25: return 0xffffffffffffffffULL ^ bpf_rand_u8(bpf_rand_u8(0) % 64);
+	case 26: return 0xffffffffffffffffULL ^ bpf_rand_u40(bpf_rand_u8(0) % 64);
+	case 27: return 0x0000800000000000ULL;
+	case 28: return 0x8000000000000000ULL;
+	case 29: return 0x0000000000000000ULL;
+	case 30: return 0xffffffffffffffffULL;
+	case 31: return bpf_rand_u16(bpf_rand_u8(0) % 64);
+	case 32: return bpf_rand_u24(bpf_rand_u8(0) % 64);
+	case 33: return bpf_rand_u32(bpf_rand_u8(0) % 64);
+	case 34: return bpf_rand_u40(bpf_rand_u8(0) % 64);
+	case 35: return bpf_rand_u48(bpf_rand_u8(0) % 64);
+	case 36: return bpf_rand_u56(bpf_rand_u8(0) % 64);
+	case 37: return bpf_rand_u64(bpf_rand_u8(0) % 64);
+	default: return bpf_rand_u64(0);
+	}
+}
+
+#endif /* __BPF_RAND__ */
diff --git a/tools/testing/selftests/bpf/bpf_rlimit.h b/tools/testing/selftests/bpf/bpf_rlimit.h
new file mode 100644
index 000000000..9dac9b30f
--- /dev/null
+++ b/tools/testing/selftests/bpf/bpf_rlimit.h
@@ -0,0 +1,28 @@
+#include <sys/resource.h>
+#include <stdio.h>
+
+static  __attribute__((constructor)) void bpf_rlimit_ctor(void)
+{
+	struct rlimit rlim_old, rlim_new = {
+		.rlim_cur	= RLIM_INFINITY,
+		.rlim_max	= RLIM_INFINITY,
+	};
+
+	getrlimit(RLIMIT_MEMLOCK, &rlim_old);
+	/* For the sake of running the test cases, we temporarily
+	 * set rlimit to infinity in order for kernel to focus on
+	 * errors from actual test cases and not getting noise
+	 * from hitting memlock limits. The limit is on per-process
+	 * basis and not a global one, hence destructor not really
+	 * needed here.
+	 */
+	if (setrlimit(RLIMIT_MEMLOCK, &rlim_new) < 0) {
+		perror("Unable to lift memlock rlimit");
+		/* Trying out lower limit, but expect potential test
+		 * case failures from this!
+		 */
+		rlim_new.rlim_cur = rlim_old.rlim_cur + (1UL << 20);
+		rlim_new.rlim_max = rlim_old.rlim_max + (1UL << 20);
+		setrlimit(RLIMIT_MEMLOCK, &rlim_new);
+	}
+}
diff --git a/tools/testing/selftests/bpf/bpf_util.h b/tools/testing/selftests/bpf/bpf_util.h
new file mode 100644
index 000000000..84fd6f1bf
--- /dev/null
+++ b/tools/testing/selftests/bpf/bpf_util.h
@@ -0,0 +1,61 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __BPF_UTIL__
+#define __BPF_UTIL__
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+static inline unsigned int bpf_num_possible_cpus(void)
+{
+	static const char *fcpu = "/sys/devices/system/cpu/possible";
+	unsigned int start, end, possible_cpus = 0;
+	char buff[128];
+	FILE *fp;
+	int len, n, i, j = 0;
+
+	fp = fopen(fcpu, "r");
+	if (!fp) {
+		printf("Failed to open %s: '%s'!\n", fcpu, strerror(errno));
+		exit(1);
+	}
+
+	if (!fgets(buff, sizeof(buff), fp)) {
+		printf("Failed to read %s!\n", fcpu);
+		exit(1);
+	}
+
+	len = strlen(buff);
+	for (i = 0; i <= len; i++) {
+		if (buff[i] == ',' || buff[i] == '\0') {
+			buff[i] = '\0';
+			n = sscanf(&buff[j], "%u-%u", &start, &end);
+			if (n <= 0) {
+				printf("Failed to retrieve # possible CPUs!\n");
+				exit(1);
+			} else if (n == 1) {
+				end = start;
+			}
+			possible_cpus += end - start + 1;
+			j = i + 1;
+		}
+	}
+
+	fclose(fp);
+
+	return possible_cpus;
+}
+
+#define __bpf_percpu_val_align	__attribute__((__aligned__(8)))
+
+#define BPF_DECLARE_PERCPU(type, name)				\
+	struct { type v; /* padding */ } __bpf_percpu_val_align	\
+		name[bpf_num_possible_cpus()]
+#define bpf_percpu(name, cpu) name[(cpu)].v
+
+#ifndef ARRAY_SIZE
+# define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+#endif
+
+#endif /* __BPF_UTIL__ */
diff --git a/tools/testing/selftests/bpf/cgroup_helpers.c b/tools/testing/selftests/bpf/cgroup_helpers.c
new file mode 100644
index 000000000..6af24f9a7
--- /dev/null
+++ b/tools/testing/selftests/bpf/cgroup_helpers.c
@@ -0,0 +1,235 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <sched.h>
+#include <sys/mount.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <linux/limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <linux/sched.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <ftw.h>
+
+
+#include "cgroup_helpers.h"
+
+/*
+ * To avoid relying on the system setup, when setup_cgroup_env is called
+ * we create a new mount namespace, and cgroup namespace. The cgroup2
+ * root is mounted at CGROUP_MOUNT_PATH
+ *
+ * Unfortunately, most people don't have cgroupv2 enabled at this point in time.
+ * It's easier to create our own mount namespace and manage it ourselves.
+ *
+ * We assume /mnt exists.
+ */
+
+#define WALK_FD_LIMIT			16
+#define CGROUP_MOUNT_PATH		"/mnt"
+#define CGROUP_WORK_DIR			"/cgroup-test-work-dir"
+#define format_cgroup_path(buf, path) \
+	snprintf(buf, sizeof(buf), "%s%s%s", CGROUP_MOUNT_PATH, \
+		 CGROUP_WORK_DIR, path)
+
+/**
+ * setup_cgroup_environment() - Setup the cgroup environment
+ *
+ * After calling this function, cleanup_cgroup_environment should be called
+ * once testing is complete.
+ *
+ * This function will print an error to stderr and return 1 if it is unable
+ * to setup the cgroup environment. If setup is successful, 0 is returned.
+ */
+int setup_cgroup_environment(void)
+{
+	char cgroup_workdir[PATH_MAX - 24];
+
+	format_cgroup_path(cgroup_workdir, "");
+
+	if (unshare(CLONE_NEWNS)) {
+		log_err("unshare");
+		return 1;
+	}
+
+	if (mount("none", "/", NULL, MS_REC | MS_PRIVATE, NULL)) {
+		log_err("mount fakeroot");
+		return 1;
+	}
+
+	if (mount("none", CGROUP_MOUNT_PATH, "cgroup2", 0, NULL) && errno != EBUSY) {
+		log_err("mount cgroup2");
+		return 1;
+	}
+
+	/* Cleanup existing failed runs, now that the environment is setup */
+	cleanup_cgroup_environment();
+
+	if (mkdir(cgroup_workdir, 0777) && errno != EEXIST) {
+		log_err("mkdir cgroup work dir");
+		return 1;
+	}
+
+	return 0;
+}
+
+static int nftwfunc(const char *filename, const struct stat *statptr,
+		    int fileflags, struct FTW *pfwt)
+{
+	if ((fileflags & FTW_D) && rmdir(filename))
+		log_err("Removing cgroup: %s", filename);
+	return 0;
+}
+
+
+static int join_cgroup_from_top(char *cgroup_path)
+{
+	char cgroup_procs_path[PATH_MAX + 1];
+	pid_t pid = getpid();
+	int fd, rc = 0;
+
+	snprintf(cgroup_procs_path, sizeof(cgroup_procs_path),
+		 "%s/cgroup.procs", cgroup_path);
+
+	fd = open(cgroup_procs_path, O_WRONLY);
+	if (fd < 0) {
+		log_err("Opening Cgroup Procs: %s", cgroup_procs_path);
+		return 1;
+	}
+
+	if (dprintf(fd, "%d\n", pid) < 0) {
+		log_err("Joining Cgroup");
+		rc = 1;
+	}
+
+	close(fd);
+	return rc;
+}
+
+/**
+ * join_cgroup() - Join a cgroup
+ * @path: The cgroup path, relative to the workdir, to join
+ *
+ * This function expects a cgroup to already be created, relative to the cgroup
+ * work dir, and it joins it. For example, passing "/my-cgroup" as the path
+ * would actually put the calling process into the cgroup
+ * "/cgroup-test-work-dir/my-cgroup"
+ *
+ * On success, it returns 0, otherwise on failure it returns 1.
+ */
+int join_cgroup(const char *path)
+{
+	char cgroup_path[PATH_MAX + 1];
+
+	format_cgroup_path(cgroup_path, path);
+	return join_cgroup_from_top(cgroup_path);
+}
+
+/**
+ * cleanup_cgroup_environment() - Cleanup Cgroup Testing Environment
+ *
+ * This is an idempotent function to delete all temporary cgroups that
+ * have been created during the test, including the cgroup testing work
+ * directory.
+ *
+ * At call time, it moves the calling process to the root cgroup, and then
+ * runs the deletion process. It is idempotent, and should not fail, unless
+ * a process is lingering.
+ *
+ * On failure, it will print an error to stderr, and try to continue.
+ */
+void cleanup_cgroup_environment(void)
+{
+	char cgroup_workdir[PATH_MAX + 1];
+
+	format_cgroup_path(cgroup_workdir, "");
+	join_cgroup_from_top(CGROUP_MOUNT_PATH);
+	nftw(cgroup_workdir, nftwfunc, WALK_FD_LIMIT, FTW_DEPTH | FTW_MOUNT);
+}
+
+/**
+ * create_and_get_cgroup() - Create a cgroup, relative to workdir, and get the FD
+ * @path: The cgroup path, relative to the workdir, to join
+ *
+ * This function creates a cgroup under the top level workdir and returns the
+ * file descriptor. It is idempotent.
+ *
+ * On success, it returns the file descriptor. On failure it returns 0.
+ * If there is a failure, it prints the error to stderr.
+ */
+int create_and_get_cgroup(const char *path)
+{
+	char cgroup_path[PATH_MAX + 1];
+	int fd;
+
+	format_cgroup_path(cgroup_path, path);
+	if (mkdir(cgroup_path, 0777) && errno != EEXIST) {
+		log_err("mkdiring cgroup %s .. %s", path, cgroup_path);
+		return 0;
+	}
+
+	fd = open(cgroup_path, O_RDONLY);
+	if (fd < 0) {
+		log_err("Opening Cgroup");
+		return 0;
+	}
+
+	return fd;
+}
+
+/**
+ * get_cgroup_id() - Get cgroup id for a particular cgroup path
+ * @path: The cgroup path, relative to the workdir, to join
+ *
+ * On success, it returns the cgroup id. On failure it returns 0,
+ * which is an invalid cgroup id.
+ * If there is a failure, it prints the error to stderr.
+ */
+unsigned long long get_cgroup_id(const char *path)
+{
+	int dirfd, err, flags, mount_id, fhsize;
+	union {
+		unsigned long long cgid;
+		unsigned char raw_bytes[8];
+	} id;
+	char cgroup_workdir[PATH_MAX + 1];
+	struct file_handle *fhp, *fhp2;
+	unsigned long long ret = 0;
+
+	format_cgroup_path(cgroup_workdir, path);
+
+	dirfd = AT_FDCWD;
+	flags = 0;
+	fhsize = sizeof(*fhp);
+	fhp = calloc(1, fhsize);
+	if (!fhp) {
+		log_err("calloc");
+		return 0;
+	}
+	err = name_to_handle_at(dirfd, cgroup_workdir, fhp, &mount_id, flags);
+	if (err >= 0 || fhp->handle_bytes != 8) {
+		log_err("name_to_handle_at");
+		goto free_mem;
+	}
+
+	fhsize = sizeof(struct file_handle) + fhp->handle_bytes;
+	fhp2 = realloc(fhp, fhsize);
+	if (!fhp2) {
+		log_err("realloc");
+		goto free_mem;
+	}
+	err = name_to_handle_at(dirfd, cgroup_workdir, fhp2, &mount_id, flags);
+	fhp = fhp2;
+	if (err < 0) {
+		log_err("name_to_handle_at");
+		goto free_mem;
+	}
+
+	memcpy(id.raw_bytes, fhp->f_handle, 8);
+	ret = id.cgid;
+
+free_mem:
+	free(fhp);
+	return ret;
+}
diff --git a/tools/testing/selftests/bpf/cgroup_helpers.h b/tools/testing/selftests/bpf/cgroup_helpers.h
new file mode 100644
index 000000000..d64bb8957
--- /dev/null
+++ b/tools/testing/selftests/bpf/cgroup_helpers.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __CGROUP_HELPERS_H
+#define __CGROUP_HELPERS_H
+#include <errno.h>
+#include <string.h>
+
+#define clean_errno() (errno == 0 ? "None" : strerror(errno))
+#define log_err(MSG, ...) fprintf(stderr, "(%s:%d: errno: %s) " MSG "\n", \
+	__FILE__, __LINE__, clean_errno(), ##__VA_ARGS__)
+
+
+int create_and_get_cgroup(const char *path);
+int join_cgroup(const char *path);
+int setup_cgroup_environment(void);
+void cleanup_cgroup_environment(void);
+unsigned long long get_cgroup_id(const char *path);
+
+#endif
diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config
new file mode 100644
index 000000000..b4994a949
--- /dev/null
+++ b/tools/testing/selftests/bpf/config
@@ -0,0 +1,20 @@
+CONFIG_BPF=y
+CONFIG_BPF_SYSCALL=y
+CONFIG_NET_CLS_BPF=m
+CONFIG_BPF_EVENTS=y
+CONFIG_TEST_BPF=m
+CONFIG_CGROUP_BPF=y
+CONFIG_NETDEVSIM=m
+CONFIG_NET_CLS_ACT=y
+CONFIG_NET_SCHED=y
+CONFIG_NET_SCH_INGRESS=y
+CONFIG_NET_IPIP=y
+CONFIG_IPV6=y
+CONFIG_NET_IPGRE_DEMUX=y
+CONFIG_NET_IPGRE=y
+CONFIG_IPV6_GRE=y
+CONFIG_CRYPTO_USER_API_HASH=m
+CONFIG_CRYPTO_HMAC=m
+CONFIG_CRYPTO_SHA256=m
+CONFIG_VXLAN=y
+CONFIG_GENEVE=y
diff --git a/tools/testing/selftests/bpf/connect4_prog.c b/tools/testing/selftests/bpf/connect4_prog.c
new file mode 100644
index 000000000..5a88a681d
--- /dev/null
+++ b/tools/testing/selftests/bpf/connect4_prog.c
@@ -0,0 +1,45 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2018 Facebook
+
+#include <string.h>
+
+#include <linux/stddef.h>
+#include <linux/bpf.h>
+#include <linux/in.h>
+#include <linux/in6.h>
+#include <sys/socket.h>
+
+#include "bpf_helpers.h"
+#include "bpf_endian.h"
+
+#define SRC_REWRITE_IP4		0x7f000004U
+#define DST_REWRITE_IP4		0x7f000001U
+#define DST_REWRITE_PORT4	4444
+
+int _version SEC("version") = 1;
+
+SEC("cgroup/connect4")
+int connect_v4_prog(struct bpf_sock_addr *ctx)
+{
+	struct sockaddr_in sa;
+
+	/* Rewrite destination. */
+	ctx->user_ip4 = bpf_htonl(DST_REWRITE_IP4);
+	ctx->user_port = bpf_htons(DST_REWRITE_PORT4);
+
+	if (ctx->type == SOCK_DGRAM || ctx->type == SOCK_STREAM) {
+		///* Rewrite source. */
+		memset(&sa, 0, sizeof(sa));
+
+		sa.sin_family = AF_INET;
+		sa.sin_port = bpf_htons(0);
+		sa.sin_addr.s_addr = bpf_htonl(SRC_REWRITE_IP4);
+
+		if (bpf_bind(ctx, (struct sockaddr *)&sa, sizeof(sa)) != 0)
+			return 0;
+	}
+
+	return 1;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/connect6_prog.c b/tools/testing/selftests/bpf/connect6_prog.c
new file mode 100644
index 000000000..8ea3f7d12
--- /dev/null
+++ b/tools/testing/selftests/bpf/connect6_prog.c
@@ -0,0 +1,61 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2018 Facebook
+
+#include <string.h>
+
+#include <linux/stddef.h>
+#include <linux/bpf.h>
+#include <linux/in.h>
+#include <linux/in6.h>
+#include <sys/socket.h>
+
+#include "bpf_helpers.h"
+#include "bpf_endian.h"
+
+#define SRC_REWRITE_IP6_0	0
+#define SRC_REWRITE_IP6_1	0
+#define SRC_REWRITE_IP6_2	0
+#define SRC_REWRITE_IP6_3	6
+
+#define DST_REWRITE_IP6_0	0
+#define DST_REWRITE_IP6_1	0
+#define DST_REWRITE_IP6_2	0
+#define DST_REWRITE_IP6_3	1
+
+#define DST_REWRITE_PORT6	6666
+
+int _version SEC("version") = 1;
+
+SEC("cgroup/connect6")
+int connect_v6_prog(struct bpf_sock_addr *ctx)
+{
+	struct sockaddr_in6 sa;
+
+	/* Rewrite destination. */
+	ctx->user_ip6[0] = bpf_htonl(DST_REWRITE_IP6_0);
+	ctx->user_ip6[1] = bpf_htonl(DST_REWRITE_IP6_1);
+	ctx->user_ip6[2] = bpf_htonl(DST_REWRITE_IP6_2);
+	ctx->user_ip6[3] = bpf_htonl(DST_REWRITE_IP6_3);
+
+	ctx->user_port = bpf_htons(DST_REWRITE_PORT6);
+
+	if (ctx->type == SOCK_DGRAM || ctx->type == SOCK_STREAM) {
+		/* Rewrite source. */
+		memset(&sa, 0, sizeof(sa));
+
+		sa.sin6_family = AF_INET6;
+		sa.sin6_port = bpf_htons(0);
+
+		sa.sin6_addr.s6_addr32[0] = bpf_htonl(SRC_REWRITE_IP6_0);
+		sa.sin6_addr.s6_addr32[1] = bpf_htonl(SRC_REWRITE_IP6_1);
+		sa.sin6_addr.s6_addr32[2] = bpf_htonl(SRC_REWRITE_IP6_2);
+		sa.sin6_addr.s6_addr32[3] = bpf_htonl(SRC_REWRITE_IP6_3);
+
+		if (bpf_bind(ctx, (struct sockaddr *)&sa, sizeof(sa)) != 0)
+			return 0;
+	}
+
+	return 1;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/dev_cgroup.c b/tools/testing/selftests/bpf/dev_cgroup.c
new file mode 100644
index 000000000..ce41a3475
--- /dev/null
+++ b/tools/testing/selftests/bpf/dev_cgroup.c
@@ -0,0 +1,60 @@
+/* Copyright (c) 2017 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+
+#include <linux/bpf.h>
+#include <linux/version.h>
+#include "bpf_helpers.h"
+
+SEC("cgroup/dev")
+int bpf_prog1(struct bpf_cgroup_dev_ctx *ctx)
+{
+	short type = ctx->access_type & 0xFFFF;
+#ifdef DEBUG
+	short access = ctx->access_type >> 16;
+	char fmt[] = "  %d:%d    \n";
+
+	switch (type) {
+	case BPF_DEVCG_DEV_BLOCK:
+		fmt[0] = 'b';
+		break;
+	case BPF_DEVCG_DEV_CHAR:
+		fmt[0] = 'c';
+		break;
+	default:
+		fmt[0] = '?';
+		break;
+	}
+
+	if (access & BPF_DEVCG_ACC_READ)
+		fmt[8] = 'r';
+
+	if (access & BPF_DEVCG_ACC_WRITE)
+		fmt[9] = 'w';
+
+	if (access & BPF_DEVCG_ACC_MKNOD)
+		fmt[10] = 'm';
+
+	bpf_trace_printk(fmt, sizeof(fmt), ctx->major, ctx->minor);
+#endif
+
+	/* Allow access to /dev/zero and /dev/random.
+	 * Forbid everything else.
+	 */
+	if (ctx->major != 1 || type != BPF_DEVCG_DEV_CHAR)
+		return 0;
+
+	switch (ctx->minor) {
+	case 5: /* 1:5 /dev/zero */
+	case 9: /* 1:9 /dev/urandom */
+		return 1;
+	}
+
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
+__u32 _version SEC("version") = LINUX_VERSION_CODE;
diff --git a/tools/testing/selftests/bpf/get_cgroup_id_kern.c b/tools/testing/selftests/bpf/get_cgroup_id_kern.c
new file mode 100644
index 000000000..014dba10b
--- /dev/null
+++ b/tools/testing/selftests/bpf/get_cgroup_id_kern.c
@@ -0,0 +1,40 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2018 Facebook
+
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+
+struct bpf_map_def SEC("maps") cg_ids = {
+	.type = BPF_MAP_TYPE_ARRAY,
+	.key_size = sizeof(__u32),
+	.value_size = sizeof(__u64),
+	.max_entries = 1,
+};
+
+struct bpf_map_def SEC("maps") pidmap = {
+	.type = BPF_MAP_TYPE_ARRAY,
+	.key_size = sizeof(__u32),
+	.value_size = sizeof(__u32),
+	.max_entries = 1,
+};
+
+SEC("tracepoint/syscalls/sys_enter_nanosleep")
+int trace(void *ctx)
+{
+	__u32 pid = bpf_get_current_pid_tgid();
+	__u32 key = 0, *expected_pid;
+	__u64 *val;
+
+	expected_pid = bpf_map_lookup_elem(&pidmap, &key);
+	if (!expected_pid || *expected_pid != pid)
+		return 0;
+
+	val = bpf_map_lookup_elem(&cg_ids, &key);
+	if (val)
+		*val = bpf_get_current_cgroup_id();
+
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
+__u32 _version SEC("version") = 1; /* ignored by tracepoints, required by libbpf.a */
diff --git a/tools/testing/selftests/bpf/get_cgroup_id_user.c b/tools/testing/selftests/bpf/get_cgroup_id_user.c
new file mode 100644
index 000000000..e8da7b391
--- /dev/null
+++ b/tools/testing/selftests/bpf/get_cgroup_id_user.c
@@ -0,0 +1,149 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2018 Facebook
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <syscall.h>
+#include <unistd.h>
+#include <linux/perf_event.h>
+#include <sys/ioctl.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include <linux/bpf.h>
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+
+#include "cgroup_helpers.h"
+#include "bpf_rlimit.h"
+
+#define CHECK(condition, tag, format...) ({		\
+	int __ret = !!(condition);			\
+	if (__ret) {					\
+		printf("%s:FAIL:%s ", __func__, tag);	\
+		printf(format);				\
+	} else {					\
+		printf("%s:PASS:%s\n", __func__, tag);	\
+	}						\
+	__ret;						\
+})
+
+static int bpf_find_map(const char *test, struct bpf_object *obj,
+			const char *name)
+{
+	struct bpf_map *map;
+
+	map = bpf_object__find_map_by_name(obj, name);
+	if (!map)
+		return -1;
+	return bpf_map__fd(map);
+}
+
+#define TEST_CGROUP "/test-bpf-get-cgroup-id/"
+
+int main(int argc, char **argv)
+{
+	const char *probe_name = "syscalls/sys_enter_nanosleep";
+	const char *file = "get_cgroup_id_kern.o";
+	int err, bytes, efd, prog_fd, pmu_fd;
+	int cgroup_fd, cgidmap_fd, pidmap_fd;
+	struct perf_event_attr attr = {};
+	struct bpf_object *obj;
+	__u64 kcgid = 0, ucgid;
+	__u32 key = 0, pid;
+	int exit_code = 1;
+	char buf[256];
+
+	err = setup_cgroup_environment();
+	if (CHECK(err, "setup_cgroup_environment", "err %d errno %d\n", err,
+		  errno))
+		return 1;
+
+	cgroup_fd = create_and_get_cgroup(TEST_CGROUP);
+	if (CHECK(cgroup_fd < 0, "create_and_get_cgroup", "err %d errno %d\n",
+		  cgroup_fd, errno))
+		goto cleanup_cgroup_env;
+
+	err = join_cgroup(TEST_CGROUP);
+	if (CHECK(err, "join_cgroup", "err %d errno %d\n", err, errno))
+		goto cleanup_cgroup_env;
+
+	err = bpf_prog_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd);
+	if (CHECK(err, "bpf_prog_load", "err %d errno %d\n", err, errno))
+		goto cleanup_cgroup_env;
+
+	cgidmap_fd = bpf_find_map(__func__, obj, "cg_ids");
+	if (CHECK(cgidmap_fd < 0, "bpf_find_map", "err %d errno %d\n",
+		  cgidmap_fd, errno))
+		goto close_prog;
+
+	pidmap_fd = bpf_find_map(__func__, obj, "pidmap");
+	if (CHECK(pidmap_fd < 0, "bpf_find_map", "err %d errno %d\n",
+		  pidmap_fd, errno))
+		goto close_prog;
+
+	pid = getpid();
+	bpf_map_update_elem(pidmap_fd, &key, &pid, 0);
+
+	snprintf(buf, sizeof(buf),
+		 "/sys/kernel/debug/tracing/events/%s/id", probe_name);
+	efd = open(buf, O_RDONLY, 0);
+	if (CHECK(efd < 0, "open", "err %d errno %d\n", efd, errno))
+		goto close_prog;
+	bytes = read(efd, buf, sizeof(buf));
+	close(efd);
+	if (CHECK(bytes <= 0 || bytes >= sizeof(buf), "read",
+		  "bytes %d errno %d\n", bytes, errno))
+		goto close_prog;
+
+	attr.config = strtol(buf, NULL, 0);
+	attr.type = PERF_TYPE_TRACEPOINT;
+	attr.sample_type = PERF_SAMPLE_RAW;
+	attr.sample_period = 1;
+	attr.wakeup_events = 1;
+
+	/* attach to this pid so the all bpf invocations will be in the
+	 * cgroup associated with this pid.
+	 */
+	pmu_fd = syscall(__NR_perf_event_open, &attr, getpid(), -1, -1, 0);
+	if (CHECK(pmu_fd < 0, "perf_event_open", "err %d errno %d\n", pmu_fd,
+		  errno))
+		goto close_prog;
+
+	err = ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0);
+	if (CHECK(err, "perf_event_ioc_enable", "err %d errno %d\n", err,
+		  errno))
+		goto close_pmu;
+
+	err = ioctl(pmu_fd, PERF_EVENT_IOC_SET_BPF, prog_fd);
+	if (CHECK(err, "perf_event_ioc_set_bpf", "err %d errno %d\n", err,
+		  errno))
+		goto close_pmu;
+
+	/* trigger some syscalls */
+	sleep(1);
+
+	err = bpf_map_lookup_elem(cgidmap_fd, &key, &kcgid);
+	if (CHECK(err, "bpf_map_lookup_elem", "err %d errno %d\n", err, errno))
+		goto close_pmu;
+
+	ucgid = get_cgroup_id(TEST_CGROUP);
+	if (CHECK(kcgid != ucgid, "compare_cgroup_id",
+		  "kern cgid %llx user cgid %llx", kcgid, ucgid))
+		goto close_pmu;
+
+	exit_code = 0;
+	printf("%s:PASS\n", argv[0]);
+
+close_pmu:
+	close(pmu_fd);
+close_prog:
+	bpf_object__close(obj);
+cleanup_cgroup_env:
+	cleanup_cgroup_environment();
+	return exit_code;
+}
diff --git a/tools/testing/selftests/bpf/gnu/stubs.h b/tools/testing/selftests/bpf/gnu/stubs.h
new file mode 100644
index 000000000..719225b16
--- /dev/null
+++ b/tools/testing/selftests/bpf/gnu/stubs.h
@@ -0,0 +1 @@
+/* dummy .h to trick /usr/include/features.h to work with 'clang -target bpf' */
diff --git a/tools/testing/selftests/bpf/include/uapi/linux/types.h b/tools/testing/selftests/bpf/include/uapi/linux/types.h
new file mode 100644
index 000000000..91fa51a9c
--- /dev/null
+++ b/tools/testing/selftests/bpf/include/uapi/linux/types.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _UAPI_LINUX_TYPES_H
+#define _UAPI_LINUX_TYPES_H
+
+#include <asm-generic/int-ll64.h>
+
+/* copied from linux:include/uapi/linux/types.h */
+#define __bitwise
+typedef __u16 __bitwise __le16;
+typedef __u16 __bitwise __be16;
+typedef __u32 __bitwise __le32;
+typedef __u32 __bitwise __be32;
+typedef __u64 __bitwise __le64;
+typedef __u64 __bitwise __be64;
+
+typedef __u16 __bitwise __sum16;
+typedef __u32 __bitwise __wsum;
+
+#define __aligned_u64 __u64 __attribute__((aligned(8)))
+#define __aligned_be64 __be64 __attribute__((aligned(8)))
+#define __aligned_le64 __le64 __attribute__((aligned(8)))
+
+#endif /* _UAPI_LINUX_TYPES_H */
diff --git a/tools/testing/selftests/bpf/sample_map_ret0.c b/tools/testing/selftests/bpf/sample_map_ret0.c
new file mode 100644
index 000000000..075630367
--- /dev/null
+++ b/tools/testing/selftests/bpf/sample_map_ret0.c
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) */
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+
+struct bpf_map_def SEC("maps") htab = {
+	.type = BPF_MAP_TYPE_HASH,
+	.key_size = sizeof(__u32),
+	.value_size = sizeof(long),
+	.max_entries = 2,
+};
+
+struct bpf_map_def SEC("maps") array = {
+	.type = BPF_MAP_TYPE_ARRAY,
+	.key_size = sizeof(__u32),
+	.value_size = sizeof(long),
+	.max_entries = 2,
+};
+
+/* Sample program which should always load for testing control paths. */
+SEC(".text") int func()
+{
+	__u64 key64 = 0;
+	__u32 key = 0;
+	long *value;
+
+	value = bpf_map_lookup_elem(&htab, &key);
+	if (!value)
+		return 1;
+	value = bpf_map_lookup_elem(&array, &key64);
+	if (!value)
+		return 1;
+
+	return 0;
+}
diff --git a/tools/testing/selftests/bpf/sample_ret0.c b/tools/testing/selftests/bpf/sample_ret0.c
new file mode 100644
index 000000000..fec99750d
--- /dev/null
+++ b/tools/testing/selftests/bpf/sample_ret0.c
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) */
+
+/* Sample program which should always load for testing control paths. */
+int func()
+{
+	return 0;
+}
diff --git a/tools/testing/selftests/bpf/sendmsg4_prog.c b/tools/testing/selftests/bpf/sendmsg4_prog.c
new file mode 100644
index 000000000..a91536b1c
--- /dev/null
+++ b/tools/testing/selftests/bpf/sendmsg4_prog.c
@@ -0,0 +1,49 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2018 Facebook
+
+#include <linux/stddef.h>
+#include <linux/bpf.h>
+#include <sys/socket.h>
+
+#include "bpf_helpers.h"
+#include "bpf_endian.h"
+
+#define SRC1_IP4		0xAC100001U /* 172.16.0.1 */
+#define SRC2_IP4		0x00000000U
+#define SRC_REWRITE_IP4		0x7f000004U
+#define DST_IP4			0xC0A801FEU /* 192.168.1.254 */
+#define DST_REWRITE_IP4		0x7f000001U
+#define DST_PORT		4040
+#define DST_REWRITE_PORT4	4444
+
+int _version SEC("version") = 1;
+
+SEC("cgroup/sendmsg4")
+int sendmsg_v4_prog(struct bpf_sock_addr *ctx)
+{
+	if (ctx->type != SOCK_DGRAM)
+		return 0;
+
+	/* Rewrite source. */
+	if (ctx->msg_src_ip4 == bpf_htonl(SRC1_IP4) ||
+	    ctx->msg_src_ip4 == bpf_htonl(SRC2_IP4)) {
+		ctx->msg_src_ip4 = bpf_htonl(SRC_REWRITE_IP4);
+	} else {
+		/* Unexpected source. Reject sendmsg. */
+		return 0;
+	}
+
+	/* Rewrite destination. */
+	if ((ctx->user_ip4 >> 24) == (bpf_htonl(DST_IP4) >> 24) &&
+	     ctx->user_port == bpf_htons(DST_PORT)) {
+		ctx->user_ip4 = bpf_htonl(DST_REWRITE_IP4);
+		ctx->user_port = bpf_htons(DST_REWRITE_PORT4);
+	} else {
+		/* Unexpected source. Reject sendmsg. */
+		return 0;
+	}
+
+	return 1;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/sendmsg6_prog.c b/tools/testing/selftests/bpf/sendmsg6_prog.c
new file mode 100644
index 000000000..a68062820
--- /dev/null
+++ b/tools/testing/selftests/bpf/sendmsg6_prog.c
@@ -0,0 +1,59 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2018 Facebook
+
+#include <linux/stddef.h>
+#include <linux/bpf.h>
+#include <sys/socket.h>
+
+#include "bpf_helpers.h"
+#include "bpf_endian.h"
+
+#define SRC_REWRITE_IP6_0	0
+#define SRC_REWRITE_IP6_1	0
+#define SRC_REWRITE_IP6_2	0
+#define SRC_REWRITE_IP6_3	6
+
+#define DST_REWRITE_IP6_0	0
+#define DST_REWRITE_IP6_1	0
+#define DST_REWRITE_IP6_2	0
+#define DST_REWRITE_IP6_3	1
+
+#define DST_REWRITE_PORT6	6666
+
+int _version SEC("version") = 1;
+
+SEC("cgroup/sendmsg6")
+int sendmsg_v6_prog(struct bpf_sock_addr *ctx)
+{
+	if (ctx->type != SOCK_DGRAM)
+		return 0;
+
+	/* Rewrite source. */
+	if (ctx->msg_src_ip6[3] == bpf_htonl(1) ||
+	    ctx->msg_src_ip6[3] == bpf_htonl(0)) {
+		ctx->msg_src_ip6[0] = bpf_htonl(SRC_REWRITE_IP6_0);
+		ctx->msg_src_ip6[1] = bpf_htonl(SRC_REWRITE_IP6_1);
+		ctx->msg_src_ip6[2] = bpf_htonl(SRC_REWRITE_IP6_2);
+		ctx->msg_src_ip6[3] = bpf_htonl(SRC_REWRITE_IP6_3);
+	} else {
+		/* Unexpected source. Reject sendmsg. */
+		return 0;
+	}
+
+	/* Rewrite destination. */
+	if (ctx->user_ip6[0] == bpf_htonl(0xFACEB00C)) {
+		ctx->user_ip6[0] = bpf_htonl(DST_REWRITE_IP6_0);
+		ctx->user_ip6[1] = bpf_htonl(DST_REWRITE_IP6_1);
+		ctx->user_ip6[2] = bpf_htonl(DST_REWRITE_IP6_2);
+		ctx->user_ip6[3] = bpf_htonl(DST_REWRITE_IP6_3);
+
+		ctx->user_port = bpf_htons(DST_REWRITE_PORT6);
+	} else {
+		/* Unexpected destination. Reject sendmsg. */
+		return 0;
+	}
+
+	return 1;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/socket_cookie_prog.c b/tools/testing/selftests/bpf/socket_cookie_prog.c
new file mode 100644
index 000000000..9ff8ac4b0
--- /dev/null
+++ b/tools/testing/selftests/bpf/socket_cookie_prog.c
@@ -0,0 +1,60 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2018 Facebook
+
+#include <linux/bpf.h>
+#include <sys/socket.h>
+
+#include "bpf_helpers.h"
+#include "bpf_endian.h"
+
+struct bpf_map_def SEC("maps") socket_cookies = {
+	.type = BPF_MAP_TYPE_HASH,
+	.key_size = sizeof(__u64),
+	.value_size = sizeof(__u32),
+	.max_entries = 1 << 8,
+};
+
+SEC("cgroup/connect6")
+int set_cookie(struct bpf_sock_addr *ctx)
+{
+	__u32 cookie_value = 0xFF;
+	__u64 cookie_key;
+
+	if (ctx->family != AF_INET6 || ctx->user_family != AF_INET6)
+		return 1;
+
+	cookie_key = bpf_get_socket_cookie(ctx);
+	if (bpf_map_update_elem(&socket_cookies, &cookie_key, &cookie_value, 0))
+		return 0;
+
+	return 1;
+}
+
+SEC("sockops")
+int update_cookie(struct bpf_sock_ops *ctx)
+{
+	__u32 new_cookie_value;
+	__u32 *cookie_value;
+	__u64 cookie_key;
+
+	if (ctx->family != AF_INET6)
+		return 1;
+
+	if (ctx->op != BPF_SOCK_OPS_TCP_CONNECT_CB)
+		return 1;
+
+	cookie_key = bpf_get_socket_cookie(ctx);
+
+	cookie_value = bpf_map_lookup_elem(&socket_cookies, &cookie_key);
+	if (!cookie_value)
+		return 1;
+
+	new_cookie_value = (ctx->local_port << 8) | *cookie_value;
+	bpf_map_update_elem(&socket_cookies, &cookie_key, &new_cookie_value, 0);
+
+	return 1;
+}
+
+int _version SEC("version") = 1;
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/sockmap_parse_prog.c b/tools/testing/selftests/bpf/sockmap_parse_prog.c
new file mode 100644
index 000000000..0f92858f6
--- /dev/null
+++ b/tools/testing/selftests/bpf/sockmap_parse_prog.c
@@ -0,0 +1,46 @@
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+#include "bpf_util.h"
+#include "bpf_endian.h"
+
+int _version SEC("version") = 1;
+
+#define bpf_printk(fmt, ...)					\
+({								\
+	       char ____fmt[] = fmt;				\
+	       bpf_trace_printk(____fmt, sizeof(____fmt),	\
+				##__VA_ARGS__);			\
+})
+
+SEC("sk_skb1")
+int bpf_prog1(struct __sk_buff *skb)
+{
+	void *data_end = (void *)(long) skb->data_end;
+	void *data = (void *)(long) skb->data;
+	__u32 lport = skb->local_port;
+	__u32 rport = skb->remote_port;
+	__u8 *d = data;
+	__u32 len = (__u32) data_end - (__u32) data;
+	int err;
+
+	if (data + 10 > data_end) {
+		err = bpf_skb_pull_data(skb, 10);
+		if (err)
+			return SK_DROP;
+
+		data_end = (void *)(long)skb->data_end;
+		data = (void *)(long)skb->data;
+		if (data + 10 > data_end)
+			return SK_DROP;
+	}
+
+	/* This write/read is a bit pointless but tests the verifier and
+	 * strparser handler for read/write pkt data and access into sk
+	 * fields.
+	 */
+	d = data;
+	d[7] = 1;
+	return skb->len;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/sockmap_tcp_msg_prog.c b/tools/testing/selftests/bpf/sockmap_tcp_msg_prog.c
new file mode 100644
index 000000000..12a7b5c82
--- /dev/null
+++ b/tools/testing/selftests/bpf/sockmap_tcp_msg_prog.c
@@ -0,0 +1,33 @@
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+#include "bpf_util.h"
+#include "bpf_endian.h"
+
+int _version SEC("version") = 1;
+
+#define bpf_printk(fmt, ...)					\
+({								\
+	       char ____fmt[] = fmt;				\
+	       bpf_trace_printk(____fmt, sizeof(____fmt),	\
+				##__VA_ARGS__);			\
+})
+
+SEC("sk_msg1")
+int bpf_prog1(struct sk_msg_md *msg)
+{
+	void *data_end = (void *)(long) msg->data_end;
+	void *data = (void *)(long) msg->data;
+
+	char *d;
+
+	if (data + 8 > data_end)
+		return SK_DROP;
+
+	bpf_printk("data length %i\n", (__u64)msg->data_end - (__u64)msg->data);
+	d = (char *)data;
+	bpf_printk("hello sendmsg hook %i %i\n", d[0], d[1]);
+
+	return SK_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/sockmap_verdict_prog.c b/tools/testing/selftests/bpf/sockmap_verdict_prog.c
new file mode 100644
index 000000000..2ce7634a4
--- /dev/null
+++ b/tools/testing/selftests/bpf/sockmap_verdict_prog.c
@@ -0,0 +1,73 @@
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+#include "bpf_util.h"
+#include "bpf_endian.h"
+
+int _version SEC("version") = 1;
+
+#define bpf_printk(fmt, ...)					\
+({								\
+	       char ____fmt[] = fmt;				\
+	       bpf_trace_printk(____fmt, sizeof(____fmt),	\
+				##__VA_ARGS__);			\
+})
+
+struct bpf_map_def SEC("maps") sock_map_rx = {
+	.type = BPF_MAP_TYPE_SOCKMAP,
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.max_entries = 20,
+};
+
+struct bpf_map_def SEC("maps") sock_map_tx = {
+	.type = BPF_MAP_TYPE_SOCKMAP,
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.max_entries = 20,
+};
+
+struct bpf_map_def SEC("maps") sock_map_msg = {
+	.type = BPF_MAP_TYPE_SOCKMAP,
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.max_entries = 20,
+};
+
+struct bpf_map_def SEC("maps") sock_map_break = {
+	.type = BPF_MAP_TYPE_ARRAY,
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.max_entries = 20,
+};
+
+SEC("sk_skb2")
+int bpf_prog2(struct __sk_buff *skb)
+{
+	void *data_end = (void *)(long) skb->data_end;
+	void *data = (void *)(long) skb->data;
+	__u32 lport = skb->local_port;
+	__u32 rport = skb->remote_port;
+	__u8 *d = data;
+	__u8 sk, map;
+
+	if (data + 8 > data_end)
+		return SK_DROP;
+
+	map = d[0];
+	sk = d[1];
+
+	d[0] = 0xd;
+	d[1] = 0xe;
+	d[2] = 0xa;
+	d[3] = 0xd;
+	d[4] = 0xb;
+	d[5] = 0xe;
+	d[6] = 0xe;
+	d[7] = 0xf;
+
+	if (!map)
+		return bpf_sk_redirect_map(skb, &sock_map_rx, sk, 0);
+	return bpf_sk_redirect_map(skb, &sock_map_tx, sk, 0);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/tcp_client.py b/tools/testing/selftests/bpf/tcp_client.py
new file mode 100755
index 000000000..7f8200a87
--- /dev/null
+++ b/tools/testing/selftests/bpf/tcp_client.py
@@ -0,0 +1,51 @@
+#!/usr/bin/env python3
+#
+# SPDX-License-Identifier: GPL-2.0
+#
+
+import sys, os, os.path, getopt
+import socket, time
+import subprocess
+import select
+
+def read(sock, n):
+    buf = b''
+    while len(buf) < n:
+        rem = n - len(buf)
+        try: s = sock.recv(rem)
+        except (socket.error) as e: return b''
+        buf += s
+    return buf
+
+def send(sock, s):
+    total = len(s)
+    count = 0
+    while count < total:
+        try: n = sock.send(s)
+        except (socket.error) as e: n = 0
+        if n == 0:
+            return count;
+        count += n
+    return count
+
+
+serverPort = int(sys.argv[1])
+HostName = socket.gethostname()
+
+# create active socket
+sock = socket.socket(socket.AF_INET6, socket.SOCK_STREAM)
+try:
+    sock.connect((HostName, serverPort))
+except socket.error as e:
+    sys.exit(1)
+
+buf = b''
+n = 0
+while n < 1000:
+    buf += b'+'
+    n += 1
+
+sock.settimeout(1);
+n = send(sock, buf)
+n = read(sock, 500)
+sys.exit(0)
diff --git a/tools/testing/selftests/bpf/tcp_server.py b/tools/testing/selftests/bpf/tcp_server.py
new file mode 100755
index 000000000..b39903fca
--- /dev/null
+++ b/tools/testing/selftests/bpf/tcp_server.py
@@ -0,0 +1,83 @@
+#!/usr/bin/env python3
+#
+# SPDX-License-Identifier: GPL-2.0
+#
+
+import sys, os, os.path, getopt
+import socket, time
+import subprocess
+import select
+
+def read(sock, n):
+    buf = b''
+    while len(buf) < n:
+        rem = n - len(buf)
+        try: s = sock.recv(rem)
+        except (socket.error) as e: return b''
+        buf += s
+    return buf
+
+def send(sock, s):
+    total = len(s)
+    count = 0
+    while count < total:
+        try: n = sock.send(s)
+        except (socket.error) as e: n = 0
+        if n == 0:
+            return count;
+        count += n
+    return count
+
+
+SERVER_PORT = 12877
+MAX_PORTS = 2
+
+serverPort = SERVER_PORT
+serverSocket = None
+
+HostName = socket.gethostname()
+
+# create passive socket
+serverSocket = socket.socket(socket.AF_INET6, socket.SOCK_STREAM)
+host = socket.gethostname()
+
+try: serverSocket.bind((host, 0))
+except socket.error as msg:
+    print('bind fails: ' + str(msg))
+
+sn = serverSocket.getsockname()
+serverPort = sn[1]
+
+cmdStr = ("./tcp_client.py %d &") % (serverPort)
+os.system(cmdStr)
+
+buf = b''
+n = 0
+while n < 500:
+    buf += b'.'
+    n += 1
+
+serverSocket.listen(MAX_PORTS)
+readList = [serverSocket]
+
+while True:
+    readyRead, readyWrite, inError = \
+        select.select(readList, [], [], 2)
+
+    if len(readyRead) > 0:
+        waitCount = 0
+        for sock in readyRead:
+            if sock == serverSocket:
+                (clientSocket, address) = serverSocket.accept()
+                address = str(address[0])
+                readList.append(clientSocket)
+            else:
+                sock.settimeout(1);
+                s = read(sock, 1000)
+                n = send(sock, buf)
+                sock.close()
+                serverSocket.close()
+                sys.exit(0)
+    else:
+        print('Select timeout!')
+        sys.exit(1)
diff --git a/tools/testing/selftests/bpf/test_adjust_tail.c b/tools/testing/selftests/bpf/test_adjust_tail.c
new file mode 100644
index 000000000..4cd5e860c
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_adjust_tail.c
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: GPL-2.0
+ * Copyright (c) 2018 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <linux/bpf.h>
+#include <linux/if_ether.h>
+#include "bpf_helpers.h"
+
+int _version SEC("version") = 1;
+
+SEC("xdp_adjust_tail")
+int _xdp_adjust_tail(struct xdp_md *xdp)
+{
+	void *data_end = (void *)(long)xdp->data_end;
+	void *data = (void *)(long)xdp->data;
+	int offset = 0;
+
+	if (data_end - data == 54)
+		offset = 256;
+	else
+		offset = 20;
+	if (bpf_xdp_adjust_tail(xdp, 0 - offset))
+		return XDP_DROP;
+	return XDP_TX;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_align.c b/tools/testing/selftests/bpf/test_align.c
new file mode 100644
index 000000000..3c789d03b
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_align.c
@@ -0,0 +1,719 @@
+#include <asm/types.h>
+#include <linux/types.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <errno.h>
+#include <string.h>
+#include <stddef.h>
+#include <stdbool.h>
+
+#include <linux/unistd.h>
+#include <linux/filter.h>
+#include <linux/bpf_perf_event.h>
+#include <linux/bpf.h>
+
+#include <bpf/bpf.h>
+
+#include "../../../include/linux/filter.h"
+#include "bpf_rlimit.h"
+#include "bpf_util.h"
+
+#define MAX_INSNS	512
+#define MAX_MATCHES	16
+
+struct bpf_reg_match {
+	unsigned int line;
+	const char *match;
+};
+
+struct bpf_align_test {
+	const char *descr;
+	struct bpf_insn	insns[MAX_INSNS];
+	enum {
+		UNDEF,
+		ACCEPT,
+		REJECT
+	} result;
+	enum bpf_prog_type prog_type;
+	/* Matches must be in order of increasing line */
+	struct bpf_reg_match matches[MAX_MATCHES];
+};
+
+static struct bpf_align_test tests[] = {
+	/* Four tests of known constants.  These aren't staggeringly
+	 * interesting since we track exact values now.
+	 */
+	{
+		.descr = "mov",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_3, 2),
+			BPF_MOV64_IMM(BPF_REG_3, 4),
+			BPF_MOV64_IMM(BPF_REG_3, 8),
+			BPF_MOV64_IMM(BPF_REG_3, 16),
+			BPF_MOV64_IMM(BPF_REG_3, 32),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.matches = {
+			{1, "R1=ctx(id=0,off=0,imm=0)"},
+			{1, "R10=fp0"},
+			{1, "R3_w=inv2"},
+			{2, "R3_w=inv4"},
+			{3, "R3_w=inv8"},
+			{4, "R3_w=inv16"},
+			{5, "R3_w=inv32"},
+		},
+	},
+	{
+		.descr = "shift",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_3, 1),
+			BPF_ALU64_IMM(BPF_LSH, BPF_REG_3, 1),
+			BPF_ALU64_IMM(BPF_LSH, BPF_REG_3, 1),
+			BPF_ALU64_IMM(BPF_LSH, BPF_REG_3, 1),
+			BPF_ALU64_IMM(BPF_LSH, BPF_REG_3, 1),
+			BPF_ALU64_IMM(BPF_RSH, BPF_REG_3, 4),
+			BPF_MOV64_IMM(BPF_REG_4, 32),
+			BPF_ALU64_IMM(BPF_RSH, BPF_REG_4, 1),
+			BPF_ALU64_IMM(BPF_RSH, BPF_REG_4, 1),
+			BPF_ALU64_IMM(BPF_RSH, BPF_REG_4, 1),
+			BPF_ALU64_IMM(BPF_RSH, BPF_REG_4, 1),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.matches = {
+			{1, "R1=ctx(id=0,off=0,imm=0)"},
+			{1, "R10=fp0"},
+			{1, "R3_w=inv1"},
+			{2, "R3_w=inv2"},
+			{3, "R3_w=inv4"},
+			{4, "R3_w=inv8"},
+			{5, "R3_w=inv16"},
+			{6, "R3_w=inv1"},
+			{7, "R4_w=inv32"},
+			{8, "R4_w=inv16"},
+			{9, "R4_w=inv8"},
+			{10, "R4_w=inv4"},
+			{11, "R4_w=inv2"},
+		},
+	},
+	{
+		.descr = "addsub",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_3, 4),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, 4),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, 2),
+			BPF_MOV64_IMM(BPF_REG_4, 8),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 2),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.matches = {
+			{1, "R1=ctx(id=0,off=0,imm=0)"},
+			{1, "R10=fp0"},
+			{1, "R3_w=inv4"},
+			{2, "R3_w=inv8"},
+			{3, "R3_w=inv10"},
+			{4, "R4_w=inv8"},
+			{5, "R4_w=inv12"},
+			{6, "R4_w=inv14"},
+		},
+	},
+	{
+		.descr = "mul",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_3, 7),
+			BPF_ALU64_IMM(BPF_MUL, BPF_REG_3, 1),
+			BPF_ALU64_IMM(BPF_MUL, BPF_REG_3, 2),
+			BPF_ALU64_IMM(BPF_MUL, BPF_REG_3, 4),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.matches = {
+			{1, "R1=ctx(id=0,off=0,imm=0)"},
+			{1, "R10=fp0"},
+			{1, "R3_w=inv7"},
+			{2, "R3_w=inv7"},
+			{3, "R3_w=inv14"},
+			{4, "R3_w=inv56"},
+		},
+	},
+
+	/* Tests using unknown values */
+#define PREP_PKT_POINTERS \
+	BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, \
+		    offsetof(struct __sk_buff, data)), \
+	BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, \
+		    offsetof(struct __sk_buff, data_end))
+
+#define LOAD_UNKNOWN(DST_REG) \
+	PREP_PKT_POINTERS, \
+	BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), \
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), \
+	BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_0, 1), \
+	BPF_EXIT_INSN(), \
+	BPF_LDX_MEM(BPF_B, DST_REG, BPF_REG_2, 0)
+
+	{
+		.descr = "unknown shift",
+		.insns = {
+			LOAD_UNKNOWN(BPF_REG_3),
+			BPF_ALU64_IMM(BPF_LSH, BPF_REG_3, 1),
+			BPF_ALU64_IMM(BPF_LSH, BPF_REG_3, 1),
+			BPF_ALU64_IMM(BPF_LSH, BPF_REG_3, 1),
+			BPF_ALU64_IMM(BPF_LSH, BPF_REG_3, 1),
+			LOAD_UNKNOWN(BPF_REG_4),
+			BPF_ALU64_IMM(BPF_LSH, BPF_REG_4, 5),
+			BPF_ALU64_IMM(BPF_RSH, BPF_REG_4, 1),
+			BPF_ALU64_IMM(BPF_RSH, BPF_REG_4, 1),
+			BPF_ALU64_IMM(BPF_RSH, BPF_REG_4, 1),
+			BPF_ALU64_IMM(BPF_RSH, BPF_REG_4, 1),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.matches = {
+			{7, "R0=pkt(id=0,off=8,r=8,imm=0)"},
+			{7, "R3_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
+			{8, "R3_w=inv(id=0,umax_value=510,var_off=(0x0; 0x1fe))"},
+			{9, "R3_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+			{10, "R3_w=inv(id=0,umax_value=2040,var_off=(0x0; 0x7f8))"},
+			{11, "R3_w=inv(id=0,umax_value=4080,var_off=(0x0; 0xff0))"},
+			{18, "R3=pkt_end(id=0,off=0,imm=0)"},
+			{18, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
+			{19, "R4_w=inv(id=0,umax_value=8160,var_off=(0x0; 0x1fe0))"},
+			{20, "R4_w=inv(id=0,umax_value=4080,var_off=(0x0; 0xff0))"},
+			{21, "R4_w=inv(id=0,umax_value=2040,var_off=(0x0; 0x7f8))"},
+			{22, "R4_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+			{23, "R4_w=inv(id=0,umax_value=510,var_off=(0x0; 0x1fe))"},
+		},
+	},
+	{
+		.descr = "unknown mul",
+		.insns = {
+			LOAD_UNKNOWN(BPF_REG_3),
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_3),
+			BPF_ALU64_IMM(BPF_MUL, BPF_REG_4, 1),
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_3),
+			BPF_ALU64_IMM(BPF_MUL, BPF_REG_4, 2),
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_3),
+			BPF_ALU64_IMM(BPF_MUL, BPF_REG_4, 4),
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_3),
+			BPF_ALU64_IMM(BPF_MUL, BPF_REG_4, 8),
+			BPF_ALU64_IMM(BPF_MUL, BPF_REG_4, 2),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.matches = {
+			{7, "R3_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
+			{8, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
+			{9, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
+			{10, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
+			{11, "R4_w=inv(id=0,umax_value=510,var_off=(0x0; 0x1fe))"},
+			{12, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
+			{13, "R4_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+			{14, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
+			{15, "R4_w=inv(id=0,umax_value=2040,var_off=(0x0; 0x7f8))"},
+			{16, "R4_w=inv(id=0,umax_value=4080,var_off=(0x0; 0xff0))"},
+		},
+	},
+	{
+		.descr = "packet const offset",
+		.insns = {
+			PREP_PKT_POINTERS,
+			BPF_MOV64_REG(BPF_REG_5, BPF_REG_2),
+
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+
+			/* Skip over ethernet header.  */
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 14),
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_5),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4),
+			BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_4, 1),
+			BPF_EXIT_INSN(),
+
+			BPF_LDX_MEM(BPF_B, BPF_REG_4, BPF_REG_5, 0),
+			BPF_LDX_MEM(BPF_B, BPF_REG_4, BPF_REG_5, 1),
+			BPF_LDX_MEM(BPF_B, BPF_REG_4, BPF_REG_5, 2),
+			BPF_LDX_MEM(BPF_B, BPF_REG_4, BPF_REG_5, 3),
+			BPF_LDX_MEM(BPF_H, BPF_REG_4, BPF_REG_5, 0),
+			BPF_LDX_MEM(BPF_H, BPF_REG_4, BPF_REG_5, 2),
+			BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_5, 0),
+
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.matches = {
+			{4, "R5_w=pkt(id=0,off=0,r=0,imm=0)"},
+			{5, "R5_w=pkt(id=0,off=14,r=0,imm=0)"},
+			{6, "R4_w=pkt(id=0,off=14,r=0,imm=0)"},
+			{10, "R2=pkt(id=0,off=0,r=18,imm=0)"},
+			{10, "R5=pkt(id=0,off=14,r=18,imm=0)"},
+			{10, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
+			{14, "R4_w=inv(id=0,umax_value=65535,var_off=(0x0; 0xffff))"},
+			{15, "R4_w=inv(id=0,umax_value=65535,var_off=(0x0; 0xffff))"},
+		},
+	},
+	{
+		.descr = "packet variable offset",
+		.insns = {
+			LOAD_UNKNOWN(BPF_REG_6),
+			BPF_ALU64_IMM(BPF_LSH, BPF_REG_6, 2),
+
+			/* First, add a constant to the R5 packet pointer,
+			 * then a variable with a known alignment.
+			 */
+			BPF_MOV64_REG(BPF_REG_5, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 14),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_5, BPF_REG_6),
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_5),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4),
+			BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_4, 1),
+			BPF_EXIT_INSN(),
+			BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_5, 0),
+
+			/* Now, test in the other direction.  Adding first
+			 * the variable offset to R5, then the constant.
+			 */
+			BPF_MOV64_REG(BPF_REG_5, BPF_REG_2),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_5, BPF_REG_6),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 14),
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_5),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4),
+			BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_4, 1),
+			BPF_EXIT_INSN(),
+			BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_5, 0),
+
+			/* Test multiple accumulations of unknown values
+			 * into a packet pointer.
+			 */
+			BPF_MOV64_REG(BPF_REG_5, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 14),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_5, BPF_REG_6),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 4),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_5, BPF_REG_6),
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_5),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4),
+			BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_4, 1),
+			BPF_EXIT_INSN(),
+			BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_5, 0),
+
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.matches = {
+			/* Calculated offset in R6 has unknown value, but known
+			 * alignment of 4.
+			 */
+			{8, "R2=pkt(id=0,off=0,r=8,imm=0)"},
+			{8, "R6_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+			/* Offset is added to packet pointer R5, resulting in
+			 * known fixed offset, and variable offset from R6.
+			 */
+			{11, "R5_w=pkt(id=1,off=14,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+			/* At the time the word size load is performed from R5,
+			 * it's total offset is NET_IP_ALIGN + reg->off (0) +
+			 * reg->aux_off (14) which is 16.  Then the variable
+			 * offset is considered using reg->aux_off_align which
+			 * is 4 and meets the load's requirements.
+			 */
+			{15, "R4=pkt(id=1,off=18,r=18,umax_value=1020,var_off=(0x0; 0x3fc))"},
+			{15, "R5=pkt(id=1,off=14,r=18,umax_value=1020,var_off=(0x0; 0x3fc))"},
+			/* Variable offset is added to R5 packet pointer,
+			 * resulting in auxiliary alignment of 4.
+			 */
+			{18, "R5_w=pkt(id=2,off=0,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+			/* Constant offset is added to R5, resulting in
+			 * reg->off of 14.
+			 */
+			{19, "R5_w=pkt(id=2,off=14,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+			/* At the time the word size load is performed from R5,
+			 * its total fixed offset is NET_IP_ALIGN + reg->off
+			 * (14) which is 16.  Then the variable offset is 4-byte
+			 * aligned, so the total offset is 4-byte aligned and
+			 * meets the load's requirements.
+			 */
+			{23, "R4=pkt(id=2,off=18,r=18,umax_value=1020,var_off=(0x0; 0x3fc))"},
+			{23, "R5=pkt(id=2,off=14,r=18,umax_value=1020,var_off=(0x0; 0x3fc))"},
+			/* Constant offset is added to R5 packet pointer,
+			 * resulting in reg->off value of 14.
+			 */
+			{26, "R5_w=pkt(id=0,off=14,r=8"},
+			/* Variable offset is added to R5, resulting in a
+			 * variable offset of (4n).
+			 */
+			{27, "R5_w=pkt(id=3,off=14,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+			/* Constant is added to R5 again, setting reg->off to 18. */
+			{28, "R5_w=pkt(id=3,off=18,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+			/* And once more we add a variable; resulting var_off
+			 * is still (4n), fixed offset is not changed.
+			 * Also, we create a new reg->id.
+			 */
+			{29, "R5_w=pkt(id=4,off=18,r=0,umax_value=2040,var_off=(0x0; 0x7fc))"},
+			/* At the time the word size load is performed from R5,
+			 * its total fixed offset is NET_IP_ALIGN + reg->off (18)
+			 * which is 20.  Then the variable offset is (4n), so
+			 * the total offset is 4-byte aligned and meets the
+			 * load's requirements.
+			 */
+			{33, "R4=pkt(id=4,off=22,r=22,umax_value=2040,var_off=(0x0; 0x7fc))"},
+			{33, "R5=pkt(id=4,off=18,r=22,umax_value=2040,var_off=(0x0; 0x7fc))"},
+		},
+	},
+	{
+		.descr = "packet variable offset 2",
+		.insns = {
+			/* Create an unknown offset, (4n+2)-aligned */
+			LOAD_UNKNOWN(BPF_REG_6),
+			BPF_ALU64_IMM(BPF_LSH, BPF_REG_6, 2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 14),
+			/* Add it to the packet pointer */
+			BPF_MOV64_REG(BPF_REG_5, BPF_REG_2),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_5, BPF_REG_6),
+			/* Check bounds and perform a read */
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_5),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4),
+			BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_4, 1),
+			BPF_EXIT_INSN(),
+			BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_5, 0),
+			/* Make a (4n) offset from the value we just read */
+			BPF_ALU64_IMM(BPF_AND, BPF_REG_6, 0xff),
+			BPF_ALU64_IMM(BPF_LSH, BPF_REG_6, 2),
+			/* Add it to the packet pointer */
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_5, BPF_REG_6),
+			/* Check bounds and perform a read */
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_5),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4),
+			BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_4, 1),
+			BPF_EXIT_INSN(),
+			BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_5, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.matches = {
+			/* Calculated offset in R6 has unknown value, but known
+			 * alignment of 4.
+			 */
+			{8, "R2=pkt(id=0,off=0,r=8,imm=0)"},
+			{8, "R6_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+			/* Adding 14 makes R6 be (4n+2) */
+			{9, "R6_w=inv(id=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"},
+			/* Packet pointer has (4n+2) offset */
+			{11, "R5_w=pkt(id=1,off=0,r=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"},
+			{13, "R4=pkt(id=1,off=4,r=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"},
+			/* At the time the word size load is performed from R5,
+			 * its total fixed offset is NET_IP_ALIGN + reg->off (0)
+			 * which is 2.  Then the variable offset is (4n+2), so
+			 * the total offset is 4-byte aligned and meets the
+			 * load's requirements.
+			 */
+			{15, "R5=pkt(id=1,off=0,r=4,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"},
+			/* Newly read value in R6 was shifted left by 2, so has
+			 * known alignment of 4.
+			 */
+			{18, "R6_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+			/* Added (4n) to packet pointer's (4n+2) var_off, giving
+			 * another (4n+2).
+			 */
+			{19, "R5_w=pkt(id=2,off=0,r=0,umin_value=14,umax_value=2054,var_off=(0x2; 0xffc))"},
+			{21, "R4=pkt(id=2,off=4,r=0,umin_value=14,umax_value=2054,var_off=(0x2; 0xffc))"},
+			/* At the time the word size load is performed from R5,
+			 * its total fixed offset is NET_IP_ALIGN + reg->off (0)
+			 * which is 2.  Then the variable offset is (4n+2), so
+			 * the total offset is 4-byte aligned and meets the
+			 * load's requirements.
+			 */
+			{23, "R5=pkt(id=2,off=0,r=4,umin_value=14,umax_value=2054,var_off=(0x2; 0xffc))"},
+		},
+	},
+	{
+		.descr = "dubious pointer arithmetic",
+		.insns = {
+			PREP_PKT_POINTERS,
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			/* (ptr - ptr) << 2 */
+			BPF_MOV64_REG(BPF_REG_5, BPF_REG_3),
+			BPF_ALU64_REG(BPF_SUB, BPF_REG_5, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_LSH, BPF_REG_5, 2),
+			/* We have a (4n) value.  Let's make a packet offset
+			 * out of it.  First add 14, to make it a (4n+2)
+			 */
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 14),
+			/* Then make sure it's nonnegative */
+			BPF_JMP_IMM(BPF_JSGE, BPF_REG_5, 0, 1),
+			BPF_EXIT_INSN(),
+			/* Add it to packet pointer */
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_2),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_6, BPF_REG_5),
+			/* Check bounds and perform a read */
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_6),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4),
+			BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_4, 1),
+			BPF_EXIT_INSN(),
+			BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_6, 0),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = REJECT,
+		.matches = {
+			{4, "R5_w=pkt_end(id=0,off=0,imm=0)"},
+			/* (ptr - ptr) << 2 == unknown, (4n) */
+			{6, "R5_w=inv(id=0,smax_value=9223372036854775804,umax_value=18446744073709551612,var_off=(0x0; 0xfffffffffffffffc))"},
+			/* (4n) + 14 == (4n+2).  We blow our bounds, because
+			 * the add could overflow.
+			 */
+			{7, "R5=inv(id=0,var_off=(0x2; 0xfffffffffffffffc))"},
+			/* Checked s>=0 */
+			{9, "R5=inv(id=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc))"},
+			/* packet pointer + nonnegative (4n+2) */
+			{11, "R6_w=pkt(id=1,off=0,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc))"},
+			{13, "R4=pkt(id=1,off=4,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc))"},
+			/* NET_IP_ALIGN + (4n+2) == (4n), alignment is fine.
+			 * We checked the bounds, but it might have been able
+			 * to overflow if the packet pointer started in the
+			 * upper half of the address space.
+			 * So we did not get a 'range' on R6, and the access
+			 * attempt will fail.
+			 */
+			{15, "R6=pkt(id=1,off=0,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc))"},
+		}
+	},
+	{
+		.descr = "variable subtraction",
+		.insns = {
+			/* Create an unknown offset, (4n+2)-aligned */
+			LOAD_UNKNOWN(BPF_REG_6),
+			BPF_MOV64_REG(BPF_REG_7, BPF_REG_6),
+			BPF_ALU64_IMM(BPF_LSH, BPF_REG_6, 2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 14),
+			/* Create another unknown, (4n)-aligned, and subtract
+			 * it from the first one
+			 */
+			BPF_ALU64_IMM(BPF_LSH, BPF_REG_7, 2),
+			BPF_ALU64_REG(BPF_SUB, BPF_REG_6, BPF_REG_7),
+			/* Bounds-check the result */
+			BPF_JMP_IMM(BPF_JSGE, BPF_REG_6, 0, 1),
+			BPF_EXIT_INSN(),
+			/* Add it to the packet pointer */
+			BPF_MOV64_REG(BPF_REG_5, BPF_REG_2),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_5, BPF_REG_6),
+			/* Check bounds and perform a read */
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_5),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4),
+			BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_4, 1),
+			BPF_EXIT_INSN(),
+			BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_5, 0),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.matches = {
+			/* Calculated offset in R6 has unknown value, but known
+			 * alignment of 4.
+			 */
+			{7, "R2=pkt(id=0,off=0,r=8,imm=0)"},
+			{9, "R6_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+			/* Adding 14 makes R6 be (4n+2) */
+			{10, "R6_w=inv(id=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"},
+			/* New unknown value in R7 is (4n) */
+			{11, "R7_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+			/* Subtracting it from R6 blows our unsigned bounds */
+			{12, "R6=inv(id=0,smin_value=-1006,smax_value=1034,var_off=(0x2; 0xfffffffffffffffc))"},
+			/* Checked s>= 0 */
+			{14, "R6=inv(id=0,umin_value=2,umax_value=1034,var_off=(0x2; 0x7fc))"},
+			/* At the time the word size load is performed from R5,
+			 * its total fixed offset is NET_IP_ALIGN + reg->off (0)
+			 * which is 2.  Then the variable offset is (4n+2), so
+			 * the total offset is 4-byte aligned and meets the
+			 * load's requirements.
+			 */
+			{20, "R5=pkt(id=1,off=0,r=4,umin_value=2,umax_value=1034,var_off=(0x2; 0x7fc))"},
+		},
+	},
+	{
+		.descr = "pointer variable subtraction",
+		.insns = {
+			/* Create an unknown offset, (4n+2)-aligned and bounded
+			 * to [14,74]
+			 */
+			LOAD_UNKNOWN(BPF_REG_6),
+			BPF_MOV64_REG(BPF_REG_7, BPF_REG_6),
+			BPF_ALU64_IMM(BPF_AND, BPF_REG_6, 0xf),
+			BPF_ALU64_IMM(BPF_LSH, BPF_REG_6, 2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 14),
+			/* Subtract it from the packet pointer */
+			BPF_MOV64_REG(BPF_REG_5, BPF_REG_2),
+			BPF_ALU64_REG(BPF_SUB, BPF_REG_5, BPF_REG_6),
+			/* Create another unknown, (4n)-aligned and >= 74.
+			 * That in fact means >= 76, since 74 % 4 == 2
+			 */
+			BPF_ALU64_IMM(BPF_LSH, BPF_REG_7, 2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, 76),
+			/* Add it to the packet pointer */
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_5, BPF_REG_7),
+			/* Check bounds and perform a read */
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_5),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4),
+			BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_4, 1),
+			BPF_EXIT_INSN(),
+			BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_5, 0),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.matches = {
+			/* Calculated offset in R6 has unknown value, but known
+			 * alignment of 4.
+			 */
+			{7, "R2=pkt(id=0,off=0,r=8,imm=0)"},
+			{10, "R6_w=inv(id=0,umax_value=60,var_off=(0x0; 0x3c))"},
+			/* Adding 14 makes R6 be (4n+2) */
+			{11, "R6_w=inv(id=0,umin_value=14,umax_value=74,var_off=(0x2; 0x7c))"},
+			/* Subtracting from packet pointer overflows ubounds */
+			{13, "R5_w=pkt(id=1,off=0,r=8,umin_value=18446744073709551542,umax_value=18446744073709551602,var_off=(0xffffffffffffff82; 0x7c))"},
+			/* New unknown value in R7 is (4n), >= 76 */
+			{15, "R7_w=inv(id=0,umin_value=76,umax_value=1096,var_off=(0x0; 0x7fc))"},
+			/* Adding it to packet pointer gives nice bounds again */
+			{16, "R5_w=pkt(id=2,off=0,r=0,umin_value=2,umax_value=1082,var_off=(0x2; 0x7fc))"},
+			/* At the time the word size load is performed from R5,
+			 * its total fixed offset is NET_IP_ALIGN + reg->off (0)
+			 * which is 2.  Then the variable offset is (4n+2), so
+			 * the total offset is 4-byte aligned and meets the
+			 * load's requirements.
+			 */
+			{20, "R5=pkt(id=2,off=0,r=4,umin_value=2,umax_value=1082,var_off=(0x2; 0x7fc))"},
+		},
+	},
+};
+
+static int probe_filter_length(const struct bpf_insn *fp)
+{
+	int len;
+
+	for (len = MAX_INSNS - 1; len > 0; --len)
+		if (fp[len].code != 0 || fp[len].imm != 0)
+			break;
+	return len + 1;
+}
+
+static char bpf_vlog[32768];
+
+static int do_test_single(struct bpf_align_test *test)
+{
+	struct bpf_insn *prog = test->insns;
+	int prog_type = test->prog_type;
+	char bpf_vlog_copy[32768];
+	const char *line_ptr;
+	int cur_line = -1;
+	int prog_len, i;
+	int fd_prog;
+	int ret;
+
+	prog_len = probe_filter_length(prog);
+	fd_prog = bpf_verify_program(prog_type ? : BPF_PROG_TYPE_SOCKET_FILTER,
+				     prog, prog_len, BPF_F_STRICT_ALIGNMENT,
+				     "GPL", 0, bpf_vlog, sizeof(bpf_vlog), 2);
+	if (fd_prog < 0 && test->result != REJECT) {
+		printf("Failed to load program.\n");
+		printf("%s", bpf_vlog);
+		ret = 1;
+	} else if (fd_prog >= 0 && test->result == REJECT) {
+		printf("Unexpected success to load!\n");
+		printf("%s", bpf_vlog);
+		ret = 1;
+		close(fd_prog);
+	} else {
+		ret = 0;
+		/* We make a local copy so that we can strtok() it */
+		strncpy(bpf_vlog_copy, bpf_vlog, sizeof(bpf_vlog_copy));
+		line_ptr = strtok(bpf_vlog_copy, "\n");
+		for (i = 0; i < MAX_MATCHES; i++) {
+			struct bpf_reg_match m = test->matches[i];
+
+			if (!m.match)
+				break;
+			while (line_ptr) {
+				cur_line = -1;
+				sscanf(line_ptr, "%u: ", &cur_line);
+				if (cur_line == m.line)
+					break;
+				line_ptr = strtok(NULL, "\n");
+			}
+			if (!line_ptr) {
+				printf("Failed to find line %u for match: %s\n",
+				       m.line, m.match);
+				ret = 1;
+				printf("%s", bpf_vlog);
+				break;
+			}
+			if (!strstr(line_ptr, m.match)) {
+				printf("Failed to find match %u: %s\n",
+				       m.line, m.match);
+				ret = 1;
+				printf("%s", bpf_vlog);
+				break;
+			}
+		}
+		if (fd_prog >= 0)
+			close(fd_prog);
+	}
+	return ret;
+}
+
+static int do_test(unsigned int from, unsigned int to)
+{
+	int all_pass = 0;
+	int all_fail = 0;
+	unsigned int i;
+
+	for (i = from; i < to; i++) {
+		struct bpf_align_test *test = &tests[i];
+		int fail;
+
+		printf("Test %3d: %s ... ",
+		       i, test->descr);
+		fail = do_test_single(test);
+		if (fail) {
+			all_fail++;
+			printf("FAIL\n");
+		} else {
+			all_pass++;
+			printf("PASS\n");
+		}
+	}
+	printf("Results: %d pass %d fail\n",
+	       all_pass, all_fail);
+	return all_fail ? EXIT_FAILURE : EXIT_SUCCESS;
+}
+
+int main(int argc, char **argv)
+{
+	unsigned int from = 0, to = ARRAY_SIZE(tests);
+
+	if (argc == 3) {
+		unsigned int l = atoi(argv[argc - 2]);
+		unsigned int u = atoi(argv[argc - 1]);
+
+		if (l < to && u < to) {
+			from = l;
+			to   = u + 1;
+		}
+	} else if (argc == 2) {
+		unsigned int t = atoi(argv[argc - 1]);
+
+		if (t < to) {
+			from = t;
+			to   = t + 1;
+		}
+	}
+	return do_test(from, to);
+}
diff --git a/tools/testing/selftests/bpf/test_btf.c b/tools/testing/selftests/bpf/test_btf.c
new file mode 100644
index 000000000..29116366a
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_btf.c
@@ -0,0 +1,2840 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018 Facebook */
+
+#include <linux/bpf.h>
+#include <linux/btf.h>
+#include <linux/err.h>
+#include <bpf/bpf.h>
+#include <sys/resource.h>
+#include <libelf.h>
+#include <gelf.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdarg.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <bpf/libbpf.h>
+#include <bpf/btf.h>
+
+#include "bpf_rlimit.h"
+#include "bpf_util.h"
+
+static uint32_t pass_cnt;
+static uint32_t error_cnt;
+static uint32_t skip_cnt;
+
+#define CHECK(condition, format...) ({					\
+	int __ret = !!(condition);					\
+	if (__ret) {							\
+		fprintf(stderr, "%s:%d:FAIL ", __func__, __LINE__);	\
+		fprintf(stderr, format);				\
+	}								\
+	__ret;								\
+})
+
+static int count_result(int err)
+{
+	if (err)
+		error_cnt++;
+	else
+		pass_cnt++;
+
+	fprintf(stderr, "\n");
+	return err;
+}
+
+#define min(a, b) ((a) < (b) ? (a) : (b))
+#define __printf(a, b)	__attribute__((format(printf, a, b)))
+
+__printf(1, 2)
+static int __base_pr(const char *format, ...)
+{
+	va_list args;
+	int err;
+
+	va_start(args, format);
+	err = vfprintf(stderr, format, args);
+	va_end(args);
+	return err;
+}
+
+#define BTF_INFO_ENC(kind, root, vlen)			\
+	((!!(root) << 31) | ((kind) << 24) | ((vlen) & BTF_MAX_VLEN))
+
+#define BTF_TYPE_ENC(name, info, size_or_type)	\
+	(name), (info), (size_or_type)
+
+#define BTF_INT_ENC(encoding, bits_offset, nr_bits)	\
+	((encoding) << 24 | (bits_offset) << 16 | (nr_bits))
+#define BTF_TYPE_INT_ENC(name, encoding, bits_offset, bits, sz)	\
+	BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_INT, 0, 0), sz),	\
+	BTF_INT_ENC(encoding, bits_offset, bits)
+
+#define BTF_ARRAY_ENC(type, index_type, nr_elems)	\
+	(type), (index_type), (nr_elems)
+#define BTF_TYPE_ARRAY_ENC(type, index_type, nr_elems) \
+	BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_ARRAY, 0, 0), 0), \
+	BTF_ARRAY_ENC(type, index_type, nr_elems)
+
+#define BTF_MEMBER_ENC(name, type, bits_offset)	\
+	(name), (type), (bits_offset)
+#define BTF_ENUM_ENC(name, val) (name), (val)
+
+#define BTF_TYPEDEF_ENC(name, type) \
+	BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_TYPEDEF, 0, 0), type)
+
+#define BTF_PTR_ENC(name, type) \
+	BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), type)
+
+#define BTF_END_RAW 0xdeadbeef
+#define NAME_TBD 0xdeadb33f
+
+#define MAX_NR_RAW_TYPES 1024
+#define BTF_LOG_BUF_SIZE 65535
+
+static struct args {
+	unsigned int raw_test_num;
+	unsigned int file_test_num;
+	unsigned int get_info_test_num;
+	bool raw_test;
+	bool file_test;
+	bool get_info_test;
+	bool pprint_test;
+	bool always_log;
+} args;
+
+static char btf_log_buf[BTF_LOG_BUF_SIZE];
+
+static struct btf_header hdr_tmpl = {
+	.magic = BTF_MAGIC,
+	.version = BTF_VERSION,
+	.hdr_len = sizeof(struct btf_header),
+};
+
+struct btf_raw_test {
+	const char *descr;
+	const char *str_sec;
+	const char *map_name;
+	const char *err_str;
+	__u32 raw_types[MAX_NR_RAW_TYPES];
+	__u32 str_sec_size;
+	enum bpf_map_type map_type;
+	__u32 key_size;
+	__u32 value_size;
+	__u32 key_type_id;
+	__u32 value_type_id;
+	__u32 max_entries;
+	bool btf_load_err;
+	bool map_create_err;
+	bool ordered_map;
+	bool lossless_map;
+	int hdr_len_delta;
+	int type_off_delta;
+	int str_off_delta;
+	int str_len_delta;
+};
+
+static struct btf_raw_test raw_tests[] = {
+/* enum E {
+ *     E0,
+ *     E1,
+ * };
+ *
+ * struct A {
+ *	unsigned long long m;
+ *	int n;
+ *	char o;
+ *	[3 bytes hole]
+ *	int p[8];
+ *	int q[4][8];
+ *	enum E r;
+ * };
+ */
+{
+	.descr = "struct test #1",
+	.raw_types = {
+		/* int */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),	/* [1] */
+		/* unsigned long long */
+		BTF_TYPE_INT_ENC(0, 0, 0, 64, 8),		/* [2] */
+		/* char */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 8, 1),	/* [3] */
+		/* int[8] */
+		BTF_TYPE_ARRAY_ENC(1, 1, 8),			/* [4] */
+		/* struct A { */				/* [5] */
+		BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 6), 180),
+		BTF_MEMBER_ENC(NAME_TBD, 2, 0),	/* unsigned long long m;*/
+		BTF_MEMBER_ENC(NAME_TBD, 1, 64),/* int n;		*/
+		BTF_MEMBER_ENC(NAME_TBD, 3, 96),/* char o;		*/
+		BTF_MEMBER_ENC(NAME_TBD, 4, 128),/* int p[8]		*/
+		BTF_MEMBER_ENC(NAME_TBD, 6, 384),/* int q[4][8]		*/
+		BTF_MEMBER_ENC(NAME_TBD, 7, 1408), /* enum E r		*/
+		/* } */
+		/* int[4][8] */
+		BTF_TYPE_ARRAY_ENC(4, 1, 4),			/* [6] */
+		/* enum E */					/* [7] */
+		BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_ENUM, 0, 2), sizeof(int)),
+		BTF_ENUM_ENC(NAME_TBD, 0),
+		BTF_ENUM_ENC(NAME_TBD, 1),
+		BTF_END_RAW,
+	},
+	.str_sec = "\0A\0m\0n\0o\0p\0q\0r\0E\0E0\0E1",
+	.str_sec_size = sizeof("\0A\0m\0n\0o\0p\0q\0r\0E\0E0\0E1"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "struct_test1_map",
+	.key_size = sizeof(int),
+	.value_size = 180,
+	.key_type_id = 1,
+	.value_type_id = 5,
+	.max_entries = 4,
+},
+
+/* typedef struct b Struct_B;
+ *
+ * struct A {
+ *     int m;
+ *     struct b n[4];
+ *     const Struct_B o[4];
+ * };
+ *
+ * struct B {
+ *     int m;
+ *     int n;
+ * };
+ */
+{
+	.descr = "struct test #2",
+	.raw_types = {
+		/* int */					/* [1] */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+		/* struct b [4] */				/* [2] */
+		BTF_TYPE_ARRAY_ENC(4, 1, 4),
+
+		/* struct A { */				/* [3] */
+		BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 3), 68),
+		BTF_MEMBER_ENC(NAME_TBD, 1, 0),	/* int m;		*/
+		BTF_MEMBER_ENC(NAME_TBD, 2, 32),/* struct B n[4]	*/
+		BTF_MEMBER_ENC(NAME_TBD, 8, 288),/* const Struct_B o[4];*/
+		/* } */
+
+		/* struct B { */				/* [4] */
+		BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 2), 8),
+		BTF_MEMBER_ENC(NAME_TBD, 1, 0),	/* int m; */
+		BTF_MEMBER_ENC(NAME_TBD, 1, 32),/* int n; */
+		/* } */
+
+		/* const int */					/* [5] */
+		BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 1),
+		/* typedef struct b Struct_B */	/* [6] */
+		BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_TYPEDEF, 0, 0), 4),
+		/* const Struct_B */				/* [7] */
+		BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 6),
+		/* const Struct_B [4] */			/* [8] */
+		BTF_TYPE_ARRAY_ENC(7, 1, 4),
+		BTF_END_RAW,
+	},
+	.str_sec = "\0A\0m\0n\0o\0B\0m\0n\0Struct_B",
+	.str_sec_size = sizeof("\0A\0m\0n\0o\0B\0m\0n\0Struct_B"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "struct_test2_map",
+	.key_size = sizeof(int),
+	.value_size = 68,
+	.key_type_id = 1,
+	.value_type_id = 3,
+	.max_entries = 4,
+},
+
+{
+	.descr = "struct test #3 Invalid member offset",
+	.raw_types = {
+		/* int */					/* [1] */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+		/* int64 */					/* [2] */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 64, 8),
+
+		/* struct A { */				/* [3] */
+		BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 2), 16),
+		BTF_MEMBER_ENC(NAME_TBD, 1, 64),	/* int m;		*/
+		BTF_MEMBER_ENC(NAME_TBD, 2, 0),		/* int64 n; */
+		/* } */
+		BTF_END_RAW,
+	},
+	.str_sec = "\0A\0m\0n\0",
+	.str_sec_size = sizeof("\0A\0m\0n\0"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "struct_test3_map",
+	.key_size = sizeof(int),
+	.value_size = 16,
+	.key_type_id = 1,
+	.value_type_id = 3,
+	.max_entries = 4,
+	.btf_load_err = true,
+	.err_str = "Invalid member bits_offset",
+},
+
+/* Test member exceeds the size of struct.
+ *
+ * struct A {
+ *     int m;
+ *     int n;
+ * };
+ */
+{
+	.descr = "size check test #1",
+	.raw_types = {
+		/* int */					/* [1] */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+		/* struct A { */				/* [2] */
+		BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 2), sizeof(int) * 2 -  1),
+		BTF_MEMBER_ENC(NAME_TBD, 1, 0),	/* int m; */
+		BTF_MEMBER_ENC(NAME_TBD, 1, 32),/* int n; */
+		/* } */
+		BTF_END_RAW,
+	},
+	.str_sec = "\0A\0m\0n",
+	.str_sec_size = sizeof("\0A\0m\0n"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "size_check1_map",
+	.key_size = sizeof(int),
+	.value_size = 1,
+	.key_type_id = 1,
+	.value_type_id = 2,
+	.max_entries = 4,
+	.btf_load_err = true,
+	.err_str = "Member exceeds struct_size",
+},
+
+/* Test member exeeds the size of struct
+ *
+ * struct A {
+ *     int m;
+ *     int n[2];
+ * };
+ */
+{
+	.descr = "size check test #2",
+	.raw_types = {
+		/* int */					/* [1] */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, sizeof(int)),
+		/* int[2] */					/* [2] */
+		BTF_TYPE_ARRAY_ENC(1, 1, 2),
+		/* struct A { */				/* [3] */
+		BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 2), sizeof(int) * 3 - 1),
+		BTF_MEMBER_ENC(NAME_TBD, 1, 0),	/* int m; */
+		BTF_MEMBER_ENC(NAME_TBD, 2, 32),/* int n[2]; */
+		/* } */
+		BTF_END_RAW,
+	},
+	.str_sec = "\0A\0m\0n",
+	.str_sec_size = sizeof("\0A\0m\0n"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "size_check2_map",
+	.key_size = sizeof(int),
+	.value_size = 1,
+	.key_type_id = 1,
+	.value_type_id = 3,
+	.max_entries = 4,
+	.btf_load_err = true,
+	.err_str = "Member exceeds struct_size",
+},
+
+/* Test member exeeds the size of struct
+ *
+ * struct A {
+ *     int m;
+ *     void *n;
+ * };
+ */
+{
+	.descr = "size check test #3",
+	.raw_types = {
+		/* int */					/* [1] */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, sizeof(int)),
+		/* void* */					/* [2] */
+		BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 0),
+		/* struct A { */				/* [3] */
+		BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 2), sizeof(int) + sizeof(void *) - 1),
+		BTF_MEMBER_ENC(NAME_TBD, 1, 0),	/* int m; */
+		BTF_MEMBER_ENC(NAME_TBD, 2, 32),/* void *n; */
+		/* } */
+		BTF_END_RAW,
+	},
+	.str_sec = "\0A\0m\0n",
+	.str_sec_size = sizeof("\0A\0m\0n"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "size_check3_map",
+	.key_size = sizeof(int),
+	.value_size = 1,
+	.key_type_id = 1,
+	.value_type_id = 3,
+	.max_entries = 4,
+	.btf_load_err = true,
+	.err_str = "Member exceeds struct_size",
+},
+
+/* Test member exceeds the size of struct
+ *
+ * enum E {
+ *     E0,
+ *     E1,
+ * };
+ *
+ * struct A {
+ *     int m;
+ *     enum E n;
+ * };
+ */
+{
+	.descr = "size check test #4",
+	.raw_types = {
+		/* int */			/* [1] */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, sizeof(int)),
+		/* enum E { */			/* [2] */
+		BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_ENUM, 0, 2), sizeof(int)),
+		BTF_ENUM_ENC(NAME_TBD, 0),
+		BTF_ENUM_ENC(NAME_TBD, 1),
+		/* } */
+		/* struct A { */		/* [3] */
+		BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 2), sizeof(int) * 2 - 1),
+		BTF_MEMBER_ENC(NAME_TBD, 1, 0),	/* int m; */
+		BTF_MEMBER_ENC(NAME_TBD, 2, 32),/* enum E n; */
+		/* } */
+		BTF_END_RAW,
+	},
+	.str_sec = "\0E\0E0\0E1\0A\0m\0n",
+	.str_sec_size = sizeof("\0E\0E0\0E1\0A\0m\0n"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "size_check4_map",
+	.key_size = sizeof(int),
+	.value_size = 1,
+	.key_type_id = 1,
+	.value_type_id = 3,
+	.max_entries = 4,
+	.btf_load_err = true,
+	.err_str = "Member exceeds struct_size",
+},
+
+/* typedef const void * const_void_ptr;
+ * struct A {
+ *	const_void_ptr m;
+ * };
+ */
+{
+	.descr = "void test #1",
+	.raw_types = {
+		/* int */		/* [1] */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+		/* const void */	/* [2] */
+		BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 0),
+		/* const void* */	/* [3] */
+		BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 2),
+		/* typedef const void * const_void_ptr */
+		BTF_TYPEDEF_ENC(NAME_TBD, 3),	/* [4] */
+		/* struct A { */	/* [5] */
+		BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 1), sizeof(void *)),
+		/* const_void_ptr m; */
+		BTF_MEMBER_ENC(NAME_TBD, 4, 0),
+		/* } */
+		BTF_END_RAW,
+	},
+	.str_sec = "\0const_void_ptr\0A\0m",
+	.str_sec_size = sizeof("\0const_void_ptr\0A\0m"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "void_test1_map",
+	.key_size = sizeof(int),
+	.value_size = sizeof(void *),
+	.key_type_id = 1,
+	.value_type_id = 4,
+	.max_entries = 4,
+},
+
+/* struct A {
+ *     const void m;
+ * };
+ */
+{
+	.descr = "void test #2",
+	.raw_types = {
+		/* int */		/* [1] */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+		/* const void */	/* [2] */
+		BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 0),
+		/* struct A { */	/* [3] */
+		BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 1), 8),
+		/* const void m; */
+		BTF_MEMBER_ENC(NAME_TBD, 2, 0),
+		/* } */
+		BTF_END_RAW,
+	},
+	.str_sec = "\0A\0m",
+	.str_sec_size = sizeof("\0A\0m"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "void_test2_map",
+	.key_size = sizeof(int),
+	.value_size = sizeof(void *),
+	.key_type_id = 1,
+	.value_type_id = 3,
+	.max_entries = 4,
+	.btf_load_err = true,
+	.err_str = "Invalid member",
+},
+
+/* typedef const void * const_void_ptr;
+ * const_void_ptr[4]
+ */
+{
+	.descr = "void test #3",
+	.raw_types = {
+		/* int */		/* [1] */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+		/* const void */	/* [2] */
+		BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 0),
+		/* const void* */	/* [3] */
+		BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 2),
+		/* typedef const void * const_void_ptr */
+		BTF_TYPEDEF_ENC(NAME_TBD, 3),	/* [4] */
+		/* const_void_ptr[4] */
+		BTF_TYPE_ARRAY_ENC(4, 1, 4),	/* [5] */
+		BTF_END_RAW,
+	},
+	.str_sec = "\0const_void_ptr",
+	.str_sec_size = sizeof("\0const_void_ptr"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "void_test3_map",
+	.key_size = sizeof(int),
+	.value_size = sizeof(void *) * 4,
+	.key_type_id = 1,
+	.value_type_id = 5,
+	.max_entries = 4,
+},
+
+/* const void[4]  */
+{
+	.descr = "void test #4",
+	.raw_types = {
+		/* int */		/* [1] */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+		/* const void */	/* [2] */
+		BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 0),
+		/* const void[4] */	/* [3] */
+		BTF_TYPE_ARRAY_ENC(2, 1, 4),
+		BTF_END_RAW,
+	},
+	.str_sec = "\0A\0m",
+	.str_sec_size = sizeof("\0A\0m"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "void_test4_map",
+	.key_size = sizeof(int),
+	.value_size = sizeof(void *) * 4,
+	.key_type_id = 1,
+	.value_type_id = 3,
+	.max_entries = 4,
+	.btf_load_err = true,
+	.err_str = "Invalid elem",
+},
+
+/* Array_A  <------------------+
+ *     elem_type == Array_B    |
+ *                    |        |
+ *                    |        |
+ * Array_B  <-------- +        |
+ *      elem_type == Array A --+
+ */
+{
+	.descr = "loop test #1",
+	.raw_types = {
+		/* int */			/* [1] */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+		/* Array_A */			/* [2] */
+		BTF_TYPE_ARRAY_ENC(3, 1, 8),
+		/* Array_B */			/* [3] */
+		BTF_TYPE_ARRAY_ENC(2, 1, 8),
+		BTF_END_RAW,
+	},
+	.str_sec = "",
+	.str_sec_size = sizeof(""),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "loop_test1_map",
+	.key_size = sizeof(int),
+	.value_size = sizeof(sizeof(int) * 8),
+	.key_type_id = 1,
+	.value_type_id = 2,
+	.max_entries = 4,
+	.btf_load_err = true,
+	.err_str = "Loop detected",
+},
+
+/* typedef is _before_ the BTF type of Array_A and Array_B
+ *
+ * typedef Array_B int_array;
+ *
+ * Array_A  <------------------+
+ *     elem_type == int_array  |
+ *                    |        |
+ *                    |        |
+ * Array_B  <-------- +        |
+ *      elem_type == Array_A --+
+ */
+{
+	.descr = "loop test #2",
+	.raw_types = {
+		/* int */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),	/* [1] */
+		/* typedef Array_B int_array */
+		BTF_TYPEDEF_ENC(1, 4),				/* [2] */
+		/* Array_A */
+		BTF_TYPE_ARRAY_ENC(2, 1, 8),			/* [3] */
+		/* Array_B */
+		BTF_TYPE_ARRAY_ENC(3, 1, 8),			/* [4] */
+		BTF_END_RAW,
+	},
+	.str_sec = "\0int_array\0",
+	.str_sec_size = sizeof("\0int_array"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "loop_test2_map",
+	.key_size = sizeof(int),
+	.value_size = sizeof(sizeof(int) * 8),
+	.key_type_id = 1,
+	.value_type_id = 2,
+	.max_entries = 4,
+	.btf_load_err = true,
+	.err_str = "Loop detected",
+},
+
+/* Array_A  <------------------+
+ *     elem_type == Array_B    |
+ *                    |        |
+ *                    |        |
+ * Array_B  <-------- +        |
+ *      elem_type == Array_A --+
+ */
+{
+	.descr = "loop test #3",
+	.raw_types = {
+		/* int */				/* [1] */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+		/* Array_A */				/* [2] */
+		BTF_TYPE_ARRAY_ENC(3, 1, 8),
+		/* Array_B */				/* [3] */
+		BTF_TYPE_ARRAY_ENC(2, 1, 8),
+		BTF_END_RAW,
+	},
+	.str_sec = "",
+	.str_sec_size = sizeof(""),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "loop_test3_map",
+	.key_size = sizeof(int),
+	.value_size = sizeof(sizeof(int) * 8),
+	.key_type_id = 1,
+	.value_type_id = 2,
+	.max_entries = 4,
+	.btf_load_err = true,
+	.err_str = "Loop detected",
+},
+
+/* typedef is _between_ the BTF type of Array_A and Array_B
+ *
+ * typedef Array_B int_array;
+ *
+ * Array_A  <------------------+
+ *     elem_type == int_array  |
+ *                    |        |
+ *                    |        |
+ * Array_B  <-------- +        |
+ *      elem_type == Array_A --+
+ */
+{
+	.descr = "loop test #4",
+	.raw_types = {
+		/* int */				/* [1] */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+		/* Array_A */				/* [2] */
+		BTF_TYPE_ARRAY_ENC(3, 1, 8),
+		/* typedef Array_B int_array */		/* [3] */
+		BTF_TYPEDEF_ENC(NAME_TBD, 4),
+		/* Array_B */				/* [4] */
+		BTF_TYPE_ARRAY_ENC(2, 1, 8),
+		BTF_END_RAW,
+	},
+	.str_sec = "\0int_array\0",
+	.str_sec_size = sizeof("\0int_array"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "loop_test4_map",
+	.key_size = sizeof(int),
+	.value_size = sizeof(sizeof(int) * 8),
+	.key_type_id = 1,
+	.value_type_id = 2,
+	.max_entries = 4,
+	.btf_load_err = true,
+	.err_str = "Loop detected",
+},
+
+/* typedef struct B Struct_B
+ *
+ * struct A {
+ *     int x;
+ *     Struct_B y;
+ * };
+ *
+ * struct B {
+ *     int x;
+ *     struct A y;
+ * };
+ */
+{
+	.descr = "loop test #5",
+	.raw_types = {
+		/* int */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),	/* [1] */
+		/* struct A */					/* [2] */
+		BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 2), 8),
+		BTF_MEMBER_ENC(NAME_TBD, 1, 0),	/* int x;	*/
+		BTF_MEMBER_ENC(NAME_TBD, 3, 32),/* Struct_B y;	*/
+		/* typedef struct B Struct_B */
+		BTF_TYPEDEF_ENC(NAME_TBD, 4),			/* [3] */
+		/* struct B */					/* [4] */
+		BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 2), 8),
+		BTF_MEMBER_ENC(NAME_TBD, 1, 0),	/* int x;	*/
+		BTF_MEMBER_ENC(NAME_TBD, 2, 32),/* struct A y;	*/
+		BTF_END_RAW,
+	},
+	.str_sec = "\0A\0x\0y\0Struct_B\0B\0x\0y",
+	.str_sec_size = sizeof("\0A\0x\0y\0Struct_B\0B\0x\0y"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "loop_test5_map",
+	.key_size = sizeof(int),
+	.value_size = 8,
+	.key_type_id = 1,
+	.value_type_id = 2,
+	.max_entries = 4,
+	.btf_load_err = true,
+	.err_str = "Loop detected",
+},
+
+/* struct A {
+ *     int x;
+ *     struct A array_a[4];
+ * };
+ */
+{
+	.descr = "loop test #6",
+	.raw_types = {
+		/* int */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),	/* [1] */
+		BTF_TYPE_ARRAY_ENC(3, 1, 4),			/* [2] */
+		/* struct A */					/* [3] */
+		BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 2), 8),
+		BTF_MEMBER_ENC(NAME_TBD, 1, 0),	/* int x;		*/
+		BTF_MEMBER_ENC(NAME_TBD, 2, 32),/* struct A array_a[4];	*/
+		BTF_END_RAW,
+	},
+	.str_sec = "\0A\0x\0y",
+	.str_sec_size = sizeof("\0A\0x\0y"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "loop_test6_map",
+	.key_size = sizeof(int),
+	.value_size = 8,
+	.key_type_id = 1,
+	.value_type_id = 2,
+	.max_entries = 4,
+	.btf_load_err = true,
+	.err_str = "Loop detected",
+},
+
+{
+	.descr = "loop test #7",
+	.raw_types = {
+		/* int */				/* [1] */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+		/* struct A { */			/* [2] */
+		BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 1), sizeof(void *)),
+		/*     const void *m;	*/
+		BTF_MEMBER_ENC(NAME_TBD, 3, 0),
+		/* CONST type_id=3	*/		/* [3] */
+		BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 4),
+		/* PTR type_id=2	*/		/* [4] */
+		BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 3),
+		BTF_END_RAW,
+	},
+	.str_sec = "\0A\0m",
+	.str_sec_size = sizeof("\0A\0m"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "loop_test7_map",
+	.key_size = sizeof(int),
+	.value_size = sizeof(void *),
+	.key_type_id = 1,
+	.value_type_id = 2,
+	.max_entries = 4,
+	.btf_load_err = true,
+	.err_str = "Loop detected",
+},
+
+{
+	.descr = "loop test #8",
+	.raw_types = {
+		/* int */				/* [1] */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+		/* struct A { */			/* [2] */
+		BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 1), sizeof(void *)),
+		/*     const void *m;	*/
+		BTF_MEMBER_ENC(NAME_TBD, 4, 0),
+		/* struct B { */			/* [3] */
+		BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 1), sizeof(void *)),
+		/*     const void *n;	*/
+		BTF_MEMBER_ENC(NAME_TBD, 6, 0),
+		/* CONST type_id=5	*/		/* [4] */
+		BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 5),
+		/* PTR type_id=6	*/		/* [5] */
+		BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 6),
+		/* CONST type_id=7	*/		/* [6] */
+		BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 7),
+		/* PTR type_id=4	*/		/* [7] */
+		BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 4),
+		BTF_END_RAW,
+	},
+	.str_sec = "\0A\0m\0B\0n",
+	.str_sec_size = sizeof("\0A\0m\0B\0n"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "loop_test8_map",
+	.key_size = sizeof(int),
+	.value_size = sizeof(void *),
+	.key_type_id = 1,
+	.value_type_id = 2,
+	.max_entries = 4,
+	.btf_load_err = true,
+	.err_str = "Loop detected",
+},
+
+{
+	.descr = "string section does not end with null",
+	.raw_types = {
+		/* int */				/* [1] */
+		BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),
+		BTF_END_RAW,
+	},
+	.str_sec = "\0int",
+	.str_sec_size = sizeof("\0int") - 1,
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "hdr_test_map",
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 4,
+	.btf_load_err = true,
+	.err_str = "Invalid string section",
+},
+
+{
+	.descr = "empty string section",
+	.raw_types = {
+		/* int */				/* [1] */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+		BTF_END_RAW,
+	},
+	.str_sec = "",
+	.str_sec_size = 0,
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "hdr_test_map",
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 4,
+	.btf_load_err = true,
+	.err_str = "Invalid string section",
+},
+
+{
+	.descr = "empty type section",
+	.raw_types = {
+		BTF_END_RAW,
+	},
+	.str_sec = "\0int",
+	.str_sec_size = sizeof("\0int"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "hdr_test_map",
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 4,
+	.btf_load_err = true,
+	.err_str = "No type found",
+},
+
+{
+	.descr = "btf_header test. Longer hdr_len",
+	.raw_types = {
+		/* int */				/* [1] */
+		BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),
+		BTF_END_RAW,
+	},
+	.str_sec = "\0int",
+	.str_sec_size = sizeof("\0int"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "hdr_test_map",
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 4,
+	.btf_load_err = true,
+	.hdr_len_delta = 4,
+	.err_str = "Unsupported btf_header",
+},
+
+{
+	.descr = "btf_header test. Gap between hdr and type",
+	.raw_types = {
+		/* int */				/* [1] */
+		BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),
+		BTF_END_RAW,
+	},
+	.str_sec = "\0int",
+	.str_sec_size = sizeof("\0int"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "hdr_test_map",
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 4,
+	.btf_load_err = true,
+	.type_off_delta = 4,
+	.err_str = "Unsupported section found",
+},
+
+{
+	.descr = "btf_header test. Gap between type and str",
+	.raw_types = {
+		/* int */				/* [1] */
+		BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),
+		BTF_END_RAW,
+	},
+	.str_sec = "\0int",
+	.str_sec_size = sizeof("\0int"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "hdr_test_map",
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 4,
+	.btf_load_err = true,
+	.str_off_delta = 4,
+	.err_str = "Unsupported section found",
+},
+
+{
+	.descr = "btf_header test. Overlap between type and str",
+	.raw_types = {
+		/* int */				/* [1] */
+		BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),
+		BTF_END_RAW,
+	},
+	.str_sec = "\0int",
+	.str_sec_size = sizeof("\0int"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "hdr_test_map",
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 4,
+	.btf_load_err = true,
+	.str_off_delta = -4,
+	.err_str = "Section overlap found",
+},
+
+{
+	.descr = "btf_header test. Larger BTF size",
+	.raw_types = {
+		/* int */				/* [1] */
+		BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),
+		BTF_END_RAW,
+	},
+	.str_sec = "\0int",
+	.str_sec_size = sizeof("\0int"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "hdr_test_map",
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 4,
+	.btf_load_err = true,
+	.str_len_delta = -4,
+	.err_str = "Unsupported section found",
+},
+
+{
+	.descr = "btf_header test. Smaller BTF size",
+	.raw_types = {
+		/* int */				/* [1] */
+		BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),
+		BTF_END_RAW,
+	},
+	.str_sec = "\0int",
+	.str_sec_size = sizeof("\0int"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "hdr_test_map",
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 4,
+	.btf_load_err = true,
+	.str_len_delta = 4,
+	.err_str = "Total section length too long",
+},
+
+{
+	.descr = "array test. index_type/elem_type \"int\"",
+	.raw_types = {
+		/* int */				/* [1] */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+		/* int[16] */				/* [2] */
+		BTF_TYPE_ARRAY_ENC(1, 1, 16),
+		BTF_END_RAW,
+	},
+	.str_sec = "",
+	.str_sec_size = sizeof(""),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "array_test_map",
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 4,
+},
+
+{
+	.descr = "array test. index_type/elem_type \"const int\"",
+	.raw_types = {
+		/* int */				/* [1] */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+		/* int[16] */				/* [2] */
+		BTF_TYPE_ARRAY_ENC(3, 3, 16),
+		/* CONST type_id=1 */			/* [3] */
+		BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 1),
+		BTF_END_RAW,
+	},
+	.str_sec = "",
+	.str_sec_size = sizeof(""),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "array_test_map",
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 4,
+},
+
+{
+	.descr = "array test. index_type \"const int:31\"",
+	.raw_types = {
+		/* int */				/* [1] */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+		/* int:31 */				/* [2] */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 31, 4),
+		/* int[16] */				/* [3] */
+		BTF_TYPE_ARRAY_ENC(1, 4, 16),
+		/* CONST type_id=2 */			/* [4] */
+		BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 2),
+		BTF_END_RAW,
+	},
+	.str_sec = "",
+	.str_sec_size = sizeof(""),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "array_test_map",
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 4,
+	.btf_load_err = true,
+	.err_str = "Invalid index",
+},
+
+{
+	.descr = "array test. elem_type \"const int:31\"",
+	.raw_types = {
+		/* int */				/* [1] */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+		/* int:31 */				/* [2] */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 31, 4),
+		/* int[16] */				/* [3] */
+		BTF_TYPE_ARRAY_ENC(4, 1, 16),
+		/* CONST type_id=2 */			/* [4] */
+		BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 2),
+		BTF_END_RAW,
+	},
+	.str_sec = "",
+	.str_sec_size = sizeof(""),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "array_test_map",
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 4,
+	.btf_load_err = true,
+	.err_str = "Invalid array of int",
+},
+
+{
+	.descr = "array test. index_type \"void\"",
+	.raw_types = {
+		/* int */				/* [1] */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+		/* int[16] */				/* [2] */
+		BTF_TYPE_ARRAY_ENC(1, 0, 16),
+		BTF_END_RAW,
+	},
+	.str_sec = "",
+	.str_sec_size = sizeof(""),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "array_test_map",
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 4,
+	.btf_load_err = true,
+	.err_str = "Invalid index",
+},
+
+{
+	.descr = "array test. index_type \"const void\"",
+	.raw_types = {
+		/* int */				/* [1] */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+		/* int[16] */				/* [2] */
+		BTF_TYPE_ARRAY_ENC(1, 3, 16),
+		/* CONST type_id=0 (void) */		/* [3] */
+		BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 0),
+		BTF_END_RAW,
+	},
+	.str_sec = "",
+	.str_sec_size = sizeof(""),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "array_test_map",
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 4,
+	.btf_load_err = true,
+	.err_str = "Invalid index",
+},
+
+{
+	.descr = "array test. elem_type \"const void\"",
+	.raw_types = {
+		/* int */				/* [1] */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+		/* int[16] */				/* [2] */
+		BTF_TYPE_ARRAY_ENC(3, 1, 16),
+		/* CONST type_id=0 (void) */		/* [3] */
+		BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 0),
+		BTF_END_RAW,
+	},
+	.str_sec = "",
+	.str_sec_size = sizeof(""),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "array_test_map",
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 4,
+	.btf_load_err = true,
+	.err_str = "Invalid elem",
+},
+
+{
+	.descr = "array test. elem_type \"const void *\"",
+	.raw_types = {
+		/* int */				/* [1] */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+		/* const void *[16] */			/* [2] */
+		BTF_TYPE_ARRAY_ENC(3, 1, 16),
+		/* CONST type_id=4 */			/* [3] */
+		BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 4),
+		/* void* */				/* [4] */
+		BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 0),
+		BTF_END_RAW,
+	},
+	.str_sec = "",
+	.str_sec_size = sizeof(""),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "array_test_map",
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 4,
+},
+
+{
+	.descr = "array test. index_type \"const void *\"",
+	.raw_types = {
+		/* int */				/* [1] */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+		/* const void *[16] */			/* [2] */
+		BTF_TYPE_ARRAY_ENC(3, 3, 16),
+		/* CONST type_id=4 */			/* [3] */
+		BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 4),
+		/* void* */				/* [4] */
+		BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 0),
+		BTF_END_RAW,
+	},
+	.str_sec = "",
+	.str_sec_size = sizeof(""),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "array_test_map",
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 4,
+	.btf_load_err = true,
+	.err_str = "Invalid index",
+},
+
+{
+	.descr = "array test. t->size != 0\"",
+	.raw_types = {
+		/* int */				/* [1] */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+		/* int[16] */				/* [2] */
+		BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_ARRAY, 0, 0), 1),
+		BTF_ARRAY_ENC(1, 1, 16),
+		BTF_END_RAW,
+	},
+	.str_sec = "",
+	.str_sec_size = sizeof(""),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "array_test_map",
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 4,
+	.btf_load_err = true,
+	.err_str = "size != 0",
+},
+
+{
+	.descr = "int test. invalid int_data",
+	.raw_types = {
+		BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_INT, 0, 0), 4),
+		0x10000000,
+		BTF_END_RAW,
+	},
+	.str_sec = "",
+	.str_sec_size = sizeof(""),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "array_test_map",
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 4,
+	.btf_load_err = true,
+	.err_str = "Invalid int_data",
+},
+
+{
+	.descr = "invalid BTF_INFO",
+	.raw_types = {
+		/* int */				/* [1] */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+		BTF_TYPE_ENC(0, 0x10000000, 4),
+		BTF_END_RAW,
+	},
+	.str_sec = "",
+	.str_sec_size = sizeof(""),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "array_test_map",
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 4,
+	.btf_load_err = true,
+	.err_str = "Invalid btf_info",
+},
+
+{
+	.descr = "fwd test. t->type != 0\"",
+	.raw_types = {
+		/* int */				/* [1] */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+		/* fwd type */				/* [2] */
+		BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FWD, 0, 0), 1),
+		BTF_END_RAW,
+	},
+	.str_sec = "",
+	.str_sec_size = sizeof(""),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "fwd_test_map",
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 4,
+	.btf_load_err = true,
+	.err_str = "type != 0",
+},
+
+{
+	.descr = "typedef (invalid name, name_off = 0)",
+	.raw_types = {
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),	/* [1] */
+		BTF_TYPEDEF_ENC(0, 1),				/* [2] */
+		BTF_END_RAW,
+	},
+	.str_sec = "\0__int",
+	.str_sec_size = sizeof("\0__int"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "typedef_check_btf",
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 4,
+	.btf_load_err = true,
+	.err_str = "Invalid name",
+},
+
+{
+	.descr = "typedef (invalid name, invalid identifier)",
+	.raw_types = {
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),	/* [1] */
+		BTF_TYPEDEF_ENC(NAME_TBD, 1),			/* [2] */
+		BTF_END_RAW,
+	},
+	.str_sec = "\0__!int",
+	.str_sec_size = sizeof("\0__!int"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "typedef_check_btf",
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 4,
+	.btf_load_err = true,
+	.err_str = "Invalid name",
+},
+
+{
+	.descr = "ptr type (invalid name, name_off <> 0)",
+	.raw_types = {
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),		/* [1] */
+		BTF_TYPE_ENC(NAME_TBD,
+			     BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 1),	/* [2] */
+		BTF_END_RAW,
+	},
+	.str_sec = "\0__int",
+	.str_sec_size = sizeof("\0__int"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "ptr_type_check_btf",
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 4,
+	.btf_load_err = true,
+	.err_str = "Invalid name",
+},
+
+{
+	.descr = "volatile type (invalid name, name_off <> 0)",
+	.raw_types = {
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),		/* [1] */
+		BTF_TYPE_ENC(NAME_TBD,
+			     BTF_INFO_ENC(BTF_KIND_VOLATILE, 0, 0), 1),	/* [2] */
+		BTF_END_RAW,
+	},
+	.str_sec = "\0__int",
+	.str_sec_size = sizeof("\0__int"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "volatile_type_check_btf",
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 4,
+	.btf_load_err = true,
+	.err_str = "Invalid name",
+},
+
+{
+	.descr = "const type (invalid name, name_off <> 0)",
+	.raw_types = {
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),		/* [1] */
+		BTF_TYPE_ENC(NAME_TBD,
+			     BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 1),	/* [2] */
+		BTF_END_RAW,
+	},
+	.str_sec = "\0__int",
+	.str_sec_size = sizeof("\0__int"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "const_type_check_btf",
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 4,
+	.btf_load_err = true,
+	.err_str = "Invalid name",
+},
+
+{
+	.descr = "restrict type (invalid name, name_off <> 0)",
+	.raw_types = {
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),		/* [1] */
+		BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 1),	/* [2] */
+		BTF_TYPE_ENC(NAME_TBD,
+			     BTF_INFO_ENC(BTF_KIND_RESTRICT, 0, 0), 2),	/* [3] */
+		BTF_END_RAW,
+	},
+	.str_sec = "\0__int",
+	.str_sec_size = sizeof("\0__int"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "restrict_type_check_btf",
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 4,
+	.btf_load_err = true,
+	.err_str = "Invalid name",
+},
+
+{
+	.descr = "fwd type (invalid name, name_off = 0)",
+	.raw_types = {
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),		/* [1] */
+		BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FWD, 0, 0), 0),	/* [2] */
+		BTF_END_RAW,
+	},
+	.str_sec = "\0__skb",
+	.str_sec_size = sizeof("\0__skb"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "fwd_type_check_btf",
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 4,
+	.btf_load_err = true,
+	.err_str = "Invalid name",
+},
+
+{
+	.descr = "fwd type (invalid name, invalid identifier)",
+	.raw_types = {
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),		/* [1] */
+		BTF_TYPE_ENC(NAME_TBD,
+			     BTF_INFO_ENC(BTF_KIND_FWD, 0, 0), 0),	/* [2] */
+		BTF_END_RAW,
+	},
+	.str_sec = "\0__!skb",
+	.str_sec_size = sizeof("\0__!skb"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "fwd_type_check_btf",
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 4,
+	.btf_load_err = true,
+	.err_str = "Invalid name",
+},
+
+{
+	.descr = "array type (invalid name, name_off <> 0)",
+	.raw_types = {
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),		/* [1] */
+		BTF_TYPE_ENC(NAME_TBD,
+			     BTF_INFO_ENC(BTF_KIND_ARRAY, 0, 0), 0),	/* [2] */
+		BTF_ARRAY_ENC(1, 1, 4),
+		BTF_END_RAW,
+	},
+	.str_sec = "\0__skb",
+	.str_sec_size = sizeof("\0__skb"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "array_type_check_btf",
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 4,
+	.btf_load_err = true,
+	.err_str = "Invalid name",
+},
+
+{
+	.descr = "struct type (name_off = 0)",
+	.raw_types = {
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),		/* [1] */
+		BTF_TYPE_ENC(0,
+			     BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 1), 4),	/* [2] */
+		BTF_MEMBER_ENC(NAME_TBD, 1, 0),
+		BTF_END_RAW,
+	},
+	.str_sec = "\0A",
+	.str_sec_size = sizeof("\0A"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "struct_type_check_btf",
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 4,
+},
+
+{
+	.descr = "struct type (invalid name, invalid identifier)",
+	.raw_types = {
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),		/* [1] */
+		BTF_TYPE_ENC(NAME_TBD,
+			     BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 1), 4),	/* [2] */
+		BTF_MEMBER_ENC(NAME_TBD, 1, 0),
+		BTF_END_RAW,
+	},
+	.str_sec = "\0A!\0B",
+	.str_sec_size = sizeof("\0A!\0B"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "struct_type_check_btf",
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 4,
+	.btf_load_err = true,
+	.err_str = "Invalid name",
+},
+
+{
+	.descr = "struct member (name_off = 0)",
+	.raw_types = {
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),		/* [1] */
+		BTF_TYPE_ENC(0,
+			     BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 1), 4),	/* [2] */
+		BTF_MEMBER_ENC(NAME_TBD, 1, 0),
+		BTF_END_RAW,
+	},
+	.str_sec = "\0A",
+	.str_sec_size = sizeof("\0A"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "struct_type_check_btf",
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 4,
+},
+
+{
+	.descr = "struct member (invalid name, invalid identifier)",
+	.raw_types = {
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),		/* [1] */
+		BTF_TYPE_ENC(NAME_TBD,
+			     BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 1), 4),	/* [2] */
+		BTF_MEMBER_ENC(NAME_TBD, 1, 0),
+		BTF_END_RAW,
+	},
+	.str_sec = "\0A\0B*",
+	.str_sec_size = sizeof("\0A\0B*"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "struct_type_check_btf",
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 4,
+	.btf_load_err = true,
+	.err_str = "Invalid name",
+},
+
+{
+	.descr = "enum type (name_off = 0)",
+	.raw_types = {
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),		/* [1] */
+		BTF_TYPE_ENC(0,
+			     BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1),
+			     sizeof(int)),				/* [2] */
+		BTF_ENUM_ENC(NAME_TBD, 0),
+		BTF_END_RAW,
+	},
+	.str_sec = "\0A\0B",
+	.str_sec_size = sizeof("\0A\0B"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "enum_type_check_btf",
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 4,
+},
+
+{
+	.descr = "enum type (invalid name, invalid identifier)",
+	.raw_types = {
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),		/* [1] */
+		BTF_TYPE_ENC(NAME_TBD,
+			     BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1),
+			     sizeof(int)),				/* [2] */
+		BTF_ENUM_ENC(NAME_TBD, 0),
+		BTF_END_RAW,
+	},
+	.str_sec = "\0A!\0B",
+	.str_sec_size = sizeof("\0A!\0B"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "enum_type_check_btf",
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 4,
+	.btf_load_err = true,
+	.err_str = "Invalid name",
+},
+
+{
+	.descr = "enum member (invalid name, name_off = 0)",
+	.raw_types = {
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),		/* [1] */
+		BTF_TYPE_ENC(0,
+			     BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1),
+			     sizeof(int)),				/* [2] */
+		BTF_ENUM_ENC(0, 0),
+		BTF_END_RAW,
+	},
+	.str_sec = "",
+	.str_sec_size = sizeof(""),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "enum_type_check_btf",
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 4,
+	.btf_load_err = true,
+	.err_str = "Invalid name",
+},
+
+{
+	.descr = "enum member (invalid name, invalid identifier)",
+	.raw_types = {
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),		/* [1] */
+		BTF_TYPE_ENC(0,
+			     BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1),
+			     sizeof(int)),				/* [2] */
+		BTF_ENUM_ENC(NAME_TBD, 0),
+		BTF_END_RAW,
+	},
+	.str_sec = "\0A!",
+	.str_sec_size = sizeof("\0A!"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "enum_type_check_btf",
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 4,
+	.btf_load_err = true,
+	.err_str = "Invalid name",
+},
+{
+	.descr = "arraymap invalid btf key (a bit field)",
+	.raw_types = {
+		/* int */				/* [1] */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+		/* 32 bit int with 32 bit offset */	/* [2] */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 32, 32, 8),
+		BTF_END_RAW,
+	},
+	.str_sec = "",
+	.str_sec_size = sizeof(""),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "array_map_check_btf",
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.key_type_id = 2,
+	.value_type_id = 1,
+	.max_entries = 4,
+	.map_create_err = true,
+},
+
+{
+	.descr = "arraymap invalid btf key (!= 32 bits)",
+	.raw_types = {
+		/* int */				/* [1] */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+		/* 16 bit int with 0 bit offset */	/* [2] */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 16, 2),
+		BTF_END_RAW,
+	},
+	.str_sec = "",
+	.str_sec_size = sizeof(""),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "array_map_check_btf",
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.key_type_id = 2,
+	.value_type_id = 1,
+	.max_entries = 4,
+	.map_create_err = true,
+},
+
+{
+	.descr = "arraymap invalid btf value (too small)",
+	.raw_types = {
+		/* int */				/* [1] */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+		BTF_END_RAW,
+	},
+	.str_sec = "",
+	.str_sec_size = sizeof(""),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "array_map_check_btf",
+	.key_size = sizeof(int),
+	/* btf_value_size < map->value_size */
+	.value_size = sizeof(__u64),
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 4,
+	.map_create_err = true,
+},
+
+{
+	.descr = "arraymap invalid btf value (too big)",
+	.raw_types = {
+		/* int */				/* [1] */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+		BTF_END_RAW,
+	},
+	.str_sec = "",
+	.str_sec_size = sizeof(""),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "array_map_check_btf",
+	.key_size = sizeof(int),
+	/* btf_value_size > map->value_size */
+	.value_size = sizeof(__u16),
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 4,
+	.map_create_err = true,
+},
+
+}; /* struct btf_raw_test raw_tests[] */
+
+static const char *get_next_str(const char *start, const char *end)
+{
+	return start < end - 1 ? start + 1 : NULL;
+}
+
+static int get_type_sec_size(const __u32 *raw_types)
+{
+	int i;
+
+	for (i = MAX_NR_RAW_TYPES - 1;
+	     i >= 0 && raw_types[i] != BTF_END_RAW;
+	     i--)
+		;
+
+	return i < 0 ? i : i * sizeof(raw_types[0]);
+}
+
+static void *btf_raw_create(const struct btf_header *hdr,
+			    const __u32 *raw_types,
+			    const char *str,
+			    unsigned int str_sec_size,
+			    unsigned int *btf_size)
+{
+	const char *next_str = str, *end_str = str + str_sec_size;
+	unsigned int size_needed, offset;
+	struct btf_header *ret_hdr;
+	int i, type_sec_size;
+	uint32_t *ret_types;
+	void *raw_btf;
+
+	type_sec_size = get_type_sec_size(raw_types);
+	if (CHECK(type_sec_size < 0, "Cannot get nr_raw_types"))
+		return NULL;
+
+	size_needed = sizeof(*hdr) + type_sec_size + str_sec_size;
+	raw_btf = malloc(size_needed);
+	if (CHECK(!raw_btf, "Cannot allocate memory for raw_btf"))
+		return NULL;
+
+	/* Copy header */
+	memcpy(raw_btf, hdr, sizeof(*hdr));
+	offset = sizeof(*hdr);
+
+	/* Copy type section */
+	ret_types = raw_btf + offset;
+	for (i = 0; i < type_sec_size / sizeof(raw_types[0]); i++) {
+		if (raw_types[i] == NAME_TBD) {
+			next_str = get_next_str(next_str, end_str);
+			if (CHECK(!next_str, "Error in getting next_str")) {
+				free(raw_btf);
+				return NULL;
+			}
+			ret_types[i] = next_str - str;
+			next_str += strlen(next_str);
+		} else {
+			ret_types[i] = raw_types[i];
+		}
+	}
+	offset += type_sec_size;
+
+	/* Copy string section */
+	memcpy(raw_btf + offset, str, str_sec_size);
+
+	ret_hdr = (struct btf_header *)raw_btf;
+	ret_hdr->type_len = type_sec_size;
+	ret_hdr->str_off = type_sec_size;
+	ret_hdr->str_len = str_sec_size;
+
+	*btf_size = size_needed;
+
+	return raw_btf;
+}
+
+static int do_test_raw(unsigned int test_num)
+{
+	struct btf_raw_test *test = &raw_tests[test_num - 1];
+	struct bpf_create_map_attr create_attr = {};
+	int map_fd = -1, btf_fd = -1;
+	unsigned int raw_btf_size;
+	struct btf_header *hdr;
+	void *raw_btf;
+	int err;
+
+	fprintf(stderr, "BTF raw test[%u] (%s): ", test_num, test->descr);
+	raw_btf = btf_raw_create(&hdr_tmpl,
+				 test->raw_types,
+				 test->str_sec,
+				 test->str_sec_size,
+				 &raw_btf_size);
+
+	if (!raw_btf)
+		return -1;
+
+	hdr = raw_btf;
+
+	hdr->hdr_len = (int)hdr->hdr_len + test->hdr_len_delta;
+	hdr->type_off = (int)hdr->type_off + test->type_off_delta;
+	hdr->str_off = (int)hdr->str_off + test->str_off_delta;
+	hdr->str_len = (int)hdr->str_len + test->str_len_delta;
+
+	*btf_log_buf = '\0';
+	btf_fd = bpf_load_btf(raw_btf, raw_btf_size,
+			      btf_log_buf, BTF_LOG_BUF_SIZE,
+			      args.always_log);
+	free(raw_btf);
+
+	err = ((btf_fd == -1) != test->btf_load_err);
+	if (CHECK(err, "btf_fd:%d test->btf_load_err:%u",
+		  btf_fd, test->btf_load_err) ||
+	    CHECK(test->err_str && !strstr(btf_log_buf, test->err_str),
+		  "expected err_str:%s", test->err_str)) {
+		err = -1;
+		goto done;
+	}
+
+	if (err || btf_fd == -1)
+		goto done;
+
+	create_attr.name = test->map_name;
+	create_attr.map_type = test->map_type;
+	create_attr.key_size = test->key_size;
+	create_attr.value_size = test->value_size;
+	create_attr.max_entries = test->max_entries;
+	create_attr.btf_fd = btf_fd;
+	create_attr.btf_key_type_id = test->key_type_id;
+	create_attr.btf_value_type_id = test->value_type_id;
+
+	map_fd = bpf_create_map_xattr(&create_attr);
+
+	err = ((map_fd == -1) != test->map_create_err);
+	CHECK(err, "map_fd:%d test->map_create_err:%u",
+	      map_fd, test->map_create_err);
+
+done:
+	if (!err)
+		fprintf(stderr, "OK");
+
+	if (*btf_log_buf && (err || args.always_log))
+		fprintf(stderr, "\n%s", btf_log_buf);
+
+	if (btf_fd != -1)
+		close(btf_fd);
+	if (map_fd != -1)
+		close(map_fd);
+
+	return err;
+}
+
+static int test_raw(void)
+{
+	unsigned int i;
+	int err = 0;
+
+	if (args.raw_test_num)
+		return count_result(do_test_raw(args.raw_test_num));
+
+	for (i = 1; i <= ARRAY_SIZE(raw_tests); i++)
+		err |= count_result(do_test_raw(i));
+
+	return err;
+}
+
+struct btf_get_info_test {
+	const char *descr;
+	const char *str_sec;
+	__u32 raw_types[MAX_NR_RAW_TYPES];
+	__u32 str_sec_size;
+	int btf_size_delta;
+	int (*special_test)(unsigned int test_num);
+};
+
+static int test_big_btf_info(unsigned int test_num);
+static int test_btf_id(unsigned int test_num);
+
+const struct btf_get_info_test get_info_tests[] = {
+{
+	.descr = "== raw_btf_size+1",
+	.raw_types = {
+		/* int */				/* [1] */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+		BTF_END_RAW,
+	},
+	.str_sec = "",
+	.str_sec_size = sizeof(""),
+	.btf_size_delta = 1,
+},
+{
+	.descr = "== raw_btf_size-3",
+	.raw_types = {
+		/* int */				/* [1] */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+		BTF_END_RAW,
+	},
+	.str_sec = "",
+	.str_sec_size = sizeof(""),
+	.btf_size_delta = -3,
+},
+{
+	.descr = "Large bpf_btf_info",
+	.raw_types = {
+		/* int */				/* [1] */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+		BTF_END_RAW,
+	},
+	.str_sec = "",
+	.str_sec_size = sizeof(""),
+	.special_test = test_big_btf_info,
+},
+{
+	.descr = "BTF ID",
+	.raw_types = {
+		/* int */				/* [1] */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+		/* unsigned int */			/* [2] */
+		BTF_TYPE_INT_ENC(0, 0, 0, 32, 4),
+		BTF_END_RAW,
+	},
+	.str_sec = "",
+	.str_sec_size = sizeof(""),
+	.special_test = test_btf_id,
+},
+};
+
+static inline __u64 ptr_to_u64(const void *ptr)
+{
+	return (__u64)(unsigned long)ptr;
+}
+
+static int test_big_btf_info(unsigned int test_num)
+{
+	const struct btf_get_info_test *test = &get_info_tests[test_num - 1];
+	uint8_t *raw_btf = NULL, *user_btf = NULL;
+	unsigned int raw_btf_size;
+	struct {
+		struct bpf_btf_info info;
+		uint64_t garbage;
+	} info_garbage;
+	struct bpf_btf_info *info;
+	int btf_fd = -1, err;
+	uint32_t info_len;
+
+	raw_btf = btf_raw_create(&hdr_tmpl,
+				 test->raw_types,
+				 test->str_sec,
+				 test->str_sec_size,
+				 &raw_btf_size);
+
+	if (!raw_btf)
+		return -1;
+
+	*btf_log_buf = '\0';
+
+	user_btf = malloc(raw_btf_size);
+	if (CHECK(!user_btf, "!user_btf")) {
+		err = -1;
+		goto done;
+	}
+
+	btf_fd = bpf_load_btf(raw_btf, raw_btf_size,
+			      btf_log_buf, BTF_LOG_BUF_SIZE,
+			      args.always_log);
+	if (CHECK(btf_fd == -1, "errno:%d", errno)) {
+		err = -1;
+		goto done;
+	}
+
+	/*
+	 * GET_INFO should error out if the userspace info
+	 * has non zero tailing bytes.
+	 */
+	info = &info_garbage.info;
+	memset(info, 0, sizeof(*info));
+	info_garbage.garbage = 0xdeadbeef;
+	info_len = sizeof(info_garbage);
+	info->btf = ptr_to_u64(user_btf);
+	info->btf_size = raw_btf_size;
+
+	err = bpf_obj_get_info_by_fd(btf_fd, info, &info_len);
+	if (CHECK(!err, "!err")) {
+		err = -1;
+		goto done;
+	}
+
+	/*
+	 * GET_INFO should succeed even info_len is larger than
+	 * the kernel supported as long as tailing bytes are zero.
+	 * The kernel supported info len should also be returned
+	 * to userspace.
+	 */
+	info_garbage.garbage = 0;
+	err = bpf_obj_get_info_by_fd(btf_fd, info, &info_len);
+	if (CHECK(err || info_len != sizeof(*info),
+		  "err:%d errno:%d info_len:%u sizeof(*info):%lu",
+		  err, errno, info_len, sizeof(*info))) {
+		err = -1;
+		goto done;
+	}
+
+	fprintf(stderr, "OK");
+
+done:
+	if (*btf_log_buf && (err || args.always_log))
+		fprintf(stderr, "\n%s", btf_log_buf);
+
+	free(raw_btf);
+	free(user_btf);
+
+	if (btf_fd != -1)
+		close(btf_fd);
+
+	return err;
+}
+
+static int test_btf_id(unsigned int test_num)
+{
+	const struct btf_get_info_test *test = &get_info_tests[test_num - 1];
+	struct bpf_create_map_attr create_attr = {};
+	uint8_t *raw_btf = NULL, *user_btf[2] = {};
+	int btf_fd[2] = {-1, -1}, map_fd = -1;
+	struct bpf_map_info map_info = {};
+	struct bpf_btf_info info[2] = {};
+	unsigned int raw_btf_size;
+	uint32_t info_len;
+	int err, i, ret;
+
+	raw_btf = btf_raw_create(&hdr_tmpl,
+				 test->raw_types,
+				 test->str_sec,
+				 test->str_sec_size,
+				 &raw_btf_size);
+
+	if (!raw_btf)
+		return -1;
+
+	*btf_log_buf = '\0';
+
+	for (i = 0; i < 2; i++) {
+		user_btf[i] = malloc(raw_btf_size);
+		if (CHECK(!user_btf[i], "!user_btf[%d]", i)) {
+			err = -1;
+			goto done;
+		}
+		info[i].btf = ptr_to_u64(user_btf[i]);
+		info[i].btf_size = raw_btf_size;
+	}
+
+	btf_fd[0] = bpf_load_btf(raw_btf, raw_btf_size,
+				 btf_log_buf, BTF_LOG_BUF_SIZE,
+				 args.always_log);
+	if (CHECK(btf_fd[0] == -1, "errno:%d", errno)) {
+		err = -1;
+		goto done;
+	}
+
+	/* Test BPF_OBJ_GET_INFO_BY_ID on btf_id */
+	info_len = sizeof(info[0]);
+	err = bpf_obj_get_info_by_fd(btf_fd[0], &info[0], &info_len);
+	if (CHECK(err, "errno:%d", errno)) {
+		err = -1;
+		goto done;
+	}
+
+	btf_fd[1] = bpf_btf_get_fd_by_id(info[0].id);
+	if (CHECK(btf_fd[1] == -1, "errno:%d", errno)) {
+		err = -1;
+		goto done;
+	}
+
+	ret = 0;
+	err = bpf_obj_get_info_by_fd(btf_fd[1], &info[1], &info_len);
+	if (CHECK(err || info[0].id != info[1].id ||
+		  info[0].btf_size != info[1].btf_size ||
+		  (ret = memcmp(user_btf[0], user_btf[1], info[0].btf_size)),
+		  "err:%d errno:%d id0:%u id1:%u btf_size0:%u btf_size1:%u memcmp:%d",
+		  err, errno, info[0].id, info[1].id,
+		  info[0].btf_size, info[1].btf_size, ret)) {
+		err = -1;
+		goto done;
+	}
+
+	/* Test btf members in struct bpf_map_info */
+	create_attr.name = "test_btf_id";
+	create_attr.map_type = BPF_MAP_TYPE_ARRAY;
+	create_attr.key_size = sizeof(int);
+	create_attr.value_size = sizeof(unsigned int);
+	create_attr.max_entries = 4;
+	create_attr.btf_fd = btf_fd[0];
+	create_attr.btf_key_type_id = 1;
+	create_attr.btf_value_type_id = 2;
+
+	map_fd = bpf_create_map_xattr(&create_attr);
+	if (CHECK(map_fd == -1, "errno:%d", errno)) {
+		err = -1;
+		goto done;
+	}
+
+	info_len = sizeof(map_info);
+	err = bpf_obj_get_info_by_fd(map_fd, &map_info, &info_len);
+	if (CHECK(err || map_info.btf_id != info[0].id ||
+		  map_info.btf_key_type_id != 1 || map_info.btf_value_type_id != 2,
+		  "err:%d errno:%d info.id:%u btf_id:%u btf_key_type_id:%u btf_value_type_id:%u",
+		  err, errno, info[0].id, map_info.btf_id, map_info.btf_key_type_id,
+		  map_info.btf_value_type_id)) {
+		err = -1;
+		goto done;
+	}
+
+	for (i = 0; i < 2; i++) {
+		close(btf_fd[i]);
+		btf_fd[i] = -1;
+	}
+
+	/* Test BTF ID is removed from the kernel */
+	btf_fd[0] = bpf_btf_get_fd_by_id(map_info.btf_id);
+	if (CHECK(btf_fd[0] == -1, "errno:%d", errno)) {
+		err = -1;
+		goto done;
+	}
+	close(btf_fd[0]);
+	btf_fd[0] = -1;
+
+	/* The map holds the last ref to BTF and its btf_id */
+	close(map_fd);
+	map_fd = -1;
+	btf_fd[0] = bpf_btf_get_fd_by_id(map_info.btf_id);
+	if (CHECK(btf_fd[0] != -1, "BTF lingers")) {
+		err = -1;
+		goto done;
+	}
+
+	fprintf(stderr, "OK");
+
+done:
+	if (*btf_log_buf && (err || args.always_log))
+		fprintf(stderr, "\n%s", btf_log_buf);
+
+	free(raw_btf);
+	if (map_fd != -1)
+		close(map_fd);
+	for (i = 0; i < 2; i++) {
+		free(user_btf[i]);
+		if (btf_fd[i] != -1)
+			close(btf_fd[i]);
+	}
+
+	return err;
+}
+
+static int do_test_get_info(unsigned int test_num)
+{
+	const struct btf_get_info_test *test = &get_info_tests[test_num - 1];
+	unsigned int raw_btf_size, user_btf_size, expected_nbytes;
+	uint8_t *raw_btf = NULL, *user_btf = NULL;
+	struct bpf_btf_info info = {};
+	int btf_fd = -1, err, ret;
+	uint32_t info_len;
+
+	fprintf(stderr, "BTF GET_INFO test[%u] (%s): ",
+		test_num, test->descr);
+
+	if (test->special_test)
+		return test->special_test(test_num);
+
+	raw_btf = btf_raw_create(&hdr_tmpl,
+				 test->raw_types,
+				 test->str_sec,
+				 test->str_sec_size,
+				 &raw_btf_size);
+
+	if (!raw_btf)
+		return -1;
+
+	*btf_log_buf = '\0';
+
+	user_btf = malloc(raw_btf_size);
+	if (CHECK(!user_btf, "!user_btf")) {
+		err = -1;
+		goto done;
+	}
+
+	btf_fd = bpf_load_btf(raw_btf, raw_btf_size,
+			      btf_log_buf, BTF_LOG_BUF_SIZE,
+			      args.always_log);
+	if (CHECK(btf_fd == -1, "errno:%d", errno)) {
+		err = -1;
+		goto done;
+	}
+
+	user_btf_size = (int)raw_btf_size + test->btf_size_delta;
+	expected_nbytes = min(raw_btf_size, user_btf_size);
+	if (raw_btf_size > expected_nbytes)
+		memset(user_btf + expected_nbytes, 0xff,
+		       raw_btf_size - expected_nbytes);
+
+	info_len = sizeof(info);
+	info.btf = ptr_to_u64(user_btf);
+	info.btf_size = user_btf_size;
+
+	ret = 0;
+	err = bpf_obj_get_info_by_fd(btf_fd, &info, &info_len);
+	if (CHECK(err || !info.id || info_len != sizeof(info) ||
+		  info.btf_size != raw_btf_size ||
+		  (ret = memcmp(raw_btf, user_btf, expected_nbytes)),
+		  "err:%d errno:%d info.id:%u info_len:%u sizeof(info):%lu raw_btf_size:%u info.btf_size:%u expected_nbytes:%u memcmp:%d",
+		  err, errno, info.id, info_len, sizeof(info),
+		  raw_btf_size, info.btf_size, expected_nbytes, ret)) {
+		err = -1;
+		goto done;
+	}
+
+	while (expected_nbytes < raw_btf_size) {
+		fprintf(stderr, "%u...", expected_nbytes);
+		if (CHECK(user_btf[expected_nbytes++] != 0xff,
+			  "user_btf[%u]:%x != 0xff", expected_nbytes - 1,
+			  user_btf[expected_nbytes - 1])) {
+			err = -1;
+			goto done;
+		}
+	}
+
+	fprintf(stderr, "OK");
+
+done:
+	if (*btf_log_buf && (err || args.always_log))
+		fprintf(stderr, "\n%s", btf_log_buf);
+
+	free(raw_btf);
+	free(user_btf);
+
+	if (btf_fd != -1)
+		close(btf_fd);
+
+	return err;
+}
+
+static int test_get_info(void)
+{
+	unsigned int i;
+	int err = 0;
+
+	if (args.get_info_test_num)
+		return count_result(do_test_get_info(args.get_info_test_num));
+
+	for (i = 1; i <= ARRAY_SIZE(get_info_tests); i++)
+		err |= count_result(do_test_get_info(i));
+
+	return err;
+}
+
+struct btf_file_test {
+	const char *file;
+	bool btf_kv_notfound;
+};
+
+static struct btf_file_test file_tests[] = {
+{
+	.file = "test_btf_haskv.o",
+},
+{
+	.file = "test_btf_nokv.o",
+	.btf_kv_notfound = true,
+},
+};
+
+static int file_has_btf_elf(const char *fn)
+{
+	Elf_Scn *scn = NULL;
+	GElf_Ehdr ehdr;
+	int elf_fd;
+	Elf *elf;
+	int ret;
+
+	if (CHECK(elf_version(EV_CURRENT) == EV_NONE,
+		  "elf_version(EV_CURRENT) == EV_NONE"))
+		return -1;
+
+	elf_fd = open(fn, O_RDONLY);
+	if (CHECK(elf_fd == -1, "open(%s): errno:%d", fn, errno))
+		return -1;
+
+	elf = elf_begin(elf_fd, ELF_C_READ, NULL);
+	if (CHECK(!elf, "elf_begin(%s): %s", fn, elf_errmsg(elf_errno()))) {
+		ret = -1;
+		goto done;
+	}
+
+	if (CHECK(!gelf_getehdr(elf, &ehdr), "!gelf_getehdr(%s)", fn)) {
+		ret = -1;
+		goto done;
+	}
+
+	while ((scn = elf_nextscn(elf, scn))) {
+		const char *sh_name;
+		GElf_Shdr sh;
+
+		if (CHECK(gelf_getshdr(scn, &sh) != &sh,
+			  "file:%s gelf_getshdr != &sh", fn)) {
+			ret = -1;
+			goto done;
+		}
+
+		sh_name = elf_strptr(elf, ehdr.e_shstrndx, sh.sh_name);
+		if (!strcmp(sh_name, BTF_ELF_SEC)) {
+			ret = 1;
+			goto done;
+		}
+	}
+
+	ret = 0;
+
+done:
+	close(elf_fd);
+	elf_end(elf);
+	return ret;
+}
+
+static int do_test_file(unsigned int test_num)
+{
+	const struct btf_file_test *test = &file_tests[test_num - 1];
+	struct bpf_object *obj = NULL;
+	struct bpf_program *prog;
+	struct bpf_map *map;
+	int err;
+
+	fprintf(stderr, "BTF libbpf test[%u] (%s): ", test_num,
+		test->file);
+
+	err = file_has_btf_elf(test->file);
+	if (err == -1)
+		return err;
+
+	if (err == 0) {
+		fprintf(stderr, "SKIP. No ELF %s found", BTF_ELF_SEC);
+		skip_cnt++;
+		return 0;
+	}
+
+	obj = bpf_object__open(test->file);
+	if (CHECK(IS_ERR(obj), "obj: %ld", PTR_ERR(obj)))
+		return PTR_ERR(obj);
+
+	err = bpf_object__btf_fd(obj);
+	if (CHECK(err == -1, "bpf_object__btf_fd: -1"))
+		goto done;
+
+	prog = bpf_program__next(NULL, obj);
+	if (CHECK(!prog, "Cannot find bpf_prog")) {
+		err = -1;
+		goto done;
+	}
+
+	bpf_program__set_type(prog, BPF_PROG_TYPE_TRACEPOINT);
+	err = bpf_object__load(obj);
+	if (CHECK(err < 0, "bpf_object__load: %d", err))
+		goto done;
+
+	map = bpf_object__find_map_by_name(obj, "btf_map");
+	if (CHECK(!map, "btf_map not found")) {
+		err = -1;
+		goto done;
+	}
+
+	err = (bpf_map__btf_key_type_id(map) == 0 || bpf_map__btf_value_type_id(map) == 0)
+		!= test->btf_kv_notfound;
+	if (CHECK(err, "btf_key_type_id:%u btf_value_type_id:%u test->btf_kv_notfound:%u",
+		  bpf_map__btf_key_type_id(map), bpf_map__btf_value_type_id(map),
+		  test->btf_kv_notfound))
+		goto done;
+
+	fprintf(stderr, "OK");
+
+done:
+	bpf_object__close(obj);
+	return err;
+}
+
+static int test_file(void)
+{
+	unsigned int i;
+	int err = 0;
+
+	if (args.file_test_num)
+		return count_result(do_test_file(args.file_test_num));
+
+	for (i = 1; i <= ARRAY_SIZE(file_tests); i++)
+		err |= count_result(do_test_file(i));
+
+	return err;
+}
+
+const char *pprint_enum_str[] = {
+	"ENUM_ZERO",
+	"ENUM_ONE",
+	"ENUM_TWO",
+	"ENUM_THREE",
+};
+
+struct pprint_mapv {
+	uint32_t ui32;
+	uint16_t ui16;
+	/* 2 bytes hole */
+	int32_t si32;
+	uint32_t unused_bits2a:2,
+		bits28:28,
+		unused_bits2b:2;
+	union {
+		uint64_t ui64;
+		uint8_t ui8a[8];
+	};
+	enum {
+		ENUM_ZERO,
+		ENUM_ONE,
+		ENUM_TWO,
+		ENUM_THREE,
+	} aenum;
+};
+
+static struct btf_raw_test pprint_test_template = {
+	.raw_types = {
+		/* unsighed char */			/* [1] */
+		BTF_TYPE_INT_ENC(NAME_TBD, 0, 0, 8, 1),
+		/* unsigned short */			/* [2] */
+		BTF_TYPE_INT_ENC(NAME_TBD, 0, 0, 16, 2),
+		/* unsigned int */			/* [3] */
+		BTF_TYPE_INT_ENC(NAME_TBD, 0, 0, 32, 4),
+		/* int */				/* [4] */
+		BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),
+		/* unsigned long long */		/* [5] */
+		BTF_TYPE_INT_ENC(NAME_TBD, 0, 0, 64, 8),
+		/* 2 bits */				/* [6] */
+		BTF_TYPE_INT_ENC(0, 0, 0, 2, 2),
+		/* 28 bits */				/* [7] */
+		BTF_TYPE_INT_ENC(0, 0, 0, 28, 4),
+		/* uint8_t[8] */			/* [8] */
+		BTF_TYPE_ARRAY_ENC(9, 1, 8),
+		/* typedef unsigned char uint8_t */	/* [9] */
+		BTF_TYPEDEF_ENC(NAME_TBD, 1),
+		/* typedef unsigned short uint16_t */	/* [10] */
+		BTF_TYPEDEF_ENC(NAME_TBD, 2),
+		/* typedef unsigned int uint32_t */	/* [11] */
+		BTF_TYPEDEF_ENC(NAME_TBD, 3),
+		/* typedef int int32_t */		/* [12] */
+		BTF_TYPEDEF_ENC(NAME_TBD, 4),
+		/* typedef unsigned long long uint64_t *//* [13] */
+		BTF_TYPEDEF_ENC(NAME_TBD, 5),
+		/* union (anon) */			/* [14] */
+		BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_UNION, 0, 2), 8),
+		BTF_MEMBER_ENC(NAME_TBD, 13, 0),/* uint64_t ui64; */
+		BTF_MEMBER_ENC(NAME_TBD, 8, 0),	/* uint8_t ui8a[8]; */
+		/* enum (anon) */			/* [15] */
+		BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_ENUM, 0, 4), 4),
+		BTF_ENUM_ENC(NAME_TBD, 0),
+		BTF_ENUM_ENC(NAME_TBD, 1),
+		BTF_ENUM_ENC(NAME_TBD, 2),
+		BTF_ENUM_ENC(NAME_TBD, 3),
+		/* struct pprint_mapv */		/* [16] */
+		BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 8), 32),
+		BTF_MEMBER_ENC(NAME_TBD, 11, 0),	/* uint32_t ui32 */
+		BTF_MEMBER_ENC(NAME_TBD, 10, 32),	/* uint16_t ui16 */
+		BTF_MEMBER_ENC(NAME_TBD, 12, 64),	/* int32_t si32 */
+		BTF_MEMBER_ENC(NAME_TBD, 6, 96),	/* unused_bits2a */
+		BTF_MEMBER_ENC(NAME_TBD, 7, 98),	/* bits28 */
+		BTF_MEMBER_ENC(NAME_TBD, 6, 126),	/* unused_bits2b */
+		BTF_MEMBER_ENC(0, 14, 128),		/* union (anon) */
+		BTF_MEMBER_ENC(NAME_TBD, 15, 192),	/* aenum */
+		BTF_END_RAW,
+	},
+	.str_sec = "\0unsigned char\0unsigned short\0unsigned int\0int\0unsigned long long\0uint8_t\0uint16_t\0uint32_t\0int32_t\0uint64_t\0ui64\0ui8a\0ENUM_ZERO\0ENUM_ONE\0ENUM_TWO\0ENUM_THREE\0pprint_mapv\0ui32\0ui16\0si32\0unused_bits2a\0bits28\0unused_bits2b\0aenum",
+	.str_sec_size = sizeof("\0unsigned char\0unsigned short\0unsigned int\0int\0unsigned long long\0uint8_t\0uint16_t\0uint32_t\0int32_t\0uint64_t\0ui64\0ui8a\0ENUM_ZERO\0ENUM_ONE\0ENUM_TWO\0ENUM_THREE\0pprint_mapv\0ui32\0ui16\0si32\0unused_bits2a\0bits28\0unused_bits2b\0aenum"),
+	.key_size = sizeof(unsigned int),
+	.value_size = sizeof(struct pprint_mapv),
+	.key_type_id = 3,	/* unsigned int */
+	.value_type_id = 16,	/* struct pprint_mapv */
+	.max_entries = 128 * 1024,
+};
+
+static struct btf_pprint_test_meta {
+	const char *descr;
+	enum bpf_map_type map_type;
+	const char *map_name;
+	bool ordered_map;
+	bool lossless_map;
+} pprint_tests_meta[] = {
+{
+	.descr = "BTF pretty print array",
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "pprint_test_array",
+	.ordered_map = true,
+	.lossless_map = true,
+},
+
+{
+	.descr = "BTF pretty print hash",
+	.map_type = BPF_MAP_TYPE_HASH,
+	.map_name = "pprint_test_hash",
+	.ordered_map = false,
+	.lossless_map = true,
+},
+
+{
+	.descr = "BTF pretty print lru hash",
+	.map_type = BPF_MAP_TYPE_LRU_HASH,
+	.map_name = "pprint_test_lru_hash",
+	.ordered_map = false,
+	.lossless_map = false,
+},
+
+};
+
+
+static void set_pprint_mapv(struct pprint_mapv *v, uint32_t i)
+{
+	v->ui32 = i;
+	v->si32 = -i;
+	v->unused_bits2a = 3;
+	v->bits28 = i;
+	v->unused_bits2b = 3;
+	v->ui64 = i;
+	v->aenum = i & 0x03;
+}
+
+static int do_test_pprint(void)
+{
+	const struct btf_raw_test *test = &pprint_test_template;
+	struct bpf_create_map_attr create_attr = {};
+	unsigned int key, nr_read_elems;
+	bool ordered_map, lossless_map;
+	int map_fd = -1, btf_fd = -1;
+	struct pprint_mapv mapv = {};
+	unsigned int raw_btf_size;
+	char expected_line[255];
+	FILE *pin_file = NULL;
+	char pin_path[255];
+	size_t line_len = 0;
+	char *line = NULL;
+	uint8_t *raw_btf;
+	ssize_t nread;
+	int err, ret;
+
+	fprintf(stderr, "%s......", test->descr);
+	raw_btf = btf_raw_create(&hdr_tmpl, test->raw_types,
+				 test->str_sec, test->str_sec_size,
+				 &raw_btf_size);
+
+	if (!raw_btf)
+		return -1;
+
+	*btf_log_buf = '\0';
+	btf_fd = bpf_load_btf(raw_btf, raw_btf_size,
+			      btf_log_buf, BTF_LOG_BUF_SIZE,
+			      args.always_log);
+	free(raw_btf);
+
+	if (CHECK(btf_fd == -1, "errno:%d", errno)) {
+		err = -1;
+		goto done;
+	}
+
+	create_attr.name = test->map_name;
+	create_attr.map_type = test->map_type;
+	create_attr.key_size = test->key_size;
+	create_attr.value_size = test->value_size;
+	create_attr.max_entries = test->max_entries;
+	create_attr.btf_fd = btf_fd;
+	create_attr.btf_key_type_id = test->key_type_id;
+	create_attr.btf_value_type_id = test->value_type_id;
+
+	map_fd = bpf_create_map_xattr(&create_attr);
+	if (CHECK(map_fd == -1, "errno:%d", errno)) {
+		err = -1;
+		goto done;
+	}
+
+	ret = snprintf(pin_path, sizeof(pin_path), "%s/%s",
+		       "/sys/fs/bpf", test->map_name);
+
+	if (CHECK(ret == sizeof(pin_path), "pin_path %s/%s is too long",
+		  "/sys/fs/bpf", test->map_name)) {
+		err = -1;
+		goto done;
+	}
+
+	err = bpf_obj_pin(map_fd, pin_path);
+	if (CHECK(err, "bpf_obj_pin(%s): errno:%d.", pin_path, errno))
+		goto done;
+
+	for (key = 0; key < test->max_entries; key++) {
+		set_pprint_mapv(&mapv, key);
+		bpf_map_update_elem(map_fd, &key, &mapv, 0);
+	}
+
+	pin_file = fopen(pin_path, "r");
+	if (CHECK(!pin_file, "fopen(%s): errno:%d", pin_path, errno)) {
+		err = -1;
+		goto done;
+	}
+
+	/* Skip lines start with '#' */
+	while ((nread = getline(&line, &line_len, pin_file)) > 0 &&
+	       *line == '#')
+		;
+
+	if (CHECK(nread <= 0, "Unexpected EOF")) {
+		err = -1;
+		goto done;
+	}
+
+	nr_read_elems = 0;
+	ordered_map = test->ordered_map;
+	lossless_map = test->lossless_map;
+	do {
+		ssize_t nexpected_line;
+		unsigned int next_key;
+
+		next_key = ordered_map ? nr_read_elems : atoi(line);
+		set_pprint_mapv(&mapv, next_key);
+		nexpected_line = snprintf(expected_line, sizeof(expected_line),
+					  "%u: {%u,0,%d,0x%x,0x%x,0x%x,{%lu|[%u,%u,%u,%u,%u,%u,%u,%u]},%s}\n",
+					  next_key,
+					  mapv.ui32, mapv.si32,
+					  mapv.unused_bits2a, mapv.bits28, mapv.unused_bits2b,
+					  mapv.ui64,
+					  mapv.ui8a[0], mapv.ui8a[1], mapv.ui8a[2], mapv.ui8a[3],
+					  mapv.ui8a[4], mapv.ui8a[5], mapv.ui8a[6], mapv.ui8a[7],
+					  pprint_enum_str[mapv.aenum]);
+
+		if (CHECK(nexpected_line == sizeof(expected_line),
+			  "expected_line is too long")) {
+			err = -1;
+			goto done;
+		}
+
+		if (strcmp(expected_line, line)) {
+			err = -1;
+			fprintf(stderr, "unexpected pprint output\n");
+			fprintf(stderr, "expected: %s", expected_line);
+			fprintf(stderr, "    read: %s", line);
+			goto done;
+		}
+
+		nread = getline(&line, &line_len, pin_file);
+	} while (++nr_read_elems < test->max_entries && nread > 0);
+
+	if (lossless_map &&
+	    CHECK(nr_read_elems < test->max_entries,
+		  "Unexpected EOF. nr_read_elems:%u test->max_entries:%u",
+		  nr_read_elems, test->max_entries)) {
+		err = -1;
+		goto done;
+	}
+
+	if (CHECK(nread > 0, "Unexpected extra pprint output: %s", line)) {
+		err = -1;
+		goto done;
+	}
+
+	err = 0;
+
+done:
+	if (!err)
+		fprintf(stderr, "OK");
+	if (*btf_log_buf && (err || args.always_log))
+		fprintf(stderr, "\n%s", btf_log_buf);
+	if (btf_fd != -1)
+		close(btf_fd);
+	if (map_fd != -1)
+		close(map_fd);
+	if (pin_file)
+		fclose(pin_file);
+	unlink(pin_path);
+	free(line);
+
+	return err;
+}
+
+static int test_pprint(void)
+{
+	unsigned int i;
+	int err = 0;
+
+	for (i = 0; i < ARRAY_SIZE(pprint_tests_meta); i++) {
+		pprint_test_template.descr = pprint_tests_meta[i].descr;
+		pprint_test_template.map_type = pprint_tests_meta[i].map_type;
+		pprint_test_template.map_name = pprint_tests_meta[i].map_name;
+		pprint_test_template.ordered_map = pprint_tests_meta[i].ordered_map;
+		pprint_test_template.lossless_map = pprint_tests_meta[i].lossless_map;
+
+		err |= count_result(do_test_pprint());
+	}
+
+	return err;
+}
+
+static void usage(const char *cmd)
+{
+	fprintf(stderr, "Usage: %s [-l] [[-r test_num (1 - %zu)] | [-g test_num (1 - %zu)] | [-f test_num (1 - %zu)] | [-p]]\n",
+		cmd, ARRAY_SIZE(raw_tests), ARRAY_SIZE(get_info_tests),
+		ARRAY_SIZE(file_tests));
+}
+
+static int parse_args(int argc, char **argv)
+{
+	const char *optstr = "lpf:r:g:";
+	int opt;
+
+	while ((opt = getopt(argc, argv, optstr)) != -1) {
+		switch (opt) {
+		case 'l':
+			args.always_log = true;
+			break;
+		case 'f':
+			args.file_test_num = atoi(optarg);
+			args.file_test = true;
+			break;
+		case 'r':
+			args.raw_test_num = atoi(optarg);
+			args.raw_test = true;
+			break;
+		case 'g':
+			args.get_info_test_num = atoi(optarg);
+			args.get_info_test = true;
+			break;
+		case 'p':
+			args.pprint_test = true;
+			break;
+		case 'h':
+			usage(argv[0]);
+			exit(0);
+		default:
+				usage(argv[0]);
+				return -1;
+		}
+	}
+
+	if (args.raw_test_num &&
+	    (args.raw_test_num < 1 ||
+	     args.raw_test_num > ARRAY_SIZE(raw_tests))) {
+		fprintf(stderr, "BTF raw test number must be [1 - %zu]\n",
+			ARRAY_SIZE(raw_tests));
+		return -1;
+	}
+
+	if (args.file_test_num &&
+	    (args.file_test_num < 1 ||
+	     args.file_test_num > ARRAY_SIZE(file_tests))) {
+		fprintf(stderr, "BTF file test number must be [1 - %zu]\n",
+			ARRAY_SIZE(file_tests));
+		return -1;
+	}
+
+	if (args.get_info_test_num &&
+	    (args.get_info_test_num < 1 ||
+	     args.get_info_test_num > ARRAY_SIZE(get_info_tests))) {
+		fprintf(stderr, "BTF get info test number must be [1 - %zu]\n",
+			ARRAY_SIZE(get_info_tests));
+		return -1;
+	}
+
+	return 0;
+}
+
+static void print_summary(void)
+{
+	fprintf(stderr, "PASS:%u SKIP:%u FAIL:%u\n",
+		pass_cnt - skip_cnt, skip_cnt, error_cnt);
+}
+
+int main(int argc, char **argv)
+{
+	int err = 0;
+
+	err = parse_args(argc, argv);
+	if (err)
+		return err;
+
+	if (args.always_log)
+		libbpf_set_print(__base_pr, __base_pr, __base_pr);
+
+	if (args.raw_test)
+		err |= test_raw();
+
+	if (args.get_info_test)
+		err |= test_get_info();
+
+	if (args.file_test)
+		err |= test_file();
+
+	if (args.pprint_test)
+		err |= test_pprint();
+
+	if (args.raw_test || args.get_info_test || args.file_test ||
+	    args.pprint_test)
+		goto done;
+
+	err |= test_raw();
+	err |= test_get_info();
+	err |= test_file();
+
+done:
+	print_summary();
+	return err;
+}
diff --git a/tools/testing/selftests/bpf/test_btf_haskv.c b/tools/testing/selftests/bpf/test_btf_haskv.c
new file mode 100644
index 000000000..b21b876f4
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_btf_haskv.c
@@ -0,0 +1,45 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018 Facebook */
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+
+int _version SEC("version") = 1;
+
+struct ipv_counts {
+	unsigned int v4;
+	unsigned int v6;
+};
+
+struct bpf_map_def SEC("maps") btf_map = {
+	.type = BPF_MAP_TYPE_ARRAY,
+	.key_size = sizeof(int),
+	.value_size = sizeof(struct ipv_counts),
+	.max_entries = 4,
+};
+
+BPF_ANNOTATE_KV_PAIR(btf_map, int, struct ipv_counts);
+
+struct dummy_tracepoint_args {
+	unsigned long long pad;
+	struct sock *sock;
+};
+
+SEC("dummy_tracepoint")
+int _dummy_tracepoint(struct dummy_tracepoint_args *arg)
+{
+	struct ipv_counts *counts;
+	int key = 0;
+
+	if (!arg->sock)
+		return 0;
+
+	counts = bpf_map_lookup_elem(&btf_map, &key);
+	if (!counts)
+		return 0;
+
+	counts->v6++;
+
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_btf_nokv.c b/tools/testing/selftests/bpf/test_btf_nokv.c
new file mode 100644
index 000000000..0ed8e088e
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_btf_nokv.c
@@ -0,0 +1,43 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018 Facebook */
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+
+int _version SEC("version") = 1;
+
+struct ipv_counts {
+	unsigned int v4;
+	unsigned int v6;
+};
+
+struct bpf_map_def SEC("maps") btf_map = {
+	.type = BPF_MAP_TYPE_ARRAY,
+	.key_size = sizeof(int),
+	.value_size = sizeof(struct ipv_counts),
+	.max_entries = 4,
+};
+
+struct dummy_tracepoint_args {
+	unsigned long long pad;
+	struct sock *sock;
+};
+
+SEC("dummy_tracepoint")
+int _dummy_tracepoint(struct dummy_tracepoint_args *arg)
+{
+	struct ipv_counts *counts;
+	int key = 0;
+
+	if (!arg->sock)
+		return 0;
+
+	counts = bpf_map_lookup_elem(&btf_map, &key);
+	if (!counts)
+		return 0;
+
+	counts->v6++;
+
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_cgroup_storage.c b/tools/testing/selftests/bpf/test_cgroup_storage.c
new file mode 100644
index 000000000..4e196e3bf
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_cgroup_storage.c
@@ -0,0 +1,131 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <assert.h>
+#include <bpf/bpf.h>
+#include <linux/filter.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "bpf_rlimit.h"
+#include "cgroup_helpers.h"
+
+char bpf_log_buf[BPF_LOG_BUF_SIZE];
+
+#define TEST_CGROUP "/test-bpf-cgroup-storage-buf/"
+
+int main(int argc, char **argv)
+{
+	struct bpf_insn prog[] = {
+		BPF_LD_MAP_FD(BPF_REG_1, 0), /* map fd */
+		BPF_MOV64_IMM(BPF_REG_2, 0), /* flags, not used */
+		BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+			     BPF_FUNC_get_local_storage),
+		BPF_MOV64_IMM(BPF_REG_1, 1),
+		BPF_STX_XADD(BPF_DW, BPF_REG_0, BPF_REG_1, 0),
+		BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
+		BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0x1),
+		BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
+		BPF_EXIT_INSN(),
+	};
+	size_t insns_cnt = sizeof(prog) / sizeof(struct bpf_insn);
+	int error = EXIT_FAILURE;
+	int map_fd, prog_fd, cgroup_fd;
+	struct bpf_cgroup_storage_key key;
+	unsigned long long value;
+
+	map_fd = bpf_create_map(BPF_MAP_TYPE_CGROUP_STORAGE, sizeof(key),
+				sizeof(value), 0, 0);
+	if (map_fd < 0) {
+		printf("Failed to create map: %s\n", strerror(errno));
+		goto out;
+	}
+
+	prog[0].imm = map_fd;
+	prog_fd = bpf_load_program(BPF_PROG_TYPE_CGROUP_SKB,
+				   prog, insns_cnt, "GPL", 0,
+				   bpf_log_buf, BPF_LOG_BUF_SIZE);
+	if (prog_fd < 0) {
+		printf("Failed to load bpf program: %s\n", bpf_log_buf);
+		goto out;
+	}
+
+	if (setup_cgroup_environment()) {
+		printf("Failed to setup cgroup environment\n");
+		goto err;
+	}
+
+	/* Create a cgroup, get fd, and join it */
+	cgroup_fd = create_and_get_cgroup(TEST_CGROUP);
+	if (!cgroup_fd) {
+		printf("Failed to create test cgroup\n");
+		goto err;
+	}
+
+	if (join_cgroup(TEST_CGROUP)) {
+		printf("Failed to join cgroup\n");
+		goto err;
+	}
+
+	/* Attach the bpf program */
+	if (bpf_prog_attach(prog_fd, cgroup_fd, BPF_CGROUP_INET_EGRESS, 0)) {
+		printf("Failed to attach bpf program\n");
+		goto err;
+	}
+
+	if (bpf_map_get_next_key(map_fd, NULL, &key)) {
+		printf("Failed to get the first key in cgroup storage\n");
+		goto err;
+	}
+
+	if (bpf_map_lookup_elem(map_fd, &key, &value)) {
+		printf("Failed to lookup cgroup storage\n");
+		goto err;
+	}
+
+	/* Every second packet should be dropped */
+	assert(system("ping localhost -c 1 -W 1 -q > /dev/null") == 0);
+	assert(system("ping localhost -c 1 -W 1 -q > /dev/null"));
+	assert(system("ping localhost -c 1 -W 1 -q > /dev/null") == 0);
+
+	/* Check the counter in the cgroup local storage */
+	if (bpf_map_lookup_elem(map_fd, &key, &value)) {
+		printf("Failed to lookup cgroup storage\n");
+		goto err;
+	}
+
+	if (value != 3) {
+		printf("Unexpected data in the cgroup storage: %llu\n", value);
+		goto err;
+	}
+
+	/* Bump the counter in the cgroup local storage */
+	value++;
+	if (bpf_map_update_elem(map_fd, &key, &value, 0)) {
+		printf("Failed to update the data in the cgroup storage\n");
+		goto err;
+	}
+
+	/* Every second packet should be dropped */
+	assert(system("ping localhost -c 1 -W 1 -q > /dev/null") == 0);
+	assert(system("ping localhost -c 1 -W 1 -q > /dev/null"));
+	assert(system("ping localhost -c 1 -W 1 -q > /dev/null") == 0);
+
+	/* Check the final value of the counter in the cgroup local storage */
+	if (bpf_map_lookup_elem(map_fd, &key, &value)) {
+		printf("Failed to lookup the cgroup storage\n");
+		goto err;
+	}
+
+	if (value != 7) {
+		printf("Unexpected data in the cgroup storage: %llu\n", value);
+		goto err;
+	}
+
+	error = 0;
+	printf("test_cgroup_storage:PASS\n");
+
+err:
+	cleanup_cgroup_environment();
+
+out:
+	return error;
+}
diff --git a/tools/testing/selftests/bpf/test_dev_cgroup.c b/tools/testing/selftests/bpf/test_dev_cgroup.c
new file mode 100644
index 000000000..9c8b50bac
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_dev_cgroup.c
@@ -0,0 +1,96 @@
+/* Copyright (c) 2017 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <assert.h>
+#include <sys/time.h>
+
+#include <linux/bpf.h>
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+
+#include "cgroup_helpers.h"
+#include "bpf_rlimit.h"
+
+#define DEV_CGROUP_PROG "./dev_cgroup.o"
+
+#define TEST_CGROUP "/test-bpf-based-device-cgroup/"
+
+int main(int argc, char **argv)
+{
+	struct bpf_object *obj;
+	int error = EXIT_FAILURE;
+	int prog_fd, cgroup_fd;
+	__u32 prog_cnt;
+
+	if (bpf_prog_load(DEV_CGROUP_PROG, BPF_PROG_TYPE_CGROUP_DEVICE,
+			  &obj, &prog_fd)) {
+		printf("Failed to load DEV_CGROUP program\n");
+		goto out;
+	}
+
+	if (setup_cgroup_environment()) {
+		printf("Failed to load DEV_CGROUP program\n");
+		goto err;
+	}
+
+	/* Create a cgroup, get fd, and join it */
+	cgroup_fd = create_and_get_cgroup(TEST_CGROUP);
+	if (!cgroup_fd) {
+		printf("Failed to create test cgroup\n");
+		goto err;
+	}
+
+	if (join_cgroup(TEST_CGROUP)) {
+		printf("Failed to join cgroup\n");
+		goto err;
+	}
+
+	/* Attach bpf program */
+	if (bpf_prog_attach(prog_fd, cgroup_fd, BPF_CGROUP_DEVICE, 0)) {
+		printf("Failed to attach DEV_CGROUP program");
+		goto err;
+	}
+
+	if (bpf_prog_query(cgroup_fd, BPF_CGROUP_DEVICE, 0, NULL, NULL,
+			   &prog_cnt)) {
+		printf("Failed to query attached programs");
+		goto err;
+	}
+
+	/* All operations with /dev/zero and and /dev/urandom are allowed,
+	 * everything else is forbidden.
+	 */
+	assert(system("rm -f /tmp/test_dev_cgroup_null") == 0);
+	assert(system("mknod /tmp/test_dev_cgroup_null c 1 3"));
+	assert(system("rm -f /tmp/test_dev_cgroup_null") == 0);
+
+	/* /dev/zero is whitelisted */
+	assert(system("rm -f /tmp/test_dev_cgroup_zero") == 0);
+	assert(system("mknod /tmp/test_dev_cgroup_zero c 1 5") == 0);
+	assert(system("rm -f /tmp/test_dev_cgroup_zero") == 0);
+
+	assert(system("dd if=/dev/urandom of=/dev/zero count=64") == 0);
+
+	/* src is allowed, target is forbidden */
+	assert(system("dd if=/dev/urandom of=/dev/full count=64"));
+
+	/* src is forbidden, target is allowed */
+	assert(system("dd if=/dev/random of=/dev/zero count=64"));
+
+	error = 0;
+	printf("test_dev_cgroup:PASS\n");
+
+err:
+	cleanup_cgroup_environment();
+
+out:
+	return error;
+}
diff --git a/tools/testing/selftests/bpf/test_get_stack_rawtp.c b/tools/testing/selftests/bpf/test_get_stack_rawtp.c
new file mode 100644
index 000000000..f6d9f238e
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_get_stack_rawtp.c
@@ -0,0 +1,102 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+
+/* Permit pretty deep stack traces */
+#define MAX_STACK_RAWTP 100
+struct stack_trace_t {
+	int pid;
+	int kern_stack_size;
+	int user_stack_size;
+	int user_stack_buildid_size;
+	__u64 kern_stack[MAX_STACK_RAWTP];
+	__u64 user_stack[MAX_STACK_RAWTP];
+	struct bpf_stack_build_id user_stack_buildid[MAX_STACK_RAWTP];
+};
+
+struct bpf_map_def SEC("maps") perfmap = {
+	.type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
+	.key_size = sizeof(int),
+	.value_size = sizeof(__u32),
+	.max_entries = 2,
+};
+
+struct bpf_map_def SEC("maps") stackdata_map = {
+	.type = BPF_MAP_TYPE_PERCPU_ARRAY,
+	.key_size = sizeof(__u32),
+	.value_size = sizeof(struct stack_trace_t),
+	.max_entries = 1,
+};
+
+/* Allocate per-cpu space twice the needed. For the code below
+ *   usize = bpf_get_stack(ctx, raw_data, max_len, BPF_F_USER_STACK);
+ *   if (usize < 0)
+ *     return 0;
+ *   ksize = bpf_get_stack(ctx, raw_data + usize, max_len - usize, 0);
+ *
+ * If we have value_size = MAX_STACK_RAWTP * sizeof(__u64),
+ * verifier will complain that access "raw_data + usize"
+ * with size "max_len - usize" may be out of bound.
+ * The maximum "raw_data + usize" is "raw_data + max_len"
+ * and the maximum "max_len - usize" is "max_len", verifier
+ * concludes that the maximum buffer access range is
+ * "raw_data[0...max_len * 2 - 1]" and hence reject the program.
+ *
+ * Doubling the to-be-used max buffer size can fix this verifier
+ * issue and avoid complicated C programming massaging.
+ * This is an acceptable workaround since there is one entry here.
+ */
+struct bpf_map_def SEC("maps") rawdata_map = {
+	.type = BPF_MAP_TYPE_PERCPU_ARRAY,
+	.key_size = sizeof(__u32),
+	.value_size = MAX_STACK_RAWTP * sizeof(__u64) * 2,
+	.max_entries = 1,
+};
+
+SEC("tracepoint/raw_syscalls/sys_enter")
+int bpf_prog1(void *ctx)
+{
+	int max_len, max_buildid_len, usize, ksize, total_size;
+	struct stack_trace_t *data;
+	void *raw_data;
+	__u32 key = 0;
+
+	data = bpf_map_lookup_elem(&stackdata_map, &key);
+	if (!data)
+		return 0;
+
+	max_len = MAX_STACK_RAWTP * sizeof(__u64);
+	max_buildid_len = MAX_STACK_RAWTP * sizeof(struct bpf_stack_build_id);
+	data->pid = bpf_get_current_pid_tgid();
+	data->kern_stack_size = bpf_get_stack(ctx, data->kern_stack,
+					      max_len, 0);
+	data->user_stack_size = bpf_get_stack(ctx, data->user_stack, max_len,
+					    BPF_F_USER_STACK);
+	data->user_stack_buildid_size = bpf_get_stack(
+		ctx, data->user_stack_buildid, max_buildid_len,
+		BPF_F_USER_STACK | BPF_F_USER_BUILD_ID);
+	bpf_perf_event_output(ctx, &perfmap, 0, data, sizeof(*data));
+
+	/* write both kernel and user stacks to the same buffer */
+	raw_data = bpf_map_lookup_elem(&rawdata_map, &key);
+	if (!raw_data)
+		return 0;
+
+	usize = bpf_get_stack(ctx, raw_data, max_len, BPF_F_USER_STACK);
+	if (usize < 0)
+		return 0;
+
+	ksize = bpf_get_stack(ctx, raw_data + usize, max_len - usize, 0);
+	if (ksize < 0)
+		return 0;
+
+	total_size = usize + ksize;
+	if (total_size > 0 && total_size <= max_len)
+		bpf_perf_event_output(ctx, &perfmap, 0, raw_data, total_size);
+
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
+__u32 _version SEC("version") = 1; /* ignored by tracepoints, required by libbpf.a */
diff --git a/tools/testing/selftests/bpf/test_iptunnel_common.h b/tools/testing/selftests/bpf/test_iptunnel_common.h
new file mode 100644
index 000000000..e4cd252a1
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_iptunnel_common.h
@@ -0,0 +1,37 @@
+/* Copyright (c) 2016 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#ifndef _TEST_IPTNL_COMMON_H
+#define _TEST_IPTNL_COMMON_H
+
+#include <linux/types.h>
+
+#define MAX_IPTNL_ENTRIES 256U
+
+struct vip {
+	union {
+		__u32 v6[4];
+		__u32 v4;
+	} daddr;
+	__u16 dport;
+	__u16 family;
+	__u8 protocol;
+};
+
+struct iptnl_info {
+	union {
+		__u32 v6[4];
+		__u32 v4;
+	} saddr;
+	union {
+		__u32 v6[4];
+		__u32 v4;
+	} daddr;
+	__u16 family;
+	__u8 dmac[6];
+};
+
+#endif
diff --git a/tools/testing/selftests/bpf/test_kmod.sh b/tools/testing/selftests/bpf/test_kmod.sh
new file mode 100755
index 000000000..9df0d2ac4
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_kmod.sh
@@ -0,0 +1,61 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+msg="skip all tests:"
+if [ "$(id -u)" != "0" ]; then
+	echo $msg please run this as root >&2
+	exit $ksft_skip
+fi
+
+SRC_TREE=../../../../
+
+test_run()
+{
+	sysctl -w net.core.bpf_jit_enable=$1 2>&1 > /dev/null
+	sysctl -w net.core.bpf_jit_harden=$2 2>&1 > /dev/null
+
+	echo "[ JIT enabled:$1 hardened:$2 ]"
+	dmesg -C
+	if [ -f ${SRC_TREE}/lib/test_bpf.ko ]; then
+		insmod ${SRC_TREE}/lib/test_bpf.ko 2> /dev/null
+		if [ $? -ne 0 ]; then
+			rc=1
+		fi
+	else
+		# Use modprobe dry run to check for missing test_bpf module
+		if ! /sbin/modprobe -q -n test_bpf; then
+			echo "test_bpf: [SKIP]"
+		elif /sbin/modprobe -q test_bpf; then
+			echo "test_bpf: ok"
+		else
+			echo "test_bpf: [FAIL]"
+			rc=1
+		fi
+	fi
+	rmmod  test_bpf 2> /dev/null
+	dmesg | grep FAIL
+}
+
+test_save()
+{
+	JE=`sysctl -n net.core.bpf_jit_enable`
+	JH=`sysctl -n net.core.bpf_jit_harden`
+}
+
+test_restore()
+{
+	sysctl -w net.core.bpf_jit_enable=$JE 2>&1 > /dev/null
+	sysctl -w net.core.bpf_jit_harden=$JH 2>&1 > /dev/null
+}
+
+rc=0
+test_save
+test_run 0 0
+test_run 1 0
+test_run 1 1
+test_run 1 2
+test_restore
+exit $rc
diff --git a/tools/testing/selftests/bpf/test_l4lb.c b/tools/testing/selftests/bpf/test_l4lb.c
new file mode 100644
index 000000000..1e10c9590
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_l4lb.c
@@ -0,0 +1,473 @@
+/* Copyright (c) 2017 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <stddef.h>
+#include <stdbool.h>
+#include <string.h>
+#include <linux/pkt_cls.h>
+#include <linux/bpf.h>
+#include <linux/in.h>
+#include <linux/if_ether.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/icmp.h>
+#include <linux/icmpv6.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+#include "bpf_helpers.h"
+#include "test_iptunnel_common.h"
+#include "bpf_endian.h"
+
+int _version SEC("version") = 1;
+
+static inline __u32 rol32(__u32 word, unsigned int shift)
+{
+	return (word << shift) | (word >> ((-shift) & 31));
+}
+
+/* copy paste of jhash from kernel sources to make sure llvm
+ * can compile it into valid sequence of bpf instructions
+ */
+#define __jhash_mix(a, b, c)			\
+{						\
+	a -= c;  a ^= rol32(c, 4);  c += b;	\
+	b -= a;  b ^= rol32(a, 6);  a += c;	\
+	c -= b;  c ^= rol32(b, 8);  b += a;	\
+	a -= c;  a ^= rol32(c, 16); c += b;	\
+	b -= a;  b ^= rol32(a, 19); a += c;	\
+	c -= b;  c ^= rol32(b, 4);  b += a;	\
+}
+
+#define __jhash_final(a, b, c)			\
+{						\
+	c ^= b; c -= rol32(b, 14);		\
+	a ^= c; a -= rol32(c, 11);		\
+	b ^= a; b -= rol32(a, 25);		\
+	c ^= b; c -= rol32(b, 16);		\
+	a ^= c; a -= rol32(c, 4);		\
+	b ^= a; b -= rol32(a, 14);		\
+	c ^= b; c -= rol32(b, 24);		\
+}
+
+#define JHASH_INITVAL		0xdeadbeef
+
+typedef unsigned int u32;
+
+static inline u32 jhash(const void *key, u32 length, u32 initval)
+{
+	u32 a, b, c;
+	const unsigned char *k = key;
+
+	a = b = c = JHASH_INITVAL + length + initval;
+
+	while (length > 12) {
+		a += *(u32 *)(k);
+		b += *(u32 *)(k + 4);
+		c += *(u32 *)(k + 8);
+		__jhash_mix(a, b, c);
+		length -= 12;
+		k += 12;
+	}
+	switch (length) {
+	case 12: c += (u32)k[11]<<24;
+	case 11: c += (u32)k[10]<<16;
+	case 10: c += (u32)k[9]<<8;
+	case 9:  c += k[8];
+	case 8:  b += (u32)k[7]<<24;
+	case 7:  b += (u32)k[6]<<16;
+	case 6:  b += (u32)k[5]<<8;
+	case 5:  b += k[4];
+	case 4:  a += (u32)k[3]<<24;
+	case 3:  a += (u32)k[2]<<16;
+	case 2:  a += (u32)k[1]<<8;
+	case 1:  a += k[0];
+		 __jhash_final(a, b, c);
+	case 0: /* Nothing left to add */
+		break;
+	}
+
+	return c;
+}
+
+static inline u32 __jhash_nwords(u32 a, u32 b, u32 c, u32 initval)
+{
+	a += initval;
+	b += initval;
+	c += initval;
+	__jhash_final(a, b, c);
+	return c;
+}
+
+static inline u32 jhash_2words(u32 a, u32 b, u32 initval)
+{
+	return __jhash_nwords(a, b, 0, initval + JHASH_INITVAL + (2 << 2));
+}
+
+#define PCKT_FRAGMENTED 65343
+#define IPV4_HDR_LEN_NO_OPT 20
+#define IPV4_PLUS_ICMP_HDR 28
+#define IPV6_PLUS_ICMP_HDR 48
+#define RING_SIZE 2
+#define MAX_VIPS 12
+#define MAX_REALS 5
+#define CTL_MAP_SIZE 16
+#define CH_RINGS_SIZE (MAX_VIPS * RING_SIZE)
+#define F_IPV6 (1 << 0)
+#define F_HASH_NO_SRC_PORT (1 << 0)
+#define F_ICMP (1 << 0)
+#define F_SYN_SET (1 << 1)
+
+struct packet_description {
+	union {
+		__be32 src;
+		__be32 srcv6[4];
+	};
+	union {
+		__be32 dst;
+		__be32 dstv6[4];
+	};
+	union {
+		__u32 ports;
+		__u16 port16[2];
+	};
+	__u8 proto;
+	__u8 flags;
+};
+
+struct ctl_value {
+	union {
+		__u64 value;
+		__u32 ifindex;
+		__u8 mac[6];
+	};
+};
+
+struct vip_meta {
+	__u32 flags;
+	__u32 vip_num;
+};
+
+struct real_definition {
+	union {
+		__be32 dst;
+		__be32 dstv6[4];
+	};
+	__u8 flags;
+};
+
+struct vip_stats {
+	__u64 bytes;
+	__u64 pkts;
+};
+
+struct eth_hdr {
+	unsigned char eth_dest[ETH_ALEN];
+	unsigned char eth_source[ETH_ALEN];
+	unsigned short eth_proto;
+};
+
+struct bpf_map_def SEC("maps") vip_map = {
+	.type = BPF_MAP_TYPE_HASH,
+	.key_size = sizeof(struct vip),
+	.value_size = sizeof(struct vip_meta),
+	.max_entries = MAX_VIPS,
+};
+
+struct bpf_map_def SEC("maps") ch_rings = {
+	.type = BPF_MAP_TYPE_ARRAY,
+	.key_size = sizeof(__u32),
+	.value_size = sizeof(__u32),
+	.max_entries = CH_RINGS_SIZE,
+};
+
+struct bpf_map_def SEC("maps") reals = {
+	.type = BPF_MAP_TYPE_ARRAY,
+	.key_size = sizeof(__u32),
+	.value_size = sizeof(struct real_definition),
+	.max_entries = MAX_REALS,
+};
+
+struct bpf_map_def SEC("maps") stats = {
+	.type = BPF_MAP_TYPE_PERCPU_ARRAY,
+	.key_size = sizeof(__u32),
+	.value_size = sizeof(struct vip_stats),
+	.max_entries = MAX_VIPS,
+};
+
+struct bpf_map_def SEC("maps") ctl_array = {
+	.type = BPF_MAP_TYPE_ARRAY,
+	.key_size = sizeof(__u32),
+	.value_size = sizeof(struct ctl_value),
+	.max_entries = CTL_MAP_SIZE,
+};
+
+static __always_inline __u32 get_packet_hash(struct packet_description *pckt,
+					     bool ipv6)
+{
+	if (ipv6)
+		return jhash_2words(jhash(pckt->srcv6, 16, MAX_VIPS),
+				    pckt->ports, CH_RINGS_SIZE);
+	else
+		return jhash_2words(pckt->src, pckt->ports, CH_RINGS_SIZE);
+}
+
+static __always_inline bool get_packet_dst(struct real_definition **real,
+					   struct packet_description *pckt,
+					   struct vip_meta *vip_info,
+					   bool is_ipv6)
+{
+	__u32 hash = get_packet_hash(pckt, is_ipv6) % RING_SIZE;
+	__u32 key = RING_SIZE * vip_info->vip_num + hash;
+	__u32 *real_pos;
+
+	real_pos = bpf_map_lookup_elem(&ch_rings, &key);
+	if (!real_pos)
+		return false;
+	key = *real_pos;
+	*real = bpf_map_lookup_elem(&reals, &key);
+	if (!(*real))
+		return false;
+	return true;
+}
+
+static __always_inline int parse_icmpv6(void *data, void *data_end, __u64 off,
+					struct packet_description *pckt)
+{
+	struct icmp6hdr *icmp_hdr;
+	struct ipv6hdr *ip6h;
+
+	icmp_hdr = data + off;
+	if (icmp_hdr + 1 > data_end)
+		return TC_ACT_SHOT;
+	if (icmp_hdr->icmp6_type != ICMPV6_PKT_TOOBIG)
+		return TC_ACT_OK;
+	off += sizeof(struct icmp6hdr);
+	ip6h = data + off;
+	if (ip6h + 1 > data_end)
+		return TC_ACT_SHOT;
+	pckt->proto = ip6h->nexthdr;
+	pckt->flags |= F_ICMP;
+	memcpy(pckt->srcv6, ip6h->daddr.s6_addr32, 16);
+	memcpy(pckt->dstv6, ip6h->saddr.s6_addr32, 16);
+	return TC_ACT_UNSPEC;
+}
+
+static __always_inline int parse_icmp(void *data, void *data_end, __u64 off,
+				      struct packet_description *pckt)
+{
+	struct icmphdr *icmp_hdr;
+	struct iphdr *iph;
+
+	icmp_hdr = data + off;
+	if (icmp_hdr + 1 > data_end)
+		return TC_ACT_SHOT;
+	if (icmp_hdr->type != ICMP_DEST_UNREACH ||
+	    icmp_hdr->code != ICMP_FRAG_NEEDED)
+		return TC_ACT_OK;
+	off += sizeof(struct icmphdr);
+	iph = data + off;
+	if (iph + 1 > data_end)
+		return TC_ACT_SHOT;
+	if (iph->ihl != 5)
+		return TC_ACT_SHOT;
+	pckt->proto = iph->protocol;
+	pckt->flags |= F_ICMP;
+	pckt->src = iph->daddr;
+	pckt->dst = iph->saddr;
+	return TC_ACT_UNSPEC;
+}
+
+static __always_inline bool parse_udp(void *data, __u64 off, void *data_end,
+				      struct packet_description *pckt)
+{
+	struct udphdr *udp;
+	udp = data + off;
+
+	if (udp + 1 > data_end)
+		return false;
+
+	if (!(pckt->flags & F_ICMP)) {
+		pckt->port16[0] = udp->source;
+		pckt->port16[1] = udp->dest;
+	} else {
+		pckt->port16[0] = udp->dest;
+		pckt->port16[1] = udp->source;
+	}
+	return true;
+}
+
+static __always_inline bool parse_tcp(void *data, __u64 off, void *data_end,
+				      struct packet_description *pckt)
+{
+	struct tcphdr *tcp;
+
+	tcp = data + off;
+	if (tcp + 1 > data_end)
+		return false;
+
+	if (tcp->syn)
+		pckt->flags |= F_SYN_SET;
+
+	if (!(pckt->flags & F_ICMP)) {
+		pckt->port16[0] = tcp->source;
+		pckt->port16[1] = tcp->dest;
+	} else {
+		pckt->port16[0] = tcp->dest;
+		pckt->port16[1] = tcp->source;
+	}
+	return true;
+}
+
+static __always_inline int process_packet(void *data, __u64 off, void *data_end,
+					  bool is_ipv6, struct __sk_buff *skb)
+{
+	void *pkt_start = (void *)(long)skb->data;
+	struct packet_description pckt = {};
+	struct eth_hdr *eth = pkt_start;
+	struct bpf_tunnel_key tkey = {};
+	struct vip_stats *data_stats;
+	struct real_definition *dst;
+	struct vip_meta *vip_info;
+	struct ctl_value *cval;
+	__u32 v4_intf_pos = 1;
+	__u32 v6_intf_pos = 2;
+	struct ipv6hdr *ip6h;
+	struct vip vip = {};
+	struct iphdr *iph;
+	int tun_flag = 0;
+	__u16 pkt_bytes;
+	__u64 iph_len;
+	__u32 ifindex;
+	__u8 protocol;
+	__u32 vip_num;
+	int action;
+
+	tkey.tunnel_ttl = 64;
+	if (is_ipv6) {
+		ip6h = data + off;
+		if (ip6h + 1 > data_end)
+			return TC_ACT_SHOT;
+
+		iph_len = sizeof(struct ipv6hdr);
+		protocol = ip6h->nexthdr;
+		pckt.proto = protocol;
+		pkt_bytes = bpf_ntohs(ip6h->payload_len);
+		off += iph_len;
+		if (protocol == IPPROTO_FRAGMENT) {
+			return TC_ACT_SHOT;
+		} else if (protocol == IPPROTO_ICMPV6) {
+			action = parse_icmpv6(data, data_end, off, &pckt);
+			if (action >= 0)
+				return action;
+			off += IPV6_PLUS_ICMP_HDR;
+		} else {
+			memcpy(pckt.srcv6, ip6h->saddr.s6_addr32, 16);
+			memcpy(pckt.dstv6, ip6h->daddr.s6_addr32, 16);
+		}
+	} else {
+		iph = data + off;
+		if (iph + 1 > data_end)
+			return TC_ACT_SHOT;
+		if (iph->ihl != 5)
+			return TC_ACT_SHOT;
+
+		protocol = iph->protocol;
+		pckt.proto = protocol;
+		pkt_bytes = bpf_ntohs(iph->tot_len);
+		off += IPV4_HDR_LEN_NO_OPT;
+
+		if (iph->frag_off & PCKT_FRAGMENTED)
+			return TC_ACT_SHOT;
+		if (protocol == IPPROTO_ICMP) {
+			action = parse_icmp(data, data_end, off, &pckt);
+			if (action >= 0)
+				return action;
+			off += IPV4_PLUS_ICMP_HDR;
+		} else {
+			pckt.src = iph->saddr;
+			pckt.dst = iph->daddr;
+		}
+	}
+	protocol = pckt.proto;
+
+	if (protocol == IPPROTO_TCP) {
+		if (!parse_tcp(data, off, data_end, &pckt))
+			return TC_ACT_SHOT;
+	} else if (protocol == IPPROTO_UDP) {
+		if (!parse_udp(data, off, data_end, &pckt))
+			return TC_ACT_SHOT;
+	} else {
+		return TC_ACT_SHOT;
+	}
+
+	if (is_ipv6)
+		memcpy(vip.daddr.v6, pckt.dstv6, 16);
+	else
+		vip.daddr.v4 = pckt.dst;
+
+	vip.dport = pckt.port16[1];
+	vip.protocol = pckt.proto;
+	vip_info = bpf_map_lookup_elem(&vip_map, &vip);
+	if (!vip_info) {
+		vip.dport = 0;
+		vip_info = bpf_map_lookup_elem(&vip_map, &vip);
+		if (!vip_info)
+			return TC_ACT_SHOT;
+		pckt.port16[1] = 0;
+	}
+
+	if (vip_info->flags & F_HASH_NO_SRC_PORT)
+		pckt.port16[0] = 0;
+
+	if (!get_packet_dst(&dst, &pckt, vip_info, is_ipv6))
+		return TC_ACT_SHOT;
+
+	if (dst->flags & F_IPV6) {
+		cval = bpf_map_lookup_elem(&ctl_array, &v6_intf_pos);
+		if (!cval)
+			return TC_ACT_SHOT;
+		ifindex = cval->ifindex;
+		memcpy(tkey.remote_ipv6, dst->dstv6, 16);
+		tun_flag = BPF_F_TUNINFO_IPV6;
+	} else {
+		cval = bpf_map_lookup_elem(&ctl_array, &v4_intf_pos);
+		if (!cval)
+			return TC_ACT_SHOT;
+		ifindex = cval->ifindex;
+		tkey.remote_ipv4 = dst->dst;
+	}
+	vip_num = vip_info->vip_num;
+	data_stats = bpf_map_lookup_elem(&stats, &vip_num);
+	if (!data_stats)
+		return TC_ACT_SHOT;
+	data_stats->pkts++;
+	data_stats->bytes += pkt_bytes;
+	bpf_skb_set_tunnel_key(skb, &tkey, sizeof(tkey), tun_flag);
+	*(u32 *)eth->eth_dest = tkey.remote_ipv4;
+	return bpf_redirect(ifindex, 0);
+}
+
+SEC("l4lb-demo")
+int balancer_ingress(struct __sk_buff *ctx)
+{
+	void *data_end = (void *)(long)ctx->data_end;
+	void *data = (void *)(long)ctx->data;
+	struct eth_hdr *eth = data;
+	__u32 eth_proto;
+	__u32 nh_off;
+
+	nh_off = sizeof(struct eth_hdr);
+	if (data + nh_off > data_end)
+		return TC_ACT_SHOT;
+	eth_proto = eth->eth_proto;
+	if (eth_proto == bpf_htons(ETH_P_IP))
+		return process_packet(data, nh_off, data_end, false, ctx);
+	else if (eth_proto == bpf_htons(ETH_P_IPV6))
+		return process_packet(data, nh_off, data_end, true, ctx);
+	else
+		return TC_ACT_SHOT;
+}
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_l4lb_noinline.c b/tools/testing/selftests/bpf/test_l4lb_noinline.c
new file mode 100644
index 000000000..ba44a14e6
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_l4lb_noinline.c
@@ -0,0 +1,473 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2017 Facebook
+#include <stddef.h>
+#include <stdbool.h>
+#include <string.h>
+#include <linux/pkt_cls.h>
+#include <linux/bpf.h>
+#include <linux/in.h>
+#include <linux/if_ether.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/icmp.h>
+#include <linux/icmpv6.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+#include "bpf_helpers.h"
+#include "test_iptunnel_common.h"
+#include "bpf_endian.h"
+
+int _version SEC("version") = 1;
+
+static __u32 rol32(__u32 word, unsigned int shift)
+{
+	return (word << shift) | (word >> ((-shift) & 31));
+}
+
+/* copy paste of jhash from kernel sources to make sure llvm
+ * can compile it into valid sequence of bpf instructions
+ */
+#define __jhash_mix(a, b, c)			\
+{						\
+	a -= c;  a ^= rol32(c, 4);  c += b;	\
+	b -= a;  b ^= rol32(a, 6);  a += c;	\
+	c -= b;  c ^= rol32(b, 8);  b += a;	\
+	a -= c;  a ^= rol32(c, 16); c += b;	\
+	b -= a;  b ^= rol32(a, 19); a += c;	\
+	c -= b;  c ^= rol32(b, 4);  b += a;	\
+}
+
+#define __jhash_final(a, b, c)			\
+{						\
+	c ^= b; c -= rol32(b, 14);		\
+	a ^= c; a -= rol32(c, 11);		\
+	b ^= a; b -= rol32(a, 25);		\
+	c ^= b; c -= rol32(b, 16);		\
+	a ^= c; a -= rol32(c, 4);		\
+	b ^= a; b -= rol32(a, 14);		\
+	c ^= b; c -= rol32(b, 24);		\
+}
+
+#define JHASH_INITVAL		0xdeadbeef
+
+typedef unsigned int u32;
+
+static u32 jhash(const void *key, u32 length, u32 initval)
+{
+	u32 a, b, c;
+	const unsigned char *k = key;
+
+	a = b = c = JHASH_INITVAL + length + initval;
+
+	while (length > 12) {
+		a += *(u32 *)(k);
+		b += *(u32 *)(k + 4);
+		c += *(u32 *)(k + 8);
+		__jhash_mix(a, b, c);
+		length -= 12;
+		k += 12;
+	}
+	switch (length) {
+	case 12: c += (u32)k[11]<<24;
+	case 11: c += (u32)k[10]<<16;
+	case 10: c += (u32)k[9]<<8;
+	case 9:  c += k[8];
+	case 8:  b += (u32)k[7]<<24;
+	case 7:  b += (u32)k[6]<<16;
+	case 6:  b += (u32)k[5]<<8;
+	case 5:  b += k[4];
+	case 4:  a += (u32)k[3]<<24;
+	case 3:  a += (u32)k[2]<<16;
+	case 2:  a += (u32)k[1]<<8;
+	case 1:  a += k[0];
+		 __jhash_final(a, b, c);
+	case 0: /* Nothing left to add */
+		break;
+	}
+
+	return c;
+}
+
+static u32 __jhash_nwords(u32 a, u32 b, u32 c, u32 initval)
+{
+	a += initval;
+	b += initval;
+	c += initval;
+	__jhash_final(a, b, c);
+	return c;
+}
+
+static u32 jhash_2words(u32 a, u32 b, u32 initval)
+{
+	return __jhash_nwords(a, b, 0, initval + JHASH_INITVAL + (2 << 2));
+}
+
+#define PCKT_FRAGMENTED 65343
+#define IPV4_HDR_LEN_NO_OPT 20
+#define IPV4_PLUS_ICMP_HDR 28
+#define IPV6_PLUS_ICMP_HDR 48
+#define RING_SIZE 2
+#define MAX_VIPS 12
+#define MAX_REALS 5
+#define CTL_MAP_SIZE 16
+#define CH_RINGS_SIZE (MAX_VIPS * RING_SIZE)
+#define F_IPV6 (1 << 0)
+#define F_HASH_NO_SRC_PORT (1 << 0)
+#define F_ICMP (1 << 0)
+#define F_SYN_SET (1 << 1)
+
+struct packet_description {
+	union {
+		__be32 src;
+		__be32 srcv6[4];
+	};
+	union {
+		__be32 dst;
+		__be32 dstv6[4];
+	};
+	union {
+		__u32 ports;
+		__u16 port16[2];
+	};
+	__u8 proto;
+	__u8 flags;
+};
+
+struct ctl_value {
+	union {
+		__u64 value;
+		__u32 ifindex;
+		__u8 mac[6];
+	};
+};
+
+struct vip_meta {
+	__u32 flags;
+	__u32 vip_num;
+};
+
+struct real_definition {
+	union {
+		__be32 dst;
+		__be32 dstv6[4];
+	};
+	__u8 flags;
+};
+
+struct vip_stats {
+	__u64 bytes;
+	__u64 pkts;
+};
+
+struct eth_hdr {
+	unsigned char eth_dest[ETH_ALEN];
+	unsigned char eth_source[ETH_ALEN];
+	unsigned short eth_proto;
+};
+
+struct bpf_map_def SEC("maps") vip_map = {
+	.type = BPF_MAP_TYPE_HASH,
+	.key_size = sizeof(struct vip),
+	.value_size = sizeof(struct vip_meta),
+	.max_entries = MAX_VIPS,
+};
+
+struct bpf_map_def SEC("maps") ch_rings = {
+	.type = BPF_MAP_TYPE_ARRAY,
+	.key_size = sizeof(__u32),
+	.value_size = sizeof(__u32),
+	.max_entries = CH_RINGS_SIZE,
+};
+
+struct bpf_map_def SEC("maps") reals = {
+	.type = BPF_MAP_TYPE_ARRAY,
+	.key_size = sizeof(__u32),
+	.value_size = sizeof(struct real_definition),
+	.max_entries = MAX_REALS,
+};
+
+struct bpf_map_def SEC("maps") stats = {
+	.type = BPF_MAP_TYPE_PERCPU_ARRAY,
+	.key_size = sizeof(__u32),
+	.value_size = sizeof(struct vip_stats),
+	.max_entries = MAX_VIPS,
+};
+
+struct bpf_map_def SEC("maps") ctl_array = {
+	.type = BPF_MAP_TYPE_ARRAY,
+	.key_size = sizeof(__u32),
+	.value_size = sizeof(struct ctl_value),
+	.max_entries = CTL_MAP_SIZE,
+};
+
+static __u32 get_packet_hash(struct packet_description *pckt,
+			     bool ipv6)
+{
+	if (ipv6)
+		return jhash_2words(jhash(pckt->srcv6, 16, MAX_VIPS),
+				    pckt->ports, CH_RINGS_SIZE);
+	else
+		return jhash_2words(pckt->src, pckt->ports, CH_RINGS_SIZE);
+}
+
+static bool get_packet_dst(struct real_definition **real,
+			   struct packet_description *pckt,
+			   struct vip_meta *vip_info,
+			   bool is_ipv6)
+{
+	__u32 hash = get_packet_hash(pckt, is_ipv6);
+	__u32 key = RING_SIZE * vip_info->vip_num + hash % RING_SIZE;
+	__u32 *real_pos;
+
+	if (hash != 0x358459b7 /* jhash of ipv4 packet */  &&
+	    hash != 0x2f4bc6bb /* jhash of ipv6 packet */)
+		return 0;
+
+	real_pos = bpf_map_lookup_elem(&ch_rings, &key);
+	if (!real_pos)
+		return false;
+	key = *real_pos;
+	*real = bpf_map_lookup_elem(&reals, &key);
+	if (!(*real))
+		return false;
+	return true;
+}
+
+static int parse_icmpv6(void *data, void *data_end, __u64 off,
+			struct packet_description *pckt)
+{
+	struct icmp6hdr *icmp_hdr;
+	struct ipv6hdr *ip6h;
+
+	icmp_hdr = data + off;
+	if (icmp_hdr + 1 > data_end)
+		return TC_ACT_SHOT;
+	if (icmp_hdr->icmp6_type != ICMPV6_PKT_TOOBIG)
+		return TC_ACT_OK;
+	off += sizeof(struct icmp6hdr);
+	ip6h = data + off;
+	if (ip6h + 1 > data_end)
+		return TC_ACT_SHOT;
+	pckt->proto = ip6h->nexthdr;
+	pckt->flags |= F_ICMP;
+	memcpy(pckt->srcv6, ip6h->daddr.s6_addr32, 16);
+	memcpy(pckt->dstv6, ip6h->saddr.s6_addr32, 16);
+	return TC_ACT_UNSPEC;
+}
+
+static int parse_icmp(void *data, void *data_end, __u64 off,
+		      struct packet_description *pckt)
+{
+	struct icmphdr *icmp_hdr;
+	struct iphdr *iph;
+
+	icmp_hdr = data + off;
+	if (icmp_hdr + 1 > data_end)
+		return TC_ACT_SHOT;
+	if (icmp_hdr->type != ICMP_DEST_UNREACH ||
+	    icmp_hdr->code != ICMP_FRAG_NEEDED)
+		return TC_ACT_OK;
+	off += sizeof(struct icmphdr);
+	iph = data + off;
+	if (iph + 1 > data_end)
+		return TC_ACT_SHOT;
+	if (iph->ihl != 5)
+		return TC_ACT_SHOT;
+	pckt->proto = iph->protocol;
+	pckt->flags |= F_ICMP;
+	pckt->src = iph->daddr;
+	pckt->dst = iph->saddr;
+	return TC_ACT_UNSPEC;
+}
+
+static bool parse_udp(void *data, __u64 off, void *data_end,
+		      struct packet_description *pckt)
+{
+	struct udphdr *udp;
+	udp = data + off;
+
+	if (udp + 1 > data_end)
+		return false;
+
+	if (!(pckt->flags & F_ICMP)) {
+		pckt->port16[0] = udp->source;
+		pckt->port16[1] = udp->dest;
+	} else {
+		pckt->port16[0] = udp->dest;
+		pckt->port16[1] = udp->source;
+	}
+	return true;
+}
+
+static bool parse_tcp(void *data, __u64 off, void *data_end,
+		      struct packet_description *pckt)
+{
+	struct tcphdr *tcp;
+
+	tcp = data + off;
+	if (tcp + 1 > data_end)
+		return false;
+
+	if (tcp->syn)
+		pckt->flags |= F_SYN_SET;
+
+	if (!(pckt->flags & F_ICMP)) {
+		pckt->port16[0] = tcp->source;
+		pckt->port16[1] = tcp->dest;
+	} else {
+		pckt->port16[0] = tcp->dest;
+		pckt->port16[1] = tcp->source;
+	}
+	return true;
+}
+
+static int process_packet(void *data, __u64 off, void *data_end,
+			  bool is_ipv6, struct __sk_buff *skb)
+{
+	void *pkt_start = (void *)(long)skb->data;
+	struct packet_description pckt = {};
+	struct eth_hdr *eth = pkt_start;
+	struct bpf_tunnel_key tkey = {};
+	struct vip_stats *data_stats;
+	struct real_definition *dst;
+	struct vip_meta *vip_info;
+	struct ctl_value *cval;
+	__u32 v4_intf_pos = 1;
+	__u32 v6_intf_pos = 2;
+	struct ipv6hdr *ip6h;
+	struct vip vip = {};
+	struct iphdr *iph;
+	int tun_flag = 0;
+	__u16 pkt_bytes;
+	__u64 iph_len;
+	__u32 ifindex;
+	__u8 protocol;
+	__u32 vip_num;
+	int action;
+
+	tkey.tunnel_ttl = 64;
+	if (is_ipv6) {
+		ip6h = data + off;
+		if (ip6h + 1 > data_end)
+			return TC_ACT_SHOT;
+
+		iph_len = sizeof(struct ipv6hdr);
+		protocol = ip6h->nexthdr;
+		pckt.proto = protocol;
+		pkt_bytes = bpf_ntohs(ip6h->payload_len);
+		off += iph_len;
+		if (protocol == IPPROTO_FRAGMENT) {
+			return TC_ACT_SHOT;
+		} else if (protocol == IPPROTO_ICMPV6) {
+			action = parse_icmpv6(data, data_end, off, &pckt);
+			if (action >= 0)
+				return action;
+			off += IPV6_PLUS_ICMP_HDR;
+		} else {
+			memcpy(pckt.srcv6, ip6h->saddr.s6_addr32, 16);
+			memcpy(pckt.dstv6, ip6h->daddr.s6_addr32, 16);
+		}
+	} else {
+		iph = data + off;
+		if (iph + 1 > data_end)
+			return TC_ACT_SHOT;
+		if (iph->ihl != 5)
+			return TC_ACT_SHOT;
+
+		protocol = iph->protocol;
+		pckt.proto = protocol;
+		pkt_bytes = bpf_ntohs(iph->tot_len);
+		off += IPV4_HDR_LEN_NO_OPT;
+
+		if (iph->frag_off & PCKT_FRAGMENTED)
+			return TC_ACT_SHOT;
+		if (protocol == IPPROTO_ICMP) {
+			action = parse_icmp(data, data_end, off, &pckt);
+			if (action >= 0)
+				return action;
+			off += IPV4_PLUS_ICMP_HDR;
+		} else {
+			pckt.src = iph->saddr;
+			pckt.dst = iph->daddr;
+		}
+	}
+	protocol = pckt.proto;
+
+	if (protocol == IPPROTO_TCP) {
+		if (!parse_tcp(data, off, data_end, &pckt))
+			return TC_ACT_SHOT;
+	} else if (protocol == IPPROTO_UDP) {
+		if (!parse_udp(data, off, data_end, &pckt))
+			return TC_ACT_SHOT;
+	} else {
+		return TC_ACT_SHOT;
+	}
+
+	if (is_ipv6)
+		memcpy(vip.daddr.v6, pckt.dstv6, 16);
+	else
+		vip.daddr.v4 = pckt.dst;
+
+	vip.dport = pckt.port16[1];
+	vip.protocol = pckt.proto;
+	vip_info = bpf_map_lookup_elem(&vip_map, &vip);
+	if (!vip_info) {
+		vip.dport = 0;
+		vip_info = bpf_map_lookup_elem(&vip_map, &vip);
+		if (!vip_info)
+			return TC_ACT_SHOT;
+		pckt.port16[1] = 0;
+	}
+
+	if (vip_info->flags & F_HASH_NO_SRC_PORT)
+		pckt.port16[0] = 0;
+
+	if (!get_packet_dst(&dst, &pckt, vip_info, is_ipv6))
+		return TC_ACT_SHOT;
+
+	if (dst->flags & F_IPV6) {
+		cval = bpf_map_lookup_elem(&ctl_array, &v6_intf_pos);
+		if (!cval)
+			return TC_ACT_SHOT;
+		ifindex = cval->ifindex;
+		memcpy(tkey.remote_ipv6, dst->dstv6, 16);
+		tun_flag = BPF_F_TUNINFO_IPV6;
+	} else {
+		cval = bpf_map_lookup_elem(&ctl_array, &v4_intf_pos);
+		if (!cval)
+			return TC_ACT_SHOT;
+		ifindex = cval->ifindex;
+		tkey.remote_ipv4 = dst->dst;
+	}
+	vip_num = vip_info->vip_num;
+	data_stats = bpf_map_lookup_elem(&stats, &vip_num);
+	if (!data_stats)
+		return TC_ACT_SHOT;
+	data_stats->pkts++;
+	data_stats->bytes += pkt_bytes;
+	bpf_skb_set_tunnel_key(skb, &tkey, sizeof(tkey), tun_flag);
+	*(u32 *)eth->eth_dest = tkey.remote_ipv4;
+	return bpf_redirect(ifindex, 0);
+}
+
+SEC("l4lb-demo")
+int balancer_ingress(struct __sk_buff *ctx)
+{
+	void *data_end = (void *)(long)ctx->data_end;
+	void *data = (void *)(long)ctx->data;
+	struct eth_hdr *eth = data;
+	__u32 eth_proto;
+	__u32 nh_off;
+
+	nh_off = sizeof(struct eth_hdr);
+	if (data + nh_off > data_end)
+		return TC_ACT_SHOT;
+	eth_proto = eth->eth_proto;
+	if (eth_proto == bpf_htons(ETH_P_IP))
+		return process_packet(data, nh_off, data_end, false, ctx);
+	else if (eth_proto == bpf_htons(ETH_P_IPV6))
+		return process_packet(data, nh_off, data_end, true, ctx);
+	else
+		return TC_ACT_SHOT;
+}
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_libbpf.sh b/tools/testing/selftests/bpf/test_libbpf.sh
new file mode 100755
index 000000000..2989b2e2d
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_libbpf.sh
@@ -0,0 +1,43 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+export TESTNAME=test_libbpf
+
+# Determine selftest success via shell exit code
+exit_handler()
+{
+	if [ $? -eq 0 ]; then
+		echo "selftests: $TESTNAME [PASS]";
+	else
+		echo "$TESTNAME: failed at file $LAST_LOADED" 1>&2
+		echo "selftests: $TESTNAME [FAILED]";
+	fi
+}
+
+libbpf_open_file()
+{
+	LAST_LOADED=$1
+	if [ -n "$VERBOSE" ]; then
+	    ./test_libbpf_open $1
+	else
+	    ./test_libbpf_open --quiet $1
+	fi
+}
+
+# Exit script immediately (well catched by trap handler) if any
+# program/thing exits with a non-zero status.
+set -e
+
+# (Use 'trap -l' to list meaning of numbers)
+trap exit_handler 0 2 3 6 9
+
+libbpf_open_file test_l4lb.o
+
+# Load a program with BPF-to-BPF calls
+libbpf_open_file test_l4lb_noinline.o
+
+# Load a program compiled without the "-target bpf" flag
+libbpf_open_file test_xdp.o
+
+# Success
+exit 0
diff --git a/tools/testing/selftests/bpf/test_libbpf_open.c b/tools/testing/selftests/bpf/test_libbpf_open.c
new file mode 100644
index 000000000..cbd55f5f8
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_libbpf_open.c
@@ -0,0 +1,152 @@
+/* SPDX-License-Identifier: GPL-2.0
+ * Copyright (c) 2018 Jesper Dangaard Brouer, Red Hat Inc.
+ */
+static const char *__doc__ =
+	"Libbpf test program for loading BPF ELF object files";
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdarg.h>
+#include <bpf/libbpf.h>
+#include <getopt.h>
+
+#include "bpf_rlimit.h"
+
+static const struct option long_options[] = {
+	{"help",	no_argument,		NULL, 'h' },
+	{"debug",	no_argument,		NULL, 'D' },
+	{"quiet",	no_argument,		NULL, 'q' },
+	{0, 0, NULL,  0 }
+};
+
+static void usage(char *argv[])
+{
+	int i;
+
+	printf("\nDOCUMENTATION:\n%s\n\n", __doc__);
+	printf(" Usage: %s (options-see-below) BPF_FILE\n", argv[0]);
+	printf(" Listing options:\n");
+	for (i = 0; long_options[i].name != 0; i++) {
+		printf(" --%-12s", long_options[i].name);
+		printf(" short-option: -%c",
+		       long_options[i].val);
+		printf("\n");
+	}
+	printf("\n");
+}
+
+#define DEFINE_PRINT_FN(name, enabled) \
+static int libbpf_##name(const char *fmt, ...)  	\
+{							\
+        va_list args;					\
+        int ret;					\
+							\
+        va_start(args, fmt);				\
+	if (enabled) {					\
+		fprintf(stderr, "[" #name "] ");	\
+		ret = vfprintf(stderr, fmt, args);	\
+	}						\
+        va_end(args);					\
+        return ret;					\
+}
+DEFINE_PRINT_FN(warning, 1)
+DEFINE_PRINT_FN(info, 1)
+DEFINE_PRINT_FN(debug, 1)
+
+#define EXIT_FAIL_LIBBPF EXIT_FAILURE
+#define EXIT_FAIL_OPTION 2
+
+int test_walk_progs(struct bpf_object *obj, bool verbose)
+{
+	struct bpf_program *prog;
+	int cnt = 0;
+
+	bpf_object__for_each_program(prog, obj) {
+		cnt++;
+		if (verbose)
+			printf("Prog (count:%d) section_name: %s\n", cnt,
+			       bpf_program__title(prog, false));
+	}
+	return 0;
+}
+
+int test_walk_maps(struct bpf_object *obj, bool verbose)
+{
+	struct bpf_map *map;
+	int cnt = 0;
+
+	bpf_map__for_each(map, obj) {
+		cnt++;
+		if (verbose)
+			printf("Map (count:%d) name: %s\n", cnt,
+			       bpf_map__name(map));
+	}
+	return 0;
+}
+
+int test_open_file(char *filename, bool verbose)
+{
+	struct bpf_object *bpfobj = NULL;
+	long err;
+
+	if (verbose)
+		printf("Open BPF ELF-file with libbpf: %s\n", filename);
+
+	/* Load BPF ELF object file and check for errors */
+	bpfobj = bpf_object__open(filename);
+	err = libbpf_get_error(bpfobj);
+	if (err) {
+		char err_buf[128];
+		libbpf_strerror(err, err_buf, sizeof(err_buf));
+		if (verbose)
+			printf("Unable to load eBPF objects in file '%s': %s\n",
+			       filename, err_buf);
+		return EXIT_FAIL_LIBBPF;
+	}
+	test_walk_progs(bpfobj, verbose);
+	test_walk_maps(bpfobj, verbose);
+
+	if (verbose)
+		printf("Close BPF ELF-file with libbpf: %s\n",
+		       bpf_object__name(bpfobj));
+	bpf_object__close(bpfobj);
+
+	return 0;
+}
+
+int main(int argc, char **argv)
+{
+	char filename[1024] = { 0 };
+	bool verbose = 1;
+	int longindex = 0;
+	int opt;
+
+	libbpf_set_print(libbpf_warning, libbpf_info, NULL);
+
+	/* Parse commands line args */
+	while ((opt = getopt_long(argc, argv, "hDq",
+				  long_options, &longindex)) != -1) {
+		switch (opt) {
+		case 'D':
+			libbpf_set_print(libbpf_warning, libbpf_info,
+					 libbpf_debug);
+			break;
+		case 'q': /* Use in scripting mode */
+			verbose = 0;
+			break;
+		case 'h':
+		default:
+			usage(argv);
+			return EXIT_FAIL_OPTION;
+		}
+	}
+	if (optind >= argc) {
+		usage(argv);
+		printf("ERROR: Expected BPF_FILE argument after options\n");
+		return EXIT_FAIL_OPTION;
+	}
+	snprintf(filename, sizeof(filename), "%s", argv[optind]);
+
+	return test_open_file(filename, verbose);
+}
diff --git a/tools/testing/selftests/bpf/test_lirc_mode2.sh b/tools/testing/selftests/bpf/test_lirc_mode2.sh
new file mode 100755
index 000000000..795e56e3e
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_lirc_mode2.sh
@@ -0,0 +1,40 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+ret=$ksft_skip
+
+msg="skip all tests:"
+if [ $UID != 0 ]; then
+	echo $msg please run this as root >&2
+	exit $ksft_skip
+fi
+
+GREEN='\033[0;92m'
+RED='\033[0;31m'
+NC='\033[0m' # No Color
+
+modprobe rc-loopback
+
+for i in /sys/class/rc/rc*
+do
+	if grep -q DRV_NAME=rc-loopback $i/uevent
+	then
+		LIRCDEV=$(grep DEVNAME= $i/lirc*/uevent | sed sQDEVNAME=Q/dev/Q)
+	fi
+done
+
+if [ -n "$LIRCDEV" ];
+then
+	TYPE=lirc_mode2
+	./test_lirc_mode2_user $LIRCDEV
+	ret=$?
+	if [ $ret -ne 0 ]; then
+		echo -e ${RED}"FAIL: $TYPE"${NC}
+	else
+		echo -e ${GREEN}"PASS: $TYPE"${NC}
+	fi
+fi
+
+exit $ret
diff --git a/tools/testing/selftests/bpf/test_lirc_mode2_kern.c b/tools/testing/selftests/bpf/test_lirc_mode2_kern.c
new file mode 100644
index 000000000..ba2685556
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_lirc_mode2_kern.c
@@ -0,0 +1,23 @@
+// SPDX-License-Identifier: GPL-2.0
+// test ir decoder
+//
+// Copyright (C) 2018 Sean Young <sean@mess.org>
+
+#include <linux/bpf.h>
+#include <linux/lirc.h>
+#include "bpf_helpers.h"
+
+SEC("lirc_mode2")
+int bpf_decoder(unsigned int *sample)
+{
+	if (LIRC_IS_PULSE(*sample)) {
+		unsigned int duration = LIRC_VALUE(*sample);
+
+		if (duration & 0x10000)
+			bpf_rc_keydown(sample, 0x40, duration & 0xffff, 0);
+	}
+
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_lirc_mode2_user.c b/tools/testing/selftests/bpf/test_lirc_mode2_user.c
new file mode 100644
index 000000000..d470d63c3
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_lirc_mode2_user.c
@@ -0,0 +1,149 @@
+// SPDX-License-Identifier: GPL-2.0
+// test ir decoder
+//
+// Copyright (C) 2018 Sean Young <sean@mess.org>
+
+// A lirc chardev is a device representing a consumer IR (cir) device which
+// can receive infrared signals from remote control and/or transmit IR.
+//
+// IR is sent as a series of pulses and space somewhat like morse code. The
+// BPF program can decode this into scancodes so that rc-core can translate
+// this into input key codes using the rc keymap.
+//
+// This test works by sending IR over rc-loopback, so the IR is processed by
+// BPF and then decoded into scancodes. The lirc chardev must be the one
+// associated with rc-loopback, see the output of ir-keytable(1).
+//
+// The following CONFIG options must be enabled for the test to succeed:
+// CONFIG_RC_CORE=y
+// CONFIG_BPF_RAWIR_EVENT=y
+// CONFIG_RC_LOOPBACK=y
+
+// Steps:
+// 1. Open the /dev/lircN device for rc-loopback (given on command line)
+// 2. Attach bpf_lirc_mode2 program which decodes some IR.
+// 3. Send some IR to the same IR device; since it is loopback, this will
+//    end up in the bpf program
+// 4. bpf program should decode IR and report keycode
+// 5. We can read keycode from same /dev/lirc device
+
+#include <linux/bpf.h>
+#include <linux/lirc.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <poll.h>
+#include <sys/types.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+
+#include "bpf_util.h"
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+
+int main(int argc, char **argv)
+{
+	struct bpf_object *obj;
+	int ret, lircfd, progfd, mode;
+	int testir = 0x1dead;
+	u32 prog_ids[10], prog_flags[10], prog_cnt;
+
+	if (argc != 2) {
+		printf("Usage: %s /dev/lircN\n", argv[0]);
+		return 2;
+	}
+
+	ret = bpf_prog_load("test_lirc_mode2_kern.o",
+			    BPF_PROG_TYPE_LIRC_MODE2, &obj, &progfd);
+	if (ret) {
+		printf("Failed to load bpf program\n");
+		return 1;
+	}
+
+	lircfd = open(argv[1], O_RDWR | O_NONBLOCK);
+	if (lircfd == -1) {
+		printf("failed to open lirc device %s: %m\n", argv[1]);
+		return 1;
+	}
+
+	/* Let's try detach it before it was ever attached */
+	ret = bpf_prog_detach2(progfd, lircfd, BPF_LIRC_MODE2);
+	if (ret != -1 || errno != ENOENT) {
+		printf("bpf_prog_detach2 not attached should fail: %m\n");
+		return 1;
+	}
+
+	mode = LIRC_MODE_SCANCODE;
+	if (ioctl(lircfd, LIRC_SET_REC_MODE, &mode)) {
+		printf("failed to set rec mode: %m\n");
+		return 1;
+	}
+
+	prog_cnt = 10;
+	ret = bpf_prog_query(lircfd, BPF_LIRC_MODE2, 0, prog_flags, prog_ids,
+			     &prog_cnt);
+	if (ret) {
+		printf("Failed to query bpf programs on lirc device: %m\n");
+		return 1;
+	}
+
+	if (prog_cnt != 0) {
+		printf("Expected nothing to be attached\n");
+		return 1;
+	}
+
+	ret = bpf_prog_attach(progfd, lircfd, BPF_LIRC_MODE2, 0);
+	if (ret) {
+		printf("Failed to attach bpf to lirc device: %m\n");
+		return 1;
+	}
+
+	/* Write raw IR */
+	ret = write(lircfd, &testir, sizeof(testir));
+	if (ret != sizeof(testir)) {
+		printf("Failed to send test IR message: %m\n");
+		return 1;
+	}
+
+	struct pollfd pfd = { .fd = lircfd, .events = POLLIN };
+	struct lirc_scancode lsc;
+
+	poll(&pfd, 1, 100);
+
+	/* Read decoded IR */
+	ret = read(lircfd, &lsc, sizeof(lsc));
+	if (ret != sizeof(lsc)) {
+		printf("Failed to read decoded IR: %m\n");
+		return 1;
+	}
+
+	if (lsc.scancode != 0xdead || lsc.rc_proto != 64) {
+		printf("Incorrect scancode decoded\n");
+		return 1;
+	}
+
+	prog_cnt = 10;
+	ret = bpf_prog_query(lircfd, BPF_LIRC_MODE2, 0, prog_flags, prog_ids,
+			     &prog_cnt);
+	if (ret) {
+		printf("Failed to query bpf programs on lirc device: %m\n");
+		return 1;
+	}
+
+	if (prog_cnt != 1) {
+		printf("Expected one program to be attached\n");
+		return 1;
+	}
+
+	/* Let's try detaching it now it is actually attached */
+	ret = bpf_prog_detach2(progfd, lircfd, BPF_LIRC_MODE2);
+	if (ret) {
+		printf("bpf_prog_detach2: returned %m\n");
+		return 1;
+	}
+
+	return 0;
+}
diff --git a/tools/testing/selftests/bpf/test_lpm_map.c b/tools/testing/selftests/bpf/test_lpm_map.c
new file mode 100644
index 000000000..006be3963
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_lpm_map.c
@@ -0,0 +1,804 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Randomized tests for eBPF longest-prefix-match maps
+ *
+ * This program runs randomized tests against the lpm-bpf-map. It implements a
+ * "Trivial Longest Prefix Match" (tlpm) based on simple, linear, singly linked
+ * lists. The implementation should be pretty straightforward.
+ *
+ * Based on tlpm, this inserts randomized data into bpf-lpm-maps and verifies
+ * the trie-based bpf-map implementation behaves the same way as tlpm.
+ */
+
+#include <assert.h>
+#include <errno.h>
+#include <inttypes.h>
+#include <linux/bpf.h>
+#include <pthread.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <unistd.h>
+#include <arpa/inet.h>
+#include <sys/time.h>
+
+#include <bpf/bpf.h>
+
+#include "bpf_util.h"
+#include "bpf_rlimit.h"
+
+struct tlpm_node {
+	struct tlpm_node *next;
+	size_t n_bits;
+	uint8_t key[];
+};
+
+static struct tlpm_node *tlpm_match(struct tlpm_node *list,
+				    const uint8_t *key,
+				    size_t n_bits);
+
+static struct tlpm_node *tlpm_add(struct tlpm_node *list,
+				  const uint8_t *key,
+				  size_t n_bits)
+{
+	struct tlpm_node *node;
+	size_t n;
+
+	n = (n_bits + 7) / 8;
+
+	/* 'overwrite' an equivalent entry if one already exists */
+	node = tlpm_match(list, key, n_bits);
+	if (node && node->n_bits == n_bits) {
+		memcpy(node->key, key, n);
+		return list;
+	}
+
+	/* add new entry with @key/@n_bits to @list and return new head */
+
+	node = malloc(sizeof(*node) + n);
+	assert(node);
+
+	node->next = list;
+	node->n_bits = n_bits;
+	memcpy(node->key, key, n);
+
+	return node;
+}
+
+static void tlpm_clear(struct tlpm_node *list)
+{
+	struct tlpm_node *node;
+
+	/* free all entries in @list */
+
+	while ((node = list)) {
+		list = list->next;
+		free(node);
+	}
+}
+
+static struct tlpm_node *tlpm_match(struct tlpm_node *list,
+				    const uint8_t *key,
+				    size_t n_bits)
+{
+	struct tlpm_node *best = NULL;
+	size_t i;
+
+	/* Perform longest prefix-match on @key/@n_bits. That is, iterate all
+	 * entries and match each prefix against @key. Remember the "best"
+	 * entry we find (i.e., the longest prefix that matches) and return it
+	 * to the caller when done.
+	 */
+
+	for ( ; list; list = list->next) {
+		for (i = 0; i < n_bits && i < list->n_bits; ++i) {
+			if ((key[i / 8] & (1 << (7 - i % 8))) !=
+			    (list->key[i / 8] & (1 << (7 - i % 8))))
+				break;
+		}
+
+		if (i >= list->n_bits) {
+			if (!best || i > best->n_bits)
+				best = list;
+		}
+	}
+
+	return best;
+}
+
+static struct tlpm_node *tlpm_delete(struct tlpm_node *list,
+				     const uint8_t *key,
+				     size_t n_bits)
+{
+	struct tlpm_node *best = tlpm_match(list, key, n_bits);
+	struct tlpm_node *node;
+
+	if (!best || best->n_bits != n_bits)
+		return list;
+
+	if (best == list) {
+		node = best->next;
+		free(best);
+		return node;
+	}
+
+	for (node = list; node; node = node->next) {
+		if (node->next == best) {
+			node->next = best->next;
+			free(best);
+			return list;
+		}
+	}
+	/* should never get here */
+	assert(0);
+	return list;
+}
+
+static void test_lpm_basic(void)
+{
+	struct tlpm_node *list = NULL, *t1, *t2;
+
+	/* very basic, static tests to verify tlpm works as expected */
+
+	assert(!tlpm_match(list, (uint8_t[]){ 0xff }, 8));
+
+	t1 = list = tlpm_add(list, (uint8_t[]){ 0xff }, 8);
+	assert(t1 == tlpm_match(list, (uint8_t[]){ 0xff }, 8));
+	assert(t1 == tlpm_match(list, (uint8_t[]){ 0xff, 0xff }, 16));
+	assert(t1 == tlpm_match(list, (uint8_t[]){ 0xff, 0x00 }, 16));
+	assert(!tlpm_match(list, (uint8_t[]){ 0x7f }, 8));
+	assert(!tlpm_match(list, (uint8_t[]){ 0xfe }, 8));
+	assert(!tlpm_match(list, (uint8_t[]){ 0xff }, 7));
+
+	t2 = list = tlpm_add(list, (uint8_t[]){ 0xff, 0xff }, 16);
+	assert(t1 == tlpm_match(list, (uint8_t[]){ 0xff }, 8));
+	assert(t2 == tlpm_match(list, (uint8_t[]){ 0xff, 0xff }, 16));
+	assert(t1 == tlpm_match(list, (uint8_t[]){ 0xff, 0xff }, 15));
+	assert(!tlpm_match(list, (uint8_t[]){ 0x7f, 0xff }, 16));
+
+	list = tlpm_delete(list, (uint8_t[]){ 0xff, 0xff }, 16);
+	assert(t1 == tlpm_match(list, (uint8_t[]){ 0xff }, 8));
+	assert(t1 == tlpm_match(list, (uint8_t[]){ 0xff, 0xff }, 16));
+
+	list = tlpm_delete(list, (uint8_t[]){ 0xff }, 8);
+	assert(!tlpm_match(list, (uint8_t[]){ 0xff }, 8));
+
+	tlpm_clear(list);
+}
+
+static void test_lpm_order(void)
+{
+	struct tlpm_node *t1, *t2, *l1 = NULL, *l2 = NULL;
+	size_t i, j;
+
+	/* Verify the tlpm implementation works correctly regardless of the
+	 * order of entries. Insert a random set of entries into @l1, and copy
+	 * the same data in reverse order into @l2. Then verify a lookup of
+	 * random keys will yield the same result in both sets.
+	 */
+
+	for (i = 0; i < (1 << 12); ++i)
+		l1 = tlpm_add(l1, (uint8_t[]){
+					rand() % 0xff,
+					rand() % 0xff,
+				}, rand() % 16 + 1);
+
+	for (t1 = l1; t1; t1 = t1->next)
+		l2 = tlpm_add(l2, t1->key, t1->n_bits);
+
+	for (i = 0; i < (1 << 8); ++i) {
+		uint8_t key[] = { rand() % 0xff, rand() % 0xff };
+
+		t1 = tlpm_match(l1, key, 16);
+		t2 = tlpm_match(l2, key, 16);
+
+		assert(!t1 == !t2);
+		if (t1) {
+			assert(t1->n_bits == t2->n_bits);
+			for (j = 0; j < t1->n_bits; ++j)
+				assert((t1->key[j / 8] & (1 << (7 - j % 8))) ==
+				       (t2->key[j / 8] & (1 << (7 - j % 8))));
+		}
+	}
+
+	tlpm_clear(l1);
+	tlpm_clear(l2);
+}
+
+static void test_lpm_map(int keysize)
+{
+	size_t i, j, n_matches, n_matches_after_delete, n_nodes, n_lookups;
+	struct tlpm_node *t, *list = NULL;
+	struct bpf_lpm_trie_key *key;
+	uint8_t *data, *value;
+	int r, map;
+
+	/* Compare behavior of tlpm vs. bpf-lpm. Create a randomized set of
+	 * prefixes and insert it into both tlpm and bpf-lpm. Then run some
+	 * randomized lookups and verify both maps return the same result.
+	 */
+
+	n_matches = 0;
+	n_matches_after_delete = 0;
+	n_nodes = 1 << 8;
+	n_lookups = 1 << 16;
+
+	data = alloca(keysize);
+	memset(data, 0, keysize);
+
+	value = alloca(keysize + 1);
+	memset(value, 0, keysize + 1);
+
+	key = alloca(sizeof(*key) + keysize);
+	memset(key, 0, sizeof(*key) + keysize);
+
+	map = bpf_create_map(BPF_MAP_TYPE_LPM_TRIE,
+			     sizeof(*key) + keysize,
+			     keysize + 1,
+			     4096,
+			     BPF_F_NO_PREALLOC);
+	assert(map >= 0);
+
+	for (i = 0; i < n_nodes; ++i) {
+		for (j = 0; j < keysize; ++j)
+			value[j] = rand() & 0xff;
+		value[keysize] = rand() % (8 * keysize + 1);
+
+		list = tlpm_add(list, value, value[keysize]);
+
+		key->prefixlen = value[keysize];
+		memcpy(key->data, value, keysize);
+		r = bpf_map_update_elem(map, key, value, 0);
+		assert(!r);
+	}
+
+	for (i = 0; i < n_lookups; ++i) {
+		for (j = 0; j < keysize; ++j)
+			data[j] = rand() & 0xff;
+
+		t = tlpm_match(list, data, 8 * keysize);
+
+		key->prefixlen = 8 * keysize;
+		memcpy(key->data, data, keysize);
+		r = bpf_map_lookup_elem(map, key, value);
+		assert(!r || errno == ENOENT);
+		assert(!t == !!r);
+
+		if (t) {
+			++n_matches;
+			assert(t->n_bits == value[keysize]);
+			for (j = 0; j < t->n_bits; ++j)
+				assert((t->key[j / 8] & (1 << (7 - j % 8))) ==
+				       (value[j / 8] & (1 << (7 - j % 8))));
+		}
+	}
+
+	/* Remove the first half of the elements in the tlpm and the
+	 * corresponding nodes from the bpf-lpm.  Then run the same
+	 * large number of random lookups in both and make sure they match.
+	 * Note: we need to count the number of nodes actually inserted
+	 * since there may have been duplicates.
+	 */
+	for (i = 0, t = list; t; i++, t = t->next)
+		;
+	for (j = 0; j < i / 2; ++j) {
+		key->prefixlen = list->n_bits;
+		memcpy(key->data, list->key, keysize);
+		r = bpf_map_delete_elem(map, key);
+		assert(!r);
+		list = tlpm_delete(list, list->key, list->n_bits);
+		assert(list);
+	}
+	for (i = 0; i < n_lookups; ++i) {
+		for (j = 0; j < keysize; ++j)
+			data[j] = rand() & 0xff;
+
+		t = tlpm_match(list, data, 8 * keysize);
+
+		key->prefixlen = 8 * keysize;
+		memcpy(key->data, data, keysize);
+		r = bpf_map_lookup_elem(map, key, value);
+		assert(!r || errno == ENOENT);
+		assert(!t == !!r);
+
+		if (t) {
+			++n_matches_after_delete;
+			assert(t->n_bits == value[keysize]);
+			for (j = 0; j < t->n_bits; ++j)
+				assert((t->key[j / 8] & (1 << (7 - j % 8))) ==
+				       (value[j / 8] & (1 << (7 - j % 8))));
+		}
+	}
+
+	close(map);
+	tlpm_clear(list);
+
+	/* With 255 random nodes in the map, we are pretty likely to match
+	 * something on every lookup. For statistics, use this:
+	 *
+	 *     printf("          nodes: %zu\n"
+	 *            "        lookups: %zu\n"
+	 *            "        matches: %zu\n"
+	 *            "matches(delete): %zu\n",
+	 *            n_nodes, n_lookups, n_matches, n_matches_after_delete);
+	 */
+}
+
+/* Test the implementation with some 'real world' examples */
+
+static void test_lpm_ipaddr(void)
+{
+	struct bpf_lpm_trie_key *key_ipv4;
+	struct bpf_lpm_trie_key *key_ipv6;
+	size_t key_size_ipv4;
+	size_t key_size_ipv6;
+	int map_fd_ipv4;
+	int map_fd_ipv6;
+	__u64 value;
+
+	key_size_ipv4 = sizeof(*key_ipv4) + sizeof(__u32);
+	key_size_ipv6 = sizeof(*key_ipv6) + sizeof(__u32) * 4;
+	key_ipv4 = alloca(key_size_ipv4);
+	key_ipv6 = alloca(key_size_ipv6);
+
+	map_fd_ipv4 = bpf_create_map(BPF_MAP_TYPE_LPM_TRIE,
+				     key_size_ipv4, sizeof(value),
+				     100, BPF_F_NO_PREALLOC);
+	assert(map_fd_ipv4 >= 0);
+
+	map_fd_ipv6 = bpf_create_map(BPF_MAP_TYPE_LPM_TRIE,
+				     key_size_ipv6, sizeof(value),
+				     100, BPF_F_NO_PREALLOC);
+	assert(map_fd_ipv6 >= 0);
+
+	/* Fill data some IPv4 and IPv6 address ranges */
+	value = 1;
+	key_ipv4->prefixlen = 16;
+	inet_pton(AF_INET, "192.168.0.0", key_ipv4->data);
+	assert(bpf_map_update_elem(map_fd_ipv4, key_ipv4, &value, 0) == 0);
+
+	value = 2;
+	key_ipv4->prefixlen = 24;
+	inet_pton(AF_INET, "192.168.0.0", key_ipv4->data);
+	assert(bpf_map_update_elem(map_fd_ipv4, key_ipv4, &value, 0) == 0);
+
+	value = 3;
+	key_ipv4->prefixlen = 24;
+	inet_pton(AF_INET, "192.168.128.0", key_ipv4->data);
+	assert(bpf_map_update_elem(map_fd_ipv4, key_ipv4, &value, 0) == 0);
+
+	value = 5;
+	key_ipv4->prefixlen = 24;
+	inet_pton(AF_INET, "192.168.1.0", key_ipv4->data);
+	assert(bpf_map_update_elem(map_fd_ipv4, key_ipv4, &value, 0) == 0);
+
+	value = 4;
+	key_ipv4->prefixlen = 23;
+	inet_pton(AF_INET, "192.168.0.0", key_ipv4->data);
+	assert(bpf_map_update_elem(map_fd_ipv4, key_ipv4, &value, 0) == 0);
+
+	value = 0xdeadbeef;
+	key_ipv6->prefixlen = 64;
+	inet_pton(AF_INET6, "2a00:1450:4001:814::200e", key_ipv6->data);
+	assert(bpf_map_update_elem(map_fd_ipv6, key_ipv6, &value, 0) == 0);
+
+	/* Set tprefixlen to maximum for lookups */
+	key_ipv4->prefixlen = 32;
+	key_ipv6->prefixlen = 128;
+
+	/* Test some lookups that should come back with a value */
+	inet_pton(AF_INET, "192.168.128.23", key_ipv4->data);
+	assert(bpf_map_lookup_elem(map_fd_ipv4, key_ipv4, &value) == 0);
+	assert(value == 3);
+
+	inet_pton(AF_INET, "192.168.0.1", key_ipv4->data);
+	assert(bpf_map_lookup_elem(map_fd_ipv4, key_ipv4, &value) == 0);
+	assert(value == 2);
+
+	inet_pton(AF_INET6, "2a00:1450:4001:814::", key_ipv6->data);
+	assert(bpf_map_lookup_elem(map_fd_ipv6, key_ipv6, &value) == 0);
+	assert(value == 0xdeadbeef);
+
+	inet_pton(AF_INET6, "2a00:1450:4001:814::1", key_ipv6->data);
+	assert(bpf_map_lookup_elem(map_fd_ipv6, key_ipv6, &value) == 0);
+	assert(value == 0xdeadbeef);
+
+	/* Test some lookups that should not match any entry */
+	inet_pton(AF_INET, "10.0.0.1", key_ipv4->data);
+	assert(bpf_map_lookup_elem(map_fd_ipv4, key_ipv4, &value) == -1 &&
+	       errno == ENOENT);
+
+	inet_pton(AF_INET, "11.11.11.11", key_ipv4->data);
+	assert(bpf_map_lookup_elem(map_fd_ipv4, key_ipv4, &value) == -1 &&
+	       errno == ENOENT);
+
+	inet_pton(AF_INET6, "2a00:ffff::", key_ipv6->data);
+	assert(bpf_map_lookup_elem(map_fd_ipv6, key_ipv6, &value) == -1 &&
+	       errno == ENOENT);
+
+	close(map_fd_ipv4);
+	close(map_fd_ipv6);
+}
+
+static void test_lpm_delete(void)
+{
+	struct bpf_lpm_trie_key *key;
+	size_t key_size;
+	int map_fd;
+	__u64 value;
+
+	key_size = sizeof(*key) + sizeof(__u32);
+	key = alloca(key_size);
+
+	map_fd = bpf_create_map(BPF_MAP_TYPE_LPM_TRIE,
+				key_size, sizeof(value),
+				100, BPF_F_NO_PREALLOC);
+	assert(map_fd >= 0);
+
+	/* Add nodes:
+	 * 192.168.0.0/16   (1)
+	 * 192.168.0.0/24   (2)
+	 * 192.168.128.0/24 (3)
+	 * 192.168.1.0/24   (4)
+	 *
+	 *         (1)
+	 *        /   \
+         *     (IM)    (3)
+	 *    /   \
+         *   (2)  (4)
+	 */
+	value = 1;
+	key->prefixlen = 16;
+	inet_pton(AF_INET, "192.168.0.0", key->data);
+	assert(bpf_map_update_elem(map_fd, key, &value, 0) == 0);
+
+	value = 2;
+	key->prefixlen = 24;
+	inet_pton(AF_INET, "192.168.0.0", key->data);
+	assert(bpf_map_update_elem(map_fd, key, &value, 0) == 0);
+
+	value = 3;
+	key->prefixlen = 24;
+	inet_pton(AF_INET, "192.168.128.0", key->data);
+	assert(bpf_map_update_elem(map_fd, key, &value, 0) == 0);
+
+	value = 4;
+	key->prefixlen = 24;
+	inet_pton(AF_INET, "192.168.1.0", key->data);
+	assert(bpf_map_update_elem(map_fd, key, &value, 0) == 0);
+
+	/* remove non-existent node */
+	key->prefixlen = 32;
+	inet_pton(AF_INET, "10.0.0.1", key->data);
+	assert(bpf_map_lookup_elem(map_fd, key, &value) == -1 &&
+		errno == ENOENT);
+
+	key->prefixlen = 30; // unused prefix so far
+	inet_pton(AF_INET, "192.255.0.0", key->data);
+	assert(bpf_map_delete_elem(map_fd, key) == -1 &&
+		errno == ENOENT);
+
+	key->prefixlen = 16; // same prefix as the root node
+	inet_pton(AF_INET, "192.255.0.0", key->data);
+	assert(bpf_map_delete_elem(map_fd, key) == -1 &&
+		errno == ENOENT);
+
+	/* assert initial lookup */
+	key->prefixlen = 32;
+	inet_pton(AF_INET, "192.168.0.1", key->data);
+	assert(bpf_map_lookup_elem(map_fd, key, &value) == 0);
+	assert(value == 2);
+
+	/* remove leaf node */
+	key->prefixlen = 24;
+	inet_pton(AF_INET, "192.168.0.0", key->data);
+	assert(bpf_map_delete_elem(map_fd, key) == 0);
+
+	key->prefixlen = 32;
+	inet_pton(AF_INET, "192.168.0.1", key->data);
+	assert(bpf_map_lookup_elem(map_fd, key, &value) == 0);
+	assert(value == 1);
+
+	/* remove leaf (and intermediary) node */
+	key->prefixlen = 24;
+	inet_pton(AF_INET, "192.168.1.0", key->data);
+	assert(bpf_map_delete_elem(map_fd, key) == 0);
+
+	key->prefixlen = 32;
+	inet_pton(AF_INET, "192.168.1.1", key->data);
+	assert(bpf_map_lookup_elem(map_fd, key, &value) == 0);
+	assert(value == 1);
+
+	/* remove root node */
+	key->prefixlen = 16;
+	inet_pton(AF_INET, "192.168.0.0", key->data);
+	assert(bpf_map_delete_elem(map_fd, key) == 0);
+
+	key->prefixlen = 32;
+	inet_pton(AF_INET, "192.168.128.1", key->data);
+	assert(bpf_map_lookup_elem(map_fd, key, &value) == 0);
+	assert(value == 3);
+
+	/* remove last node */
+	key->prefixlen = 24;
+	inet_pton(AF_INET, "192.168.128.0", key->data);
+	assert(bpf_map_delete_elem(map_fd, key) == 0);
+
+	key->prefixlen = 32;
+	inet_pton(AF_INET, "192.168.128.1", key->data);
+	assert(bpf_map_lookup_elem(map_fd, key, &value) == -1 &&
+		errno == ENOENT);
+
+	close(map_fd);
+}
+
+static void test_lpm_get_next_key(void)
+{
+	struct bpf_lpm_trie_key *key_p, *next_key_p;
+	size_t key_size;
+	__u32 value = 0;
+	int map_fd;
+
+	key_size = sizeof(*key_p) + sizeof(__u32);
+	key_p = alloca(key_size);
+	next_key_p = alloca(key_size);
+
+	map_fd = bpf_create_map(BPF_MAP_TYPE_LPM_TRIE, key_size, sizeof(value),
+				100, BPF_F_NO_PREALLOC);
+	assert(map_fd >= 0);
+
+	/* empty tree. get_next_key should return ENOENT */
+	assert(bpf_map_get_next_key(map_fd, NULL, key_p) == -1 &&
+	       errno == ENOENT);
+
+	/* get and verify the first key, get the second one should fail. */
+	key_p->prefixlen = 16;
+	inet_pton(AF_INET, "192.168.0.0", key_p->data);
+	assert(bpf_map_update_elem(map_fd, key_p, &value, 0) == 0);
+
+	memset(key_p, 0, key_size);
+	assert(bpf_map_get_next_key(map_fd, NULL, key_p) == 0);
+	assert(key_p->prefixlen == 16 && key_p->data[0] == 192 &&
+	       key_p->data[1] == 168);
+
+	assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -1 &&
+	       errno == ENOENT);
+
+	/* no exact matching key should get the first one in post order. */
+	key_p->prefixlen = 8;
+	assert(bpf_map_get_next_key(map_fd, NULL, key_p) == 0);
+	assert(key_p->prefixlen == 16 && key_p->data[0] == 192 &&
+	       key_p->data[1] == 168);
+
+	/* add one more element (total two) */
+	key_p->prefixlen = 24;
+	inet_pton(AF_INET, "192.168.128.0", key_p->data);
+	assert(bpf_map_update_elem(map_fd, key_p, &value, 0) == 0);
+
+	memset(key_p, 0, key_size);
+	assert(bpf_map_get_next_key(map_fd, NULL, key_p) == 0);
+	assert(key_p->prefixlen == 24 && key_p->data[0] == 192 &&
+	       key_p->data[1] == 168 && key_p->data[2] == 128);
+
+	memset(next_key_p, 0, key_size);
+	assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0);
+	assert(next_key_p->prefixlen == 16 && next_key_p->data[0] == 192 &&
+	       next_key_p->data[1] == 168);
+
+	memcpy(key_p, next_key_p, key_size);
+	assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -1 &&
+	       errno == ENOENT);
+
+	/* Add one more element (total three) */
+	key_p->prefixlen = 24;
+	inet_pton(AF_INET, "192.168.0.0", key_p->data);
+	assert(bpf_map_update_elem(map_fd, key_p, &value, 0) == 0);
+
+	memset(key_p, 0, key_size);
+	assert(bpf_map_get_next_key(map_fd, NULL, key_p) == 0);
+	assert(key_p->prefixlen == 24 && key_p->data[0] == 192 &&
+	       key_p->data[1] == 168 && key_p->data[2] == 0);
+
+	memset(next_key_p, 0, key_size);
+	assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0);
+	assert(next_key_p->prefixlen == 24 && next_key_p->data[0] == 192 &&
+	       next_key_p->data[1] == 168 && next_key_p->data[2] == 128);
+
+	memcpy(key_p, next_key_p, key_size);
+	assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0);
+	assert(next_key_p->prefixlen == 16 && next_key_p->data[0] == 192 &&
+	       next_key_p->data[1] == 168);
+
+	memcpy(key_p, next_key_p, key_size);
+	assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -1 &&
+	       errno == ENOENT);
+
+	/* Add one more element (total four) */
+	key_p->prefixlen = 24;
+	inet_pton(AF_INET, "192.168.1.0", key_p->data);
+	assert(bpf_map_update_elem(map_fd, key_p, &value, 0) == 0);
+
+	memset(key_p, 0, key_size);
+	assert(bpf_map_get_next_key(map_fd, NULL, key_p) == 0);
+	assert(key_p->prefixlen == 24 && key_p->data[0] == 192 &&
+	       key_p->data[1] == 168 && key_p->data[2] == 0);
+
+	memset(next_key_p, 0, key_size);
+	assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0);
+	assert(next_key_p->prefixlen == 24 && next_key_p->data[0] == 192 &&
+	       next_key_p->data[1] == 168 && next_key_p->data[2] == 1);
+
+	memcpy(key_p, next_key_p, key_size);
+	assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0);
+	assert(next_key_p->prefixlen == 24 && next_key_p->data[0] == 192 &&
+	       next_key_p->data[1] == 168 && next_key_p->data[2] == 128);
+
+	memcpy(key_p, next_key_p, key_size);
+	assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0);
+	assert(next_key_p->prefixlen == 16 && next_key_p->data[0] == 192 &&
+	       next_key_p->data[1] == 168);
+
+	memcpy(key_p, next_key_p, key_size);
+	assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -1 &&
+	       errno == ENOENT);
+
+	/* Add one more element (total five) */
+	key_p->prefixlen = 28;
+	inet_pton(AF_INET, "192.168.1.128", key_p->data);
+	assert(bpf_map_update_elem(map_fd, key_p, &value, 0) == 0);
+
+	memset(key_p, 0, key_size);
+	assert(bpf_map_get_next_key(map_fd, NULL, key_p) == 0);
+	assert(key_p->prefixlen == 24 && key_p->data[0] == 192 &&
+	       key_p->data[1] == 168 && key_p->data[2] == 0);
+
+	memset(next_key_p, 0, key_size);
+	assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0);
+	assert(next_key_p->prefixlen == 28 && next_key_p->data[0] == 192 &&
+	       next_key_p->data[1] == 168 && next_key_p->data[2] == 1 &&
+	       next_key_p->data[3] == 128);
+
+	memcpy(key_p, next_key_p, key_size);
+	assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0);
+	assert(next_key_p->prefixlen == 24 && next_key_p->data[0] == 192 &&
+	       next_key_p->data[1] == 168 && next_key_p->data[2] == 1);
+
+	memcpy(key_p, next_key_p, key_size);
+	assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0);
+	assert(next_key_p->prefixlen == 24 && next_key_p->data[0] == 192 &&
+	       next_key_p->data[1] == 168 && next_key_p->data[2] == 128);
+
+	memcpy(key_p, next_key_p, key_size);
+	assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0);
+	assert(next_key_p->prefixlen == 16 && next_key_p->data[0] == 192 &&
+	       next_key_p->data[1] == 168);
+
+	memcpy(key_p, next_key_p, key_size);
+	assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -1 &&
+	       errno == ENOENT);
+
+	/* no exact matching key should return the first one in post order */
+	key_p->prefixlen = 22;
+	inet_pton(AF_INET, "192.168.1.0", key_p->data);
+	assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0);
+	assert(next_key_p->prefixlen == 24 && next_key_p->data[0] == 192 &&
+	       next_key_p->data[1] == 168 && next_key_p->data[2] == 0);
+
+	close(map_fd);
+}
+
+#define MAX_TEST_KEYS	4
+struct lpm_mt_test_info {
+	int cmd; /* 0: update, 1: delete, 2: lookup, 3: get_next_key */
+	int iter;
+	int map_fd;
+	struct {
+		__u32 prefixlen;
+		__u32 data;
+	} key[MAX_TEST_KEYS];
+};
+
+static void *lpm_test_command(void *arg)
+{
+	int i, j, ret, iter, key_size;
+	struct lpm_mt_test_info *info = arg;
+	struct bpf_lpm_trie_key *key_p;
+
+	key_size = sizeof(struct bpf_lpm_trie_key) + sizeof(__u32);
+	key_p = alloca(key_size);
+	for (iter = 0; iter < info->iter; iter++)
+		for (i = 0; i < MAX_TEST_KEYS; i++) {
+			/* first half of iterations in forward order,
+			 * and second half in backward order.
+			 */
+			j = (iter < (info->iter / 2)) ? i : MAX_TEST_KEYS - i - 1;
+			key_p->prefixlen = info->key[j].prefixlen;
+			memcpy(key_p->data, &info->key[j].data, sizeof(__u32));
+			if (info->cmd == 0) {
+				__u32 value = j;
+				/* update must succeed */
+				assert(bpf_map_update_elem(info->map_fd, key_p, &value, 0) == 0);
+			} else if (info->cmd == 1) {
+				ret = bpf_map_delete_elem(info->map_fd, key_p);
+				assert(ret == 0 || errno == ENOENT);
+			} else if (info->cmd == 2) {
+				__u32 value;
+				ret = bpf_map_lookup_elem(info->map_fd, key_p, &value);
+				assert(ret == 0 || errno == ENOENT);
+			} else {
+				struct bpf_lpm_trie_key *next_key_p = alloca(key_size);
+				ret = bpf_map_get_next_key(info->map_fd, key_p, next_key_p);
+				assert(ret == 0 || errno == ENOENT || errno == ENOMEM);
+			}
+		}
+
+	// Pass successful exit info back to the main thread
+	pthread_exit((void *)info);
+}
+
+static void setup_lpm_mt_test_info(struct lpm_mt_test_info *info, int map_fd)
+{
+	info->iter = 2000;
+	info->map_fd = map_fd;
+	info->key[0].prefixlen = 16;
+	inet_pton(AF_INET, "192.168.0.0", &info->key[0].data);
+	info->key[1].prefixlen = 24;
+	inet_pton(AF_INET, "192.168.0.0", &info->key[1].data);
+	info->key[2].prefixlen = 24;
+	inet_pton(AF_INET, "192.168.128.0", &info->key[2].data);
+	info->key[3].prefixlen = 24;
+	inet_pton(AF_INET, "192.168.1.0", &info->key[3].data);
+}
+
+static void test_lpm_multi_thread(void)
+{
+	struct lpm_mt_test_info info[4];
+	size_t key_size, value_size;
+	pthread_t thread_id[4];
+	int i, map_fd;
+	void *ret;
+
+	/* create a trie */
+	value_size = sizeof(__u32);
+	key_size = sizeof(struct bpf_lpm_trie_key) + value_size;
+	map_fd = bpf_create_map(BPF_MAP_TYPE_LPM_TRIE, key_size, value_size,
+				100, BPF_F_NO_PREALLOC);
+
+	/* create 4 threads to test update, delete, lookup and get_next_key */
+	setup_lpm_mt_test_info(&info[0], map_fd);
+	for (i = 0; i < 4; i++) {
+		if (i != 0)
+			memcpy(&info[i], &info[0], sizeof(info[i]));
+		info[i].cmd = i;
+		assert(pthread_create(&thread_id[i], NULL, &lpm_test_command, &info[i]) == 0);
+	}
+
+	for (i = 0; i < 4; i++)
+		assert(pthread_join(thread_id[i], &ret) == 0 && ret == (void *)&info[i]);
+
+	close(map_fd);
+}
+
+int main(void)
+{
+	int i;
+
+	/* we want predictable, pseudo random tests */
+	srand(0xf00ba1);
+
+	test_lpm_basic();
+	test_lpm_order();
+
+	/* Test with 8, 16, 24, 32, ... 128 bit prefix length */
+	for (i = 1; i <= 16; ++i)
+		test_lpm_map(i);
+
+	test_lpm_ipaddr();
+	test_lpm_delete();
+	test_lpm_get_next_key();
+	test_lpm_multi_thread();
+
+	printf("test_lpm: OK\n");
+	return 0;
+}
diff --git a/tools/testing/selftests/bpf/test_lru_map.c b/tools/testing/selftests/bpf/test_lru_map.c
new file mode 100644
index 000000000..781c7de34
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_lru_map.c
@@ -0,0 +1,646 @@
+/*
+ * Copyright (c) 2016 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <unistd.h>
+#include <errno.h>
+#include <string.h>
+#include <assert.h>
+#include <sched.h>
+#include <stdlib.h>
+#include <time.h>
+
+#include <sys/wait.h>
+
+#include <bpf/bpf.h>
+
+#include "bpf_util.h"
+#include "bpf_rlimit.h"
+
+#define LOCAL_FREE_TARGET	(128)
+#define PERCPU_FREE_TARGET	(4)
+
+static int nr_cpus;
+
+static int create_map(int map_type, int map_flags, unsigned int size)
+{
+	int map_fd;
+
+	map_fd = bpf_create_map(map_type, sizeof(unsigned long long),
+				sizeof(unsigned long long), size, map_flags);
+
+	if (map_fd == -1)
+		perror("bpf_create_map");
+
+	return map_fd;
+}
+
+static int map_subset(int map0, int map1)
+{
+	unsigned long long next_key = 0;
+	unsigned long long value0[nr_cpus], value1[nr_cpus];
+	int ret;
+
+	while (!bpf_map_get_next_key(map1, &next_key, &next_key)) {
+		assert(!bpf_map_lookup_elem(map1, &next_key, value1));
+		ret = bpf_map_lookup_elem(map0, &next_key, value0);
+		if (ret) {
+			printf("key:%llu not found from map. %s(%d)\n",
+			       next_key, strerror(errno), errno);
+			return 0;
+		}
+		if (value0[0] != value1[0]) {
+			printf("key:%llu value0:%llu != value1:%llu\n",
+			       next_key, value0[0], value1[0]);
+			return 0;
+		}
+	}
+	return 1;
+}
+
+static int map_equal(int lru_map, int expected)
+{
+	return map_subset(lru_map, expected) && map_subset(expected, lru_map);
+}
+
+static int sched_next_online(int pid, int *next_to_try)
+{
+	cpu_set_t cpuset;
+	int next = *next_to_try;
+	int ret = -1;
+
+	while (next < nr_cpus) {
+		CPU_ZERO(&cpuset);
+		CPU_SET(next++, &cpuset);
+		if (!sched_setaffinity(pid, sizeof(cpuset), &cpuset)) {
+			ret = 0;
+			break;
+		}
+	}
+
+	*next_to_try = next;
+	return ret;
+}
+
+/* Size of the LRU amp is 2
+ * Add key=1 (+1 key)
+ * Add key=2 (+1 key)
+ * Lookup Key=1
+ * Add Key=3
+ *   => Key=2 will be removed by LRU
+ * Iterate map.  Only found key=1 and key=3
+ */
+static void test_lru_sanity0(int map_type, int map_flags)
+{
+	unsigned long long key, value[nr_cpus];
+	int lru_map_fd, expected_map_fd;
+	int next_cpu = 0;
+
+	printf("%s (map_type:%d map_flags:0x%X): ", __func__, map_type,
+	       map_flags);
+
+	assert(sched_next_online(0, &next_cpu) != -1);
+
+	if (map_flags & BPF_F_NO_COMMON_LRU)
+		lru_map_fd = create_map(map_type, map_flags, 2 * nr_cpus);
+	else
+		lru_map_fd = create_map(map_type, map_flags, 2);
+	assert(lru_map_fd != -1);
+
+	expected_map_fd = create_map(BPF_MAP_TYPE_HASH, 0, 2);
+	assert(expected_map_fd != -1);
+
+	value[0] = 1234;
+
+	/* insert key=1 element */
+
+	key = 1;
+	assert(!bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST));
+	assert(!bpf_map_update_elem(expected_map_fd, &key, value,
+				    BPF_NOEXIST));
+
+	/* BPF_NOEXIST means: add new element if it doesn't exist */
+	assert(bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST) == -1
+	       /* key=1 already exists */
+	       && errno == EEXIST);
+
+	assert(bpf_map_update_elem(lru_map_fd, &key, value, -1) == -1 &&
+	       errno == EINVAL);
+
+	/* insert key=2 element */
+
+	/* check that key=2 is not found */
+	key = 2;
+	assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -1 &&
+	       errno == ENOENT);
+
+	/* BPF_EXIST means: update existing element */
+	assert(bpf_map_update_elem(lru_map_fd, &key, value, BPF_EXIST) == -1 &&
+	       /* key=2 is not there */
+	       errno == ENOENT);
+
+	assert(!bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST));
+
+	/* insert key=3 element */
+
+	/* check that key=3 is not found */
+	key = 3;
+	assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -1 &&
+	       errno == ENOENT);
+
+	/* check that key=1 can be found and mark the ref bit to
+	 * stop LRU from removing key=1
+	 */
+	key = 1;
+	assert(!bpf_map_lookup_elem(lru_map_fd, &key, value));
+	assert(value[0] == 1234);
+
+	key = 3;
+	assert(!bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST));
+	assert(!bpf_map_update_elem(expected_map_fd, &key, value,
+				    BPF_NOEXIST));
+
+	/* key=2 has been removed from the LRU */
+	key = 2;
+	assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -1);
+
+	assert(map_equal(lru_map_fd, expected_map_fd));
+
+	close(expected_map_fd);
+	close(lru_map_fd);
+
+	printf("Pass\n");
+}
+
+/* Size of the LRU map is 1.5*tgt_free
+ * Insert 1 to tgt_free (+tgt_free keys)
+ * Lookup 1 to tgt_free/2
+ * Insert 1+tgt_free to 2*tgt_free (+tgt_free keys)
+ * => 1+tgt_free/2 to LOCALFREE_TARGET will be removed by LRU
+ */
+static void test_lru_sanity1(int map_type, int map_flags, unsigned int tgt_free)
+{
+	unsigned long long key, end_key, value[nr_cpus];
+	int lru_map_fd, expected_map_fd;
+	unsigned int batch_size;
+	unsigned int map_size;
+	int next_cpu = 0;
+
+	if (map_flags & BPF_F_NO_COMMON_LRU)
+		/* This test is only applicable to common LRU list */
+		return;
+
+	printf("%s (map_type:%d map_flags:0x%X): ", __func__, map_type,
+	       map_flags);
+
+	assert(sched_next_online(0, &next_cpu) != -1);
+
+	batch_size = tgt_free / 2;
+	assert(batch_size * 2 == tgt_free);
+
+	map_size = tgt_free + batch_size;
+	lru_map_fd = create_map(map_type, map_flags, map_size);
+	assert(lru_map_fd != -1);
+
+	expected_map_fd = create_map(BPF_MAP_TYPE_HASH, 0, map_size);
+	assert(expected_map_fd != -1);
+
+	value[0] = 1234;
+
+	/* Insert 1 to tgt_free (+tgt_free keys) */
+	end_key = 1 + tgt_free;
+	for (key = 1; key < end_key; key++)
+		assert(!bpf_map_update_elem(lru_map_fd, &key, value,
+					    BPF_NOEXIST));
+
+	/* Lookup 1 to tgt_free/2 */
+	end_key = 1 + batch_size;
+	for (key = 1; key < end_key; key++) {
+		assert(!bpf_map_lookup_elem(lru_map_fd, &key, value));
+		assert(!bpf_map_update_elem(expected_map_fd, &key, value,
+					    BPF_NOEXIST));
+	}
+
+	/* Insert 1+tgt_free to 2*tgt_free
+	 * => 1+tgt_free/2 to LOCALFREE_TARGET will be
+	 * removed by LRU
+	 */
+	key = 1 + tgt_free;
+	end_key = key + tgt_free;
+	for (; key < end_key; key++) {
+		assert(!bpf_map_update_elem(lru_map_fd, &key, value,
+					    BPF_NOEXIST));
+		assert(!bpf_map_update_elem(expected_map_fd, &key, value,
+					    BPF_NOEXIST));
+	}
+
+	assert(map_equal(lru_map_fd, expected_map_fd));
+
+	close(expected_map_fd);
+	close(lru_map_fd);
+
+	printf("Pass\n");
+}
+
+/* Size of the LRU map 1.5 * tgt_free
+ * Insert 1 to tgt_free (+tgt_free keys)
+ * Update 1 to tgt_free/2
+ *   => The original 1 to tgt_free/2 will be removed due to
+ *      the LRU shrink process
+ * Re-insert 1 to tgt_free/2 again and do a lookup immeidately
+ * Insert 1+tgt_free to tgt_free*3/2
+ * Insert 1+tgt_free*3/2 to tgt_free*5/2
+ *   => Key 1+tgt_free to tgt_free*3/2
+ *      will be removed from LRU because it has never
+ *      been lookup and ref bit is not set
+ */
+static void test_lru_sanity2(int map_type, int map_flags, unsigned int tgt_free)
+{
+	unsigned long long key, value[nr_cpus];
+	unsigned long long end_key;
+	int lru_map_fd, expected_map_fd;
+	unsigned int batch_size;
+	unsigned int map_size;
+	int next_cpu = 0;
+
+	if (map_flags & BPF_F_NO_COMMON_LRU)
+		/* This test is only applicable to common LRU list */
+		return;
+
+	printf("%s (map_type:%d map_flags:0x%X): ", __func__, map_type,
+	       map_flags);
+
+	assert(sched_next_online(0, &next_cpu) != -1);
+
+	batch_size = tgt_free / 2;
+	assert(batch_size * 2 == tgt_free);
+
+	map_size = tgt_free + batch_size;
+	lru_map_fd = create_map(map_type, map_flags, map_size);
+	assert(lru_map_fd != -1);
+
+	expected_map_fd = create_map(BPF_MAP_TYPE_HASH, 0, map_size);
+	assert(expected_map_fd != -1);
+
+	value[0] = 1234;
+
+	/* Insert 1 to tgt_free (+tgt_free keys) */
+	end_key = 1 + tgt_free;
+	for (key = 1; key < end_key; key++)
+		assert(!bpf_map_update_elem(lru_map_fd, &key, value,
+					    BPF_NOEXIST));
+
+	/* Any bpf_map_update_elem will require to acquire a new node
+	 * from LRU first.
+	 *
+	 * The local list is running out of free nodes.
+	 * It gets from the global LRU list which tries to
+	 * shrink the inactive list to get tgt_free
+	 * number of free nodes.
+	 *
+	 * Hence, the oldest key 1 to tgt_free/2
+	 * are removed from the LRU list.
+	 */
+	key = 1;
+	if (map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
+		assert(!bpf_map_update_elem(lru_map_fd, &key, value,
+					    BPF_NOEXIST));
+		assert(!bpf_map_delete_elem(lru_map_fd, &key));
+	} else {
+		assert(bpf_map_update_elem(lru_map_fd, &key, value,
+					   BPF_EXIST));
+	}
+
+	/* Re-insert 1 to tgt_free/2 again and do a lookup
+	 * immeidately.
+	 */
+	end_key = 1 + batch_size;
+	value[0] = 4321;
+	for (key = 1; key < end_key; key++) {
+		assert(bpf_map_lookup_elem(lru_map_fd, &key, value));
+		assert(!bpf_map_update_elem(lru_map_fd, &key, value,
+					    BPF_NOEXIST));
+		assert(!bpf_map_lookup_elem(lru_map_fd, &key, value));
+		assert(value[0] == 4321);
+		assert(!bpf_map_update_elem(expected_map_fd, &key, value,
+					    BPF_NOEXIST));
+	}
+
+	value[0] = 1234;
+
+	/* Insert 1+tgt_free to tgt_free*3/2 */
+	end_key = 1 + tgt_free + batch_size;
+	for (key = 1 + tgt_free; key < end_key; key++)
+		/* These newly added but not referenced keys will be
+		 * gone during the next LRU shrink.
+		 */
+		assert(!bpf_map_update_elem(lru_map_fd, &key, value,
+					    BPF_NOEXIST));
+
+	/* Insert 1+tgt_free*3/2 to  tgt_free*5/2 */
+	end_key = key + tgt_free;
+	for (; key < end_key; key++) {
+		assert(!bpf_map_update_elem(lru_map_fd, &key, value,
+					    BPF_NOEXIST));
+		assert(!bpf_map_update_elem(expected_map_fd, &key, value,
+					    BPF_NOEXIST));
+	}
+
+	assert(map_equal(lru_map_fd, expected_map_fd));
+
+	close(expected_map_fd);
+	close(lru_map_fd);
+
+	printf("Pass\n");
+}
+
+/* Size of the LRU map is 2*tgt_free
+ * It is to test the active/inactive list rotation
+ * Insert 1 to 2*tgt_free (+2*tgt_free keys)
+ * Lookup key 1 to tgt_free*3/2
+ * Add 1+2*tgt_free to tgt_free*5/2 (+tgt_free/2 keys)
+ *  => key 1+tgt_free*3/2 to 2*tgt_free are removed from LRU
+ */
+static void test_lru_sanity3(int map_type, int map_flags, unsigned int tgt_free)
+{
+	unsigned long long key, end_key, value[nr_cpus];
+	int lru_map_fd, expected_map_fd;
+	unsigned int batch_size;
+	unsigned int map_size;
+	int next_cpu = 0;
+
+	if (map_flags & BPF_F_NO_COMMON_LRU)
+		/* This test is only applicable to common LRU list */
+		return;
+
+	printf("%s (map_type:%d map_flags:0x%X): ", __func__, map_type,
+	       map_flags);
+
+	assert(sched_next_online(0, &next_cpu) != -1);
+
+	batch_size = tgt_free / 2;
+	assert(batch_size * 2 == tgt_free);
+
+	map_size = tgt_free * 2;
+	lru_map_fd = create_map(map_type, map_flags, map_size);
+	assert(lru_map_fd != -1);
+
+	expected_map_fd = create_map(BPF_MAP_TYPE_HASH, 0, map_size);
+	assert(expected_map_fd != -1);
+
+	value[0] = 1234;
+
+	/* Insert 1 to 2*tgt_free (+2*tgt_free keys) */
+	end_key = 1 + (2 * tgt_free);
+	for (key = 1; key < end_key; key++)
+		assert(!bpf_map_update_elem(lru_map_fd, &key, value,
+					    BPF_NOEXIST));
+
+	/* Lookup key 1 to tgt_free*3/2 */
+	end_key = tgt_free + batch_size;
+	for (key = 1; key < end_key; key++) {
+		assert(!bpf_map_lookup_elem(lru_map_fd, &key, value));
+		assert(!bpf_map_update_elem(expected_map_fd, &key, value,
+					    BPF_NOEXIST));
+	}
+
+	/* Add 1+2*tgt_free to tgt_free*5/2
+	 * (+tgt_free/2 keys)
+	 */
+	key = 2 * tgt_free + 1;
+	end_key = key + batch_size;
+	for (; key < end_key; key++) {
+		assert(!bpf_map_update_elem(lru_map_fd, &key, value,
+					    BPF_NOEXIST));
+		assert(!bpf_map_update_elem(expected_map_fd, &key, value,
+					    BPF_NOEXIST));
+	}
+
+	assert(map_equal(lru_map_fd, expected_map_fd));
+
+	close(expected_map_fd);
+	close(lru_map_fd);
+
+	printf("Pass\n");
+}
+
+/* Test deletion */
+static void test_lru_sanity4(int map_type, int map_flags, unsigned int tgt_free)
+{
+	int lru_map_fd, expected_map_fd;
+	unsigned long long key, value[nr_cpus];
+	unsigned long long end_key;
+	int next_cpu = 0;
+
+	printf("%s (map_type:%d map_flags:0x%X): ", __func__, map_type,
+	       map_flags);
+
+	assert(sched_next_online(0, &next_cpu) != -1);
+
+	if (map_flags & BPF_F_NO_COMMON_LRU)
+		lru_map_fd = create_map(map_type, map_flags,
+					3 * tgt_free * nr_cpus);
+	else
+		lru_map_fd = create_map(map_type, map_flags, 3 * tgt_free);
+	assert(lru_map_fd != -1);
+
+	expected_map_fd = create_map(BPF_MAP_TYPE_HASH, 0,
+				     3 * tgt_free);
+	assert(expected_map_fd != -1);
+
+	value[0] = 1234;
+
+	for (key = 1; key <= 2 * tgt_free; key++)
+		assert(!bpf_map_update_elem(lru_map_fd, &key, value,
+					    BPF_NOEXIST));
+
+	key = 1;
+	assert(bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST));
+
+	for (key = 1; key <= tgt_free; key++) {
+		assert(!bpf_map_lookup_elem(lru_map_fd, &key, value));
+		assert(!bpf_map_update_elem(expected_map_fd, &key, value,
+					    BPF_NOEXIST));
+	}
+
+	for (; key <= 2 * tgt_free; key++) {
+		assert(!bpf_map_delete_elem(lru_map_fd, &key));
+		assert(bpf_map_delete_elem(lru_map_fd, &key));
+	}
+
+	end_key = key + 2 * tgt_free;
+	for (; key < end_key; key++) {
+		assert(!bpf_map_update_elem(lru_map_fd, &key, value,
+					    BPF_NOEXIST));
+		assert(!bpf_map_update_elem(expected_map_fd, &key, value,
+					    BPF_NOEXIST));
+	}
+
+	assert(map_equal(lru_map_fd, expected_map_fd));
+
+	close(expected_map_fd);
+	close(lru_map_fd);
+
+	printf("Pass\n");
+}
+
+static void do_test_lru_sanity5(unsigned long long last_key, int map_fd)
+{
+	unsigned long long key, value[nr_cpus];
+
+	/* Ensure the last key inserted by previous CPU can be found */
+	assert(!bpf_map_lookup_elem(map_fd, &last_key, value));
+
+	value[0] = 1234;
+
+	key = last_key + 1;
+	assert(!bpf_map_update_elem(map_fd, &key, value, BPF_NOEXIST));
+	assert(!bpf_map_lookup_elem(map_fd, &key, value));
+
+	/* Cannot find the last key because it was removed by LRU */
+	assert(bpf_map_lookup_elem(map_fd, &last_key, value));
+}
+
+/* Test map with only one element */
+static void test_lru_sanity5(int map_type, int map_flags)
+{
+	unsigned long long key, value[nr_cpus];
+	int next_cpu = 0;
+	int map_fd;
+
+	if (map_flags & BPF_F_NO_COMMON_LRU)
+		return;
+
+	printf("%s (map_type:%d map_flags:0x%X): ", __func__, map_type,
+	       map_flags);
+
+	map_fd = create_map(map_type, map_flags, 1);
+	assert(map_fd != -1);
+
+	value[0] = 1234;
+	key = 0;
+	assert(!bpf_map_update_elem(map_fd, &key, value, BPF_NOEXIST));
+
+	while (sched_next_online(0, &next_cpu) != -1) {
+		pid_t pid;
+
+		pid = fork();
+		if (pid == 0) {
+			do_test_lru_sanity5(key, map_fd);
+			exit(0);
+		} else if (pid == -1) {
+			printf("couldn't spawn process to test key:%llu\n",
+			       key);
+			exit(1);
+		} else {
+			int status;
+
+			assert(waitpid(pid, &status, 0) == pid);
+			assert(status == 0);
+			key++;
+		}
+	}
+
+	close(map_fd);
+	/* At least one key should be tested */
+	assert(key > 0);
+
+	printf("Pass\n");
+}
+
+/* Test list rotation for BPF_F_NO_COMMON_LRU map */
+static void test_lru_sanity6(int map_type, int map_flags, int tgt_free)
+{
+	int lru_map_fd, expected_map_fd;
+	unsigned long long key, value[nr_cpus];
+	unsigned int map_size = tgt_free * 2;
+	int next_cpu = 0;
+
+	if (!(map_flags & BPF_F_NO_COMMON_LRU))
+		return;
+
+	printf("%s (map_type:%d map_flags:0x%X): ", __func__, map_type,
+	       map_flags);
+
+	assert(sched_next_online(0, &next_cpu) != -1);
+
+	expected_map_fd = create_map(BPF_MAP_TYPE_HASH, 0, map_size);
+	assert(expected_map_fd != -1);
+
+	lru_map_fd = create_map(map_type, map_flags, map_size * nr_cpus);
+	assert(lru_map_fd != -1);
+
+	value[0] = 1234;
+
+	for (key = 1; key <= tgt_free; key++) {
+		assert(!bpf_map_update_elem(lru_map_fd, &key, value,
+					    BPF_NOEXIST));
+		assert(!bpf_map_update_elem(expected_map_fd, &key, value,
+					    BPF_NOEXIST));
+	}
+
+	for (; key <= tgt_free * 2; key++) {
+		unsigned long long stable_key;
+
+		/* Make ref bit sticky for key: [1, tgt_free] */
+		for (stable_key = 1; stable_key <= tgt_free; stable_key++) {
+			/* Mark the ref bit */
+			assert(!bpf_map_lookup_elem(lru_map_fd, &stable_key,
+						    value));
+		}
+		assert(!bpf_map_update_elem(lru_map_fd, &key, value,
+					    BPF_NOEXIST));
+	}
+
+	for (; key <= tgt_free * 3; key++) {
+		assert(!bpf_map_update_elem(lru_map_fd, &key, value,
+					    BPF_NOEXIST));
+		assert(!bpf_map_update_elem(expected_map_fd, &key, value,
+					    BPF_NOEXIST));
+	}
+
+	assert(map_equal(lru_map_fd, expected_map_fd));
+
+	close(expected_map_fd);
+	close(lru_map_fd);
+
+	printf("Pass\n");
+}
+
+int main(int argc, char **argv)
+{
+	int map_types[] = {BPF_MAP_TYPE_LRU_HASH,
+			     BPF_MAP_TYPE_LRU_PERCPU_HASH};
+	int map_flags[] = {0, BPF_F_NO_COMMON_LRU};
+	int t, f;
+
+	setbuf(stdout, NULL);
+
+	nr_cpus = bpf_num_possible_cpus();
+	assert(nr_cpus != -1);
+	printf("nr_cpus:%d\n\n", nr_cpus);
+
+	for (f = 0; f < sizeof(map_flags) / sizeof(*map_flags); f++) {
+		unsigned int tgt_free = (map_flags[f] & BPF_F_NO_COMMON_LRU) ?
+			PERCPU_FREE_TARGET : LOCAL_FREE_TARGET;
+
+		for (t = 0; t < sizeof(map_types) / sizeof(*map_types); t++) {
+			test_lru_sanity0(map_types[t], map_flags[f]);
+			test_lru_sanity1(map_types[t], map_flags[f], tgt_free);
+			test_lru_sanity2(map_types[t], map_flags[f], tgt_free);
+			test_lru_sanity3(map_types[t], map_flags[f], tgt_free);
+			test_lru_sanity4(map_types[t], map_flags[f], tgt_free);
+			test_lru_sanity5(map_types[t], map_flags[f]);
+			test_lru_sanity6(map_types[t], map_flags[f], tgt_free);
+
+			printf("\n");
+		}
+	}
+
+	return 0;
+}
diff --git a/tools/testing/selftests/bpf/test_lwt_seg6local.c b/tools/testing/selftests/bpf/test_lwt_seg6local.c
new file mode 100644
index 000000000..e2f6ed0a5
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_lwt_seg6local.c
@@ -0,0 +1,437 @@
+#include <stddef.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <linux/seg6_local.h>
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+#include "bpf_endian.h"
+
+#define bpf_printk(fmt, ...)				\
+({							\
+	char ____fmt[] = fmt;				\
+	bpf_trace_printk(____fmt, sizeof(____fmt),	\
+			##__VA_ARGS__);			\
+})
+
+/* Packet parsing state machine helpers. */
+#define cursor_advance(_cursor, _len) \
+	({ void *_tmp = _cursor; _cursor += _len; _tmp; })
+
+#define SR6_FLAG_ALERT (1 << 4)
+
+#define htonll(x) ((bpf_htonl(1)) == 1 ? (x) : ((uint64_t)bpf_htonl((x) & \
+				0xFFFFFFFF) << 32) | bpf_htonl((x) >> 32))
+#define ntohll(x) ((bpf_ntohl(1)) == 1 ? (x) : ((uint64_t)bpf_ntohl((x) & \
+				0xFFFFFFFF) << 32) | bpf_ntohl((x) >> 32))
+#define BPF_PACKET_HEADER __attribute__((packed))
+
+struct ip6_t {
+	unsigned int ver:4;
+	unsigned int priority:8;
+	unsigned int flow_label:20;
+	unsigned short payload_len;
+	unsigned char next_header;
+	unsigned char hop_limit;
+	unsigned long long src_hi;
+	unsigned long long src_lo;
+	unsigned long long dst_hi;
+	unsigned long long dst_lo;
+} BPF_PACKET_HEADER;
+
+struct ip6_addr_t {
+	unsigned long long hi;
+	unsigned long long lo;
+} BPF_PACKET_HEADER;
+
+struct ip6_srh_t {
+	unsigned char nexthdr;
+	unsigned char hdrlen;
+	unsigned char type;
+	unsigned char segments_left;
+	unsigned char first_segment;
+	unsigned char flags;
+	unsigned short tag;
+
+	struct ip6_addr_t segments[0];
+} BPF_PACKET_HEADER;
+
+struct sr6_tlv_t {
+	unsigned char type;
+	unsigned char len;
+	unsigned char value[0];
+} BPF_PACKET_HEADER;
+
+static __always_inline struct ip6_srh_t *get_srh(struct __sk_buff *skb)
+{
+	void *cursor, *data_end;
+	struct ip6_srh_t *srh;
+	struct ip6_t *ip;
+	uint8_t *ipver;
+
+	data_end = (void *)(long)skb->data_end;
+	cursor = (void *)(long)skb->data;
+	ipver = (uint8_t *)cursor;
+
+	if ((void *)ipver + sizeof(*ipver) > data_end)
+		return NULL;
+
+	if ((*ipver >> 4) != 6)
+		return NULL;
+
+	ip = cursor_advance(cursor, sizeof(*ip));
+	if ((void *)ip + sizeof(*ip) > data_end)
+		return NULL;
+
+	if (ip->next_header != 43)
+		return NULL;
+
+	srh = cursor_advance(cursor, sizeof(*srh));
+	if ((void *)srh + sizeof(*srh) > data_end)
+		return NULL;
+
+	if (srh->type != 4)
+		return NULL;
+
+	return srh;
+}
+
+static __always_inline
+int update_tlv_pad(struct __sk_buff *skb, uint32_t new_pad,
+		   uint32_t old_pad, uint32_t pad_off)
+{
+	int err;
+
+	if (new_pad != old_pad) {
+		err = bpf_lwt_seg6_adjust_srh(skb, pad_off,
+					  (int) new_pad - (int) old_pad);
+		if (err)
+			return err;
+	}
+
+	if (new_pad > 0) {
+		char pad_tlv_buf[16] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+					0, 0, 0};
+		struct sr6_tlv_t *pad_tlv = (struct sr6_tlv_t *) pad_tlv_buf;
+
+		pad_tlv->type = SR6_TLV_PADDING;
+		pad_tlv->len = new_pad - 2;
+
+		err = bpf_lwt_seg6_store_bytes(skb, pad_off,
+					       (void *)pad_tlv_buf, new_pad);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+static __always_inline
+int is_valid_tlv_boundary(struct __sk_buff *skb, struct ip6_srh_t *srh,
+			  uint32_t *tlv_off, uint32_t *pad_size,
+			  uint32_t *pad_off)
+{
+	uint32_t srh_off, cur_off;
+	int offset_valid = 0;
+	int err;
+
+	srh_off = (char *)srh - (char *)(long)skb->data;
+	// cur_off = end of segments, start of possible TLVs
+	cur_off = srh_off + sizeof(*srh) +
+		sizeof(struct ip6_addr_t) * (srh->first_segment + 1);
+
+	*pad_off = 0;
+
+	// we can only go as far as ~10 TLVs due to the BPF max stack size
+	#pragma clang loop unroll(full)
+	for (int i = 0; i < 10; i++) {
+		struct sr6_tlv_t tlv;
+
+		if (cur_off == *tlv_off)
+			offset_valid = 1;
+
+		if (cur_off >= srh_off + ((srh->hdrlen + 1) << 3))
+			break;
+
+		err = bpf_skb_load_bytes(skb, cur_off, &tlv, sizeof(tlv));
+		if (err)
+			return err;
+
+		if (tlv.type == SR6_TLV_PADDING) {
+			*pad_size = tlv.len + sizeof(tlv);
+			*pad_off = cur_off;
+
+			if (*tlv_off == srh_off) {
+				*tlv_off = cur_off;
+				offset_valid = 1;
+			}
+			break;
+
+		} else if (tlv.type == SR6_TLV_HMAC) {
+			break;
+		}
+
+		cur_off += sizeof(tlv) + tlv.len;
+	} // we reached the padding or HMAC TLVs, or the end of the SRH
+
+	if (*pad_off == 0)
+		*pad_off = cur_off;
+
+	if (*tlv_off == -1)
+		*tlv_off = cur_off;
+	else if (!offset_valid)
+		return -EINVAL;
+
+	return 0;
+}
+
+static __always_inline
+int add_tlv(struct __sk_buff *skb, struct ip6_srh_t *srh, uint32_t tlv_off,
+	    struct sr6_tlv_t *itlv, uint8_t tlv_size)
+{
+	uint32_t srh_off = (char *)srh - (char *)(long)skb->data;
+	uint8_t len_remaining, new_pad;
+	uint32_t pad_off = 0;
+	uint32_t pad_size = 0;
+	uint32_t partial_srh_len;
+	int err;
+
+	if (tlv_off != -1)
+		tlv_off += srh_off;
+
+	if (itlv->type == SR6_TLV_PADDING || itlv->type == SR6_TLV_HMAC)
+		return -EINVAL;
+
+	err = is_valid_tlv_boundary(skb, srh, &tlv_off, &pad_size, &pad_off);
+	if (err)
+		return err;
+
+	err = bpf_lwt_seg6_adjust_srh(skb, tlv_off, sizeof(*itlv) + itlv->len);
+	if (err)
+		return err;
+
+	err = bpf_lwt_seg6_store_bytes(skb, tlv_off, (void *)itlv, tlv_size);
+	if (err)
+		return err;
+
+	// the following can't be moved inside update_tlv_pad because the
+	// bpf verifier has some issues with it
+	pad_off += sizeof(*itlv) + itlv->len;
+	partial_srh_len = pad_off - srh_off;
+	len_remaining = partial_srh_len % 8;
+	new_pad = 8 - len_remaining;
+
+	if (new_pad == 1) // cannot pad for 1 byte only
+		new_pad = 9;
+	else if (new_pad == 8)
+		new_pad = 0;
+
+	return update_tlv_pad(skb, new_pad, pad_size, pad_off);
+}
+
+static __always_inline
+int delete_tlv(struct __sk_buff *skb, struct ip6_srh_t *srh,
+	       uint32_t tlv_off)
+{
+	uint32_t srh_off = (char *)srh - (char *)(long)skb->data;
+	uint8_t len_remaining, new_pad;
+	uint32_t partial_srh_len;
+	uint32_t pad_off = 0;
+	uint32_t pad_size = 0;
+	struct sr6_tlv_t tlv;
+	int err;
+
+	tlv_off += srh_off;
+
+	err = is_valid_tlv_boundary(skb, srh, &tlv_off, &pad_size, &pad_off);
+	if (err)
+		return err;
+
+	err = bpf_skb_load_bytes(skb, tlv_off, &tlv, sizeof(tlv));
+	if (err)
+		return err;
+
+	err = bpf_lwt_seg6_adjust_srh(skb, tlv_off, -(sizeof(tlv) + tlv.len));
+	if (err)
+		return err;
+
+	pad_off -= sizeof(tlv) + tlv.len;
+	partial_srh_len = pad_off - srh_off;
+	len_remaining = partial_srh_len % 8;
+	new_pad = 8 - len_remaining;
+	if (new_pad == 1) // cannot pad for 1 byte only
+		new_pad = 9;
+	else if (new_pad == 8)
+		new_pad = 0;
+
+	return update_tlv_pad(skb, new_pad, pad_size, pad_off);
+}
+
+static __always_inline
+int has_egr_tlv(struct __sk_buff *skb, struct ip6_srh_t *srh)
+{
+	int tlv_offset = sizeof(struct ip6_t) + sizeof(struct ip6_srh_t) +
+		((srh->first_segment + 1) << 4);
+	struct sr6_tlv_t tlv;
+
+	if (bpf_skb_load_bytes(skb, tlv_offset, &tlv, sizeof(struct sr6_tlv_t)))
+		return 0;
+
+	if (tlv.type == SR6_TLV_EGRESS && tlv.len == 18) {
+		struct ip6_addr_t egr_addr;
+
+		if (bpf_skb_load_bytes(skb, tlv_offset + 4, &egr_addr, 16))
+			return 0;
+
+		// check if egress TLV value is correct
+		if (ntohll(egr_addr.hi) == 0xfd00000000000000 &&
+				ntohll(egr_addr.lo) == 0x4)
+			return 1;
+	}
+
+	return 0;
+}
+
+// This function will push a SRH with segments fd00::1, fd00::2, fd00::3,
+// fd00::4
+SEC("encap_srh")
+int __encap_srh(struct __sk_buff *skb)
+{
+	unsigned long long hi = 0xfd00000000000000;
+	struct ip6_addr_t *seg;
+	struct ip6_srh_t *srh;
+	char srh_buf[72]; // room for 4 segments
+	int err;
+
+	srh = (struct ip6_srh_t *)srh_buf;
+	srh->nexthdr = 0;
+	srh->hdrlen = 8;
+	srh->type = 4;
+	srh->segments_left = 3;
+	srh->first_segment = 3;
+	srh->flags = 0;
+	srh->tag = 0;
+
+	seg = (struct ip6_addr_t *)((char *)srh + sizeof(*srh));
+
+	#pragma clang loop unroll(full)
+	for (unsigned long long lo = 0; lo < 4; lo++) {
+		seg->lo = htonll(4 - lo);
+		seg->hi = htonll(hi);
+		seg = (struct ip6_addr_t *)((char *)seg + sizeof(*seg));
+	}
+
+	err = bpf_lwt_push_encap(skb, 0, (void *)srh, sizeof(srh_buf));
+	if (err)
+		return BPF_DROP;
+
+	return BPF_REDIRECT;
+}
+
+// Add an Egress TLV fc00::4, add the flag A,
+// and apply End.X action to fc42::1
+SEC("add_egr_x")
+int __add_egr_x(struct __sk_buff *skb)
+{
+	unsigned long long hi = 0xfc42000000000000;
+	unsigned long long lo = 0x1;
+	struct ip6_srh_t *srh = get_srh(skb);
+	uint8_t new_flags = SR6_FLAG_ALERT;
+	struct ip6_addr_t addr;
+	int err, offset;
+
+	if (srh == NULL)
+		return BPF_DROP;
+
+	uint8_t tlv[20] = {2, 18, 0, 0, 0xfd, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
+			   0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x4};
+
+	err = add_tlv(skb, srh, (srh->hdrlen+1) << 3,
+		      (struct sr6_tlv_t *)&tlv, 20);
+	if (err)
+		return BPF_DROP;
+
+	offset = sizeof(struct ip6_t) + offsetof(struct ip6_srh_t, flags);
+	err = bpf_lwt_seg6_store_bytes(skb, offset,
+				       (void *)&new_flags, sizeof(new_flags));
+	if (err)
+		return BPF_DROP;
+
+	addr.lo = htonll(lo);
+	addr.hi = htonll(hi);
+	err = bpf_lwt_seg6_action(skb, SEG6_LOCAL_ACTION_END_X,
+				  (void *)&addr, sizeof(addr));
+	if (err)
+		return BPF_DROP;
+	return BPF_REDIRECT;
+}
+
+// Pop the Egress TLV, reset the flags, change the tag 2442 and finally do a
+// simple End action
+SEC("pop_egr")
+int __pop_egr(struct __sk_buff *skb)
+{
+	struct ip6_srh_t *srh = get_srh(skb);
+	uint16_t new_tag = bpf_htons(2442);
+	uint8_t new_flags = 0;
+	int err, offset;
+
+	if (srh == NULL)
+		return BPF_DROP;
+
+	if (srh->flags != SR6_FLAG_ALERT)
+		return BPF_DROP;
+
+	if (srh->hdrlen != 11) // 4 segments + Egress TLV + Padding TLV
+		return BPF_DROP;
+
+	if (!has_egr_tlv(skb, srh))
+		return BPF_DROP;
+
+	err = delete_tlv(skb, srh, 8 + (srh->first_segment + 1) * 16);
+	if (err)
+		return BPF_DROP;
+
+	offset = sizeof(struct ip6_t) + offsetof(struct ip6_srh_t, flags);
+	if (bpf_lwt_seg6_store_bytes(skb, offset, (void *)&new_flags,
+				     sizeof(new_flags)))
+		return BPF_DROP;
+
+	offset = sizeof(struct ip6_t) + offsetof(struct ip6_srh_t, tag);
+	if (bpf_lwt_seg6_store_bytes(skb, offset, (void *)&new_tag,
+				     sizeof(new_tag)))
+		return BPF_DROP;
+
+	return BPF_OK;
+}
+
+// Inspect if the Egress TLV and flag have been removed, if the tag is correct,
+// then apply a End.T action to reach the last segment
+SEC("inspect_t")
+int __inspect_t(struct __sk_buff *skb)
+{
+	struct ip6_srh_t *srh = get_srh(skb);
+	int table = 117;
+	int err;
+
+	if (srh == NULL)
+		return BPF_DROP;
+
+	if (srh->flags != 0)
+		return BPF_DROP;
+
+	if (srh->tag != bpf_htons(2442))
+		return BPF_DROP;
+
+	if (srh->hdrlen != 8) // 4 segments
+		return BPF_DROP;
+
+	err = bpf_lwt_seg6_action(skb, SEG6_LOCAL_ACTION_END_T,
+				  (void *)&table, sizeof(table));
+
+	if (err)
+		return BPF_DROP;
+
+	return BPF_REDIRECT;
+}
+
+char __license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_lwt_seg6local.sh b/tools/testing/selftests/bpf/test_lwt_seg6local.sh
new file mode 100755
index 000000000..785eabf2a
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_lwt_seg6local.sh
@@ -0,0 +1,149 @@
+#!/bin/bash
+# Connects 6 network namespaces through veths.
+# Each NS may have different IPv6 global scope addresses :
+#   NS1 ---- NS2 ---- NS3 ---- NS4 ---- NS5 ---- NS6
+# fb00::1           fd00::1  fd00::2  fd00::3  fb00::6
+#                   fc42::1           fd00::4
+#
+# All IPv6 packets going to fb00::/16 through NS2 will be encapsulated in a
+# IPv6 header with a Segment Routing Header, with segments :
+# 	fd00::1 -> fd00::2 -> fd00::3 -> fd00::4
+#
+# 3 fd00::/16 IPv6 addresses are binded to seg6local End.BPF actions :
+# - fd00::1 : add a TLV, change the flags and apply a End.X action to fc42::1
+# - fd00::2 : remove the TLV, change the flags, add a tag
+# - fd00::3 : apply an End.T action to fd00::4, through routing table 117
+#
+# fd00::4 is a simple Segment Routing node decapsulating the inner IPv6 packet.
+# Each End.BPF action will validate the operations applied on the SRH by the
+# previous BPF program in the chain, otherwise the packet is dropped.
+#
+# An UDP datagram is sent from fb00::1 to fb00::6. The test succeeds if this
+# datagram can be read on NS6 when binding to fb00::6.
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+msg="skip all tests:"
+if [ $UID != 0 ]; then
+	echo $msg please run this as root >&2
+	exit $ksft_skip
+fi
+
+TMP_FILE="/tmp/selftest_lwt_seg6local.txt"
+
+cleanup()
+{
+	if [ "$?" = "0" ]; then
+		echo "selftests: test_lwt_seg6local [PASS]";
+	else
+		echo "selftests: test_lwt_seg6local [FAILED]";
+	fi
+
+	set +e
+	ip netns del ns1 2> /dev/null
+	ip netns del ns2 2> /dev/null
+	ip netns del ns3 2> /dev/null
+	ip netns del ns4 2> /dev/null
+	ip netns del ns5 2> /dev/null
+	ip netns del ns6 2> /dev/null
+	rm -f $TMP_FILE
+}
+
+set -e
+
+ip netns add ns1
+ip netns add ns2
+ip netns add ns3
+ip netns add ns4
+ip netns add ns5
+ip netns add ns6
+
+trap cleanup 0 2 3 6 9
+
+ip link add veth1 type veth peer name veth2
+ip link add veth3 type veth peer name veth4
+ip link add veth5 type veth peer name veth6
+ip link add veth7 type veth peer name veth8
+ip link add veth9 type veth peer name veth10
+
+ip link set veth1 netns ns1
+ip link set veth2 netns ns2
+ip link set veth3 netns ns2
+ip link set veth4 netns ns3
+ip link set veth5 netns ns3
+ip link set veth6 netns ns4
+ip link set veth7 netns ns4
+ip link set veth8 netns ns5
+ip link set veth9 netns ns5
+ip link set veth10 netns ns6
+
+ip netns exec ns1 ip link set dev veth1 up
+ip netns exec ns2 ip link set dev veth2 up
+ip netns exec ns2 ip link set dev veth3 up
+ip netns exec ns3 ip link set dev veth4 up
+ip netns exec ns3 ip link set dev veth5 up
+ip netns exec ns4 ip link set dev veth6 up
+ip netns exec ns4 ip link set dev veth7 up
+ip netns exec ns5 ip link set dev veth8 up
+ip netns exec ns5 ip link set dev veth9 up
+ip netns exec ns6 ip link set dev veth10 up
+ip netns exec ns6 ip link set dev lo up
+
+# All link scope addresses and routes required between veths
+ip netns exec ns1 ip -6 addr add fb00::12/16 dev veth1 scope link
+ip netns exec ns1 ip -6 route add fb00::21 dev veth1 scope link
+ip netns exec ns2 ip -6 addr add fb00::21/16 dev veth2 scope link
+ip netns exec ns2 ip -6 addr add fb00::34/16 dev veth3 scope link
+ip netns exec ns2 ip -6 route add fb00::43 dev veth3 scope link
+ip netns exec ns3 ip -6 route add fb00::65 dev veth5 scope link
+ip netns exec ns3 ip -6 addr add fb00::43/16 dev veth4 scope link
+ip netns exec ns3 ip -6 addr add fb00::56/16 dev veth5 scope link
+ip netns exec ns4 ip -6 addr add fb00::65/16 dev veth6 scope link
+ip netns exec ns4 ip -6 addr add fb00::78/16 dev veth7 scope link
+ip netns exec ns4 ip -6 route add fb00::87 dev veth7 scope link
+ip netns exec ns5 ip -6 addr add fb00::87/16 dev veth8 scope link
+ip netns exec ns5 ip -6 addr add fb00::910/16 dev veth9 scope link
+ip netns exec ns5 ip -6 route add fb00::109 dev veth9 scope link
+ip netns exec ns5 ip -6 route add fb00::109 table 117 dev veth9 scope link
+ip netns exec ns6 ip -6 addr add fb00::109/16 dev veth10 scope link
+
+ip netns exec ns1 ip -6 addr add fb00::1/16 dev lo
+ip netns exec ns1 ip -6 route add fb00::6 dev veth1 via fb00::21
+
+ip netns exec ns2 ip -6 route add fb00::6 encap bpf in obj test_lwt_seg6local.o sec encap_srh dev veth2
+ip netns exec ns2 ip -6 route add fd00::1 dev veth3 via fb00::43 scope link
+
+ip netns exec ns3 ip -6 route add fc42::1 dev veth5 via fb00::65
+ip netns exec ns3 ip -6 route add fd00::1 encap seg6local action End.BPF endpoint obj test_lwt_seg6local.o sec add_egr_x dev veth4
+
+ip netns exec ns4 ip -6 route add fd00::2 encap seg6local action End.BPF endpoint obj test_lwt_seg6local.o sec pop_egr dev veth6
+ip netns exec ns4 ip -6 addr add fc42::1 dev lo
+ip netns exec ns4 ip -6 route add fd00::3 dev veth7 via fb00::87
+
+ip netns exec ns5 ip -6 route add fd00::4 table 117 dev veth9 via fb00::109
+ip netns exec ns5 ip -6 route add fd00::3 encap seg6local action End.BPF endpoint obj test_lwt_seg6local.o sec inspect_t dev veth8
+
+ip netns exec ns6 ip -6 addr add fb00::6/16 dev lo
+ip netns exec ns6 ip -6 addr add fd00::4/16 dev lo
+
+ip netns exec ns1 sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
+ip netns exec ns2 sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
+ip netns exec ns3 sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
+ip netns exec ns4 sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
+ip netns exec ns5 sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
+
+ip netns exec ns6 sysctl net.ipv6.conf.all.seg6_enabled=1 > /dev/null
+ip netns exec ns6 sysctl net.ipv6.conf.lo.seg6_enabled=1 > /dev/null
+ip netns exec ns6 sysctl net.ipv6.conf.veth10.seg6_enabled=1 > /dev/null
+
+ip netns exec ns6 nc -l -6 -u -d 7330 > $TMP_FILE &
+ip netns exec ns1 bash -c "echo 'foobar' | nc -w0 -6 -u -p 2121 -s fb00::1 fb00::6 7330"
+sleep 5 # wait enough time to ensure the UDP datagram arrived to the last segment
+kill -INT $!
+
+if [[ $(< $TMP_FILE) != "foobar" ]]; then
+	exit 1
+fi
+
+exit 0
diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c
new file mode 100644
index 000000000..87ba89df9
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_maps.c
@@ -0,0 +1,1451 @@
+/*
+ * Testsuite for eBPF maps
+ *
+ * Copyright (c) 2014 PLUMgrid, http://plumgrid.com
+ * Copyright (c) 2016 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <errno.h>
+#include <string.h>
+#include <assert.h>
+#include <stdlib.h>
+
+#include <sys/wait.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <linux/bpf.h>
+
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+
+#include "bpf_util.h"
+#include "bpf_rlimit.h"
+
+#ifndef ENOTSUPP
+#define ENOTSUPP 524
+#endif
+
+static int map_flags;
+
+#define CHECK(condition, tag, format...) ({				\
+	int __ret = !!(condition);					\
+	if (__ret) {							\
+		printf("%s(%d):FAIL:%s ", __func__, __LINE__, tag);	\
+		printf(format);						\
+		exit(-1);						\
+	}								\
+})
+
+static void test_hashmap(int task, void *data)
+{
+	long long key, next_key, first_key, value;
+	int fd;
+
+	fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(key), sizeof(value),
+			    2, map_flags);
+	if (fd < 0) {
+		printf("Failed to create hashmap '%s'!\n", strerror(errno));
+		exit(1);
+	}
+
+	key = 1;
+	value = 1234;
+	/* Insert key=1 element. */
+	assert(bpf_map_update_elem(fd, &key, &value, BPF_ANY) == 0);
+
+	value = 0;
+	/* BPF_NOEXIST means add new element if it doesn't exist. */
+	assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) == -1 &&
+	       /* key=1 already exists. */
+	       errno == EEXIST);
+
+	/* -1 is an invalid flag. */
+	assert(bpf_map_update_elem(fd, &key, &value, -1) == -1 &&
+	       errno == EINVAL);
+
+	/* Check that key=1 can be found. */
+	assert(bpf_map_lookup_elem(fd, &key, &value) == 0 && value == 1234);
+
+	key = 2;
+	/* Check that key=2 is not found. */
+	assert(bpf_map_lookup_elem(fd, &key, &value) == -1 && errno == ENOENT);
+
+	/* BPF_EXIST means update existing element. */
+	assert(bpf_map_update_elem(fd, &key, &value, BPF_EXIST) == -1 &&
+	       /* key=2 is not there. */
+	       errno == ENOENT);
+
+	/* Insert key=2 element. */
+	assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) == 0);
+
+	/* key=1 and key=2 were inserted, check that key=0 cannot be
+	 * inserted due to max_entries limit.
+	 */
+	key = 0;
+	assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) == -1 &&
+	       errno == E2BIG);
+
+	/* Update existing element, though the map is full. */
+	key = 1;
+	assert(bpf_map_update_elem(fd, &key, &value, BPF_EXIST) == 0);
+	key = 2;
+	assert(bpf_map_update_elem(fd, &key, &value, BPF_ANY) == 0);
+	key = 3;
+	assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) == -1 &&
+	       errno == E2BIG);
+
+	/* Check that key = 0 doesn't exist. */
+	key = 0;
+	assert(bpf_map_delete_elem(fd, &key) == -1 && errno == ENOENT);
+
+	/* Iterate over two elements. */
+	assert(bpf_map_get_next_key(fd, NULL, &first_key) == 0 &&
+	       (first_key == 1 || first_key == 2));
+	assert(bpf_map_get_next_key(fd, &key, &next_key) == 0 &&
+	       (next_key == first_key));
+	assert(bpf_map_get_next_key(fd, &next_key, &next_key) == 0 &&
+	       (next_key == 1 || next_key == 2) &&
+	       (next_key != first_key));
+	assert(bpf_map_get_next_key(fd, &next_key, &next_key) == -1 &&
+	       errno == ENOENT);
+
+	/* Delete both elements. */
+	key = 1;
+	assert(bpf_map_delete_elem(fd, &key) == 0);
+	key = 2;
+	assert(bpf_map_delete_elem(fd, &key) == 0);
+	assert(bpf_map_delete_elem(fd, &key) == -1 && errno == ENOENT);
+
+	key = 0;
+	/* Check that map is empty. */
+	assert(bpf_map_get_next_key(fd, NULL, &next_key) == -1 &&
+	       errno == ENOENT);
+	assert(bpf_map_get_next_key(fd, &key, &next_key) == -1 &&
+	       errno == ENOENT);
+
+	close(fd);
+}
+
+static void test_hashmap_sizes(int task, void *data)
+{
+	int fd, i, j;
+
+	for (i = 1; i <= 512; i <<= 1)
+		for (j = 1; j <= 1 << 18; j <<= 1) {
+			fd = bpf_create_map(BPF_MAP_TYPE_HASH, i, j,
+					    2, map_flags);
+			if (fd < 0) {
+				if (errno == ENOMEM)
+					return;
+				printf("Failed to create hashmap key=%d value=%d '%s'\n",
+				       i, j, strerror(errno));
+				exit(1);
+			}
+			close(fd);
+			usleep(10); /* give kernel time to destroy */
+		}
+}
+
+static void test_hashmap_percpu(int task, void *data)
+{
+	unsigned int nr_cpus = bpf_num_possible_cpus();
+	BPF_DECLARE_PERCPU(long, value);
+	long long key, next_key, first_key;
+	int expected_key_mask = 0;
+	int fd, i;
+
+	fd = bpf_create_map(BPF_MAP_TYPE_PERCPU_HASH, sizeof(key),
+			    sizeof(bpf_percpu(value, 0)), 2, map_flags);
+	if (fd < 0) {
+		printf("Failed to create hashmap '%s'!\n", strerror(errno));
+		exit(1);
+	}
+
+	for (i = 0; i < nr_cpus; i++)
+		bpf_percpu(value, i) = i + 100;
+
+	key = 1;
+	/* Insert key=1 element. */
+	assert(!(expected_key_mask & key));
+	assert(bpf_map_update_elem(fd, &key, value, BPF_ANY) == 0);
+	expected_key_mask |= key;
+
+	/* BPF_NOEXIST means add new element if it doesn't exist. */
+	assert(bpf_map_update_elem(fd, &key, value, BPF_NOEXIST) == -1 &&
+	       /* key=1 already exists. */
+	       errno == EEXIST);
+
+	/* -1 is an invalid flag. */
+	assert(bpf_map_update_elem(fd, &key, value, -1) == -1 &&
+	       errno == EINVAL);
+
+	/* Check that key=1 can be found. Value could be 0 if the lookup
+	 * was run from a different CPU.
+	 */
+	bpf_percpu(value, 0) = 1;
+	assert(bpf_map_lookup_elem(fd, &key, value) == 0 &&
+	       bpf_percpu(value, 0) == 100);
+
+	key = 2;
+	/* Check that key=2 is not found. */
+	assert(bpf_map_lookup_elem(fd, &key, value) == -1 && errno == ENOENT);
+
+	/* BPF_EXIST means update existing element. */
+	assert(bpf_map_update_elem(fd, &key, value, BPF_EXIST) == -1 &&
+	       /* key=2 is not there. */
+	       errno == ENOENT);
+
+	/* Insert key=2 element. */
+	assert(!(expected_key_mask & key));
+	assert(bpf_map_update_elem(fd, &key, value, BPF_NOEXIST) == 0);
+	expected_key_mask |= key;
+
+	/* key=1 and key=2 were inserted, check that key=0 cannot be
+	 * inserted due to max_entries limit.
+	 */
+	key = 0;
+	assert(bpf_map_update_elem(fd, &key, value, BPF_NOEXIST) == -1 &&
+	       errno == E2BIG);
+
+	/* Check that key = 0 doesn't exist. */
+	assert(bpf_map_delete_elem(fd, &key) == -1 && errno == ENOENT);
+
+	/* Iterate over two elements. */
+	assert(bpf_map_get_next_key(fd, NULL, &first_key) == 0 &&
+	       ((expected_key_mask & first_key) == first_key));
+	while (!bpf_map_get_next_key(fd, &key, &next_key)) {
+		if (first_key) {
+			assert(next_key == first_key);
+			first_key = 0;
+		}
+		assert((expected_key_mask & next_key) == next_key);
+		expected_key_mask &= ~next_key;
+
+		assert(bpf_map_lookup_elem(fd, &next_key, value) == 0);
+
+		for (i = 0; i < nr_cpus; i++)
+			assert(bpf_percpu(value, i) == i + 100);
+
+		key = next_key;
+	}
+	assert(errno == ENOENT);
+
+	/* Update with BPF_EXIST. */
+	key = 1;
+	assert(bpf_map_update_elem(fd, &key, value, BPF_EXIST) == 0);
+
+	/* Delete both elements. */
+	key = 1;
+	assert(bpf_map_delete_elem(fd, &key) == 0);
+	key = 2;
+	assert(bpf_map_delete_elem(fd, &key) == 0);
+	assert(bpf_map_delete_elem(fd, &key) == -1 && errno == ENOENT);
+
+	key = 0;
+	/* Check that map is empty. */
+	assert(bpf_map_get_next_key(fd, NULL, &next_key) == -1 &&
+	       errno == ENOENT);
+	assert(bpf_map_get_next_key(fd, &key, &next_key) == -1 &&
+	       errno == ENOENT);
+
+	close(fd);
+}
+
+static void test_hashmap_walk(int task, void *data)
+{
+	int fd, i, max_entries = 1000;
+	long long key, value, next_key;
+	bool next_key_valid = true;
+
+	fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(key), sizeof(value),
+			    max_entries, map_flags);
+	if (fd < 0) {
+		printf("Failed to create hashmap '%s'!\n", strerror(errno));
+		exit(1);
+	}
+
+	for (i = 0; i < max_entries; i++) {
+		key = i; value = key;
+		assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) == 0);
+	}
+
+	for (i = 0; bpf_map_get_next_key(fd, !i ? NULL : &key,
+					 &next_key) == 0; i++) {
+		key = next_key;
+		assert(bpf_map_lookup_elem(fd, &key, &value) == 0);
+	}
+
+	assert(i == max_entries);
+
+	assert(bpf_map_get_next_key(fd, NULL, &key) == 0);
+	for (i = 0; next_key_valid; i++) {
+		next_key_valid = bpf_map_get_next_key(fd, &key, &next_key) == 0;
+		assert(bpf_map_lookup_elem(fd, &key, &value) == 0);
+		value++;
+		assert(bpf_map_update_elem(fd, &key, &value, BPF_EXIST) == 0);
+		key = next_key;
+	}
+
+	assert(i == max_entries);
+
+	for (i = 0; bpf_map_get_next_key(fd, !i ? NULL : &key,
+					 &next_key) == 0; i++) {
+		key = next_key;
+		assert(bpf_map_lookup_elem(fd, &key, &value) == 0);
+		assert(value - 1 == key);
+	}
+
+	assert(i == max_entries);
+	close(fd);
+}
+
+static void test_arraymap(int task, void *data)
+{
+	int key, next_key, fd;
+	long long value;
+
+	fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, sizeof(key), sizeof(value),
+			    2, 0);
+	if (fd < 0) {
+		printf("Failed to create arraymap '%s'!\n", strerror(errno));
+		exit(1);
+	}
+
+	key = 1;
+	value = 1234;
+	/* Insert key=1 element. */
+	assert(bpf_map_update_elem(fd, &key, &value, BPF_ANY) == 0);
+
+	value = 0;
+	assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) == -1 &&
+	       errno == EEXIST);
+
+	/* Check that key=1 can be found. */
+	assert(bpf_map_lookup_elem(fd, &key, &value) == 0 && value == 1234);
+
+	key = 0;
+	/* Check that key=0 is also found and zero initialized. */
+	assert(bpf_map_lookup_elem(fd, &key, &value) == 0 && value == 0);
+
+	/* key=0 and key=1 were inserted, check that key=2 cannot be inserted
+	 * due to max_entries limit.
+	 */
+	key = 2;
+	assert(bpf_map_update_elem(fd, &key, &value, BPF_EXIST) == -1 &&
+	       errno == E2BIG);
+
+	/* Check that key = 2 doesn't exist. */
+	assert(bpf_map_lookup_elem(fd, &key, &value) == -1 && errno == ENOENT);
+
+	/* Iterate over two elements. */
+	assert(bpf_map_get_next_key(fd, NULL, &next_key) == 0 &&
+	       next_key == 0);
+	assert(bpf_map_get_next_key(fd, &key, &next_key) == 0 &&
+	       next_key == 0);
+	assert(bpf_map_get_next_key(fd, &next_key, &next_key) == 0 &&
+	       next_key == 1);
+	assert(bpf_map_get_next_key(fd, &next_key, &next_key) == -1 &&
+	       errno == ENOENT);
+
+	/* Delete shouldn't succeed. */
+	key = 1;
+	assert(bpf_map_delete_elem(fd, &key) == -1 && errno == EINVAL);
+
+	close(fd);
+}
+
+static void test_arraymap_percpu(int task, void *data)
+{
+	unsigned int nr_cpus = bpf_num_possible_cpus();
+	BPF_DECLARE_PERCPU(long, values);
+	int key, next_key, fd, i;
+
+	fd = bpf_create_map(BPF_MAP_TYPE_PERCPU_ARRAY, sizeof(key),
+			    sizeof(bpf_percpu(values, 0)), 2, 0);
+	if (fd < 0) {
+		printf("Failed to create arraymap '%s'!\n", strerror(errno));
+		exit(1);
+	}
+
+	for (i = 0; i < nr_cpus; i++)
+		bpf_percpu(values, i) = i + 100;
+
+	key = 1;
+	/* Insert key=1 element. */
+	assert(bpf_map_update_elem(fd, &key, values, BPF_ANY) == 0);
+
+	bpf_percpu(values, 0) = 0;
+	assert(bpf_map_update_elem(fd, &key, values, BPF_NOEXIST) == -1 &&
+	       errno == EEXIST);
+
+	/* Check that key=1 can be found. */
+	assert(bpf_map_lookup_elem(fd, &key, values) == 0 &&
+	       bpf_percpu(values, 0) == 100);
+
+	key = 0;
+	/* Check that key=0 is also found and zero initialized. */
+	assert(bpf_map_lookup_elem(fd, &key, values) == 0 &&
+	       bpf_percpu(values, 0) == 0 &&
+	       bpf_percpu(values, nr_cpus - 1) == 0);
+
+	/* Check that key=2 cannot be inserted due to max_entries limit. */
+	key = 2;
+	assert(bpf_map_update_elem(fd, &key, values, BPF_EXIST) == -1 &&
+	       errno == E2BIG);
+
+	/* Check that key = 2 doesn't exist. */
+	assert(bpf_map_lookup_elem(fd, &key, values) == -1 && errno == ENOENT);
+
+	/* Iterate over two elements. */
+	assert(bpf_map_get_next_key(fd, NULL, &next_key) == 0 &&
+	       next_key == 0);
+	assert(bpf_map_get_next_key(fd, &key, &next_key) == 0 &&
+	       next_key == 0);
+	assert(bpf_map_get_next_key(fd, &next_key, &next_key) == 0 &&
+	       next_key == 1);
+	assert(bpf_map_get_next_key(fd, &next_key, &next_key) == -1 &&
+	       errno == ENOENT);
+
+	/* Delete shouldn't succeed. */
+	key = 1;
+	assert(bpf_map_delete_elem(fd, &key) == -1 && errno == EINVAL);
+
+	close(fd);
+}
+
+static void test_arraymap_percpu_many_keys(void)
+{
+	unsigned int nr_cpus = bpf_num_possible_cpus();
+	BPF_DECLARE_PERCPU(long, values);
+	/* nr_keys is not too large otherwise the test stresses percpu
+	 * allocator more than anything else
+	 */
+	unsigned int nr_keys = 2000;
+	int key, fd, i;
+
+	fd = bpf_create_map(BPF_MAP_TYPE_PERCPU_ARRAY, sizeof(key),
+			    sizeof(bpf_percpu(values, 0)), nr_keys, 0);
+	if (fd < 0) {
+		printf("Failed to create per-cpu arraymap '%s'!\n",
+		       strerror(errno));
+		exit(1);
+	}
+
+	for (i = 0; i < nr_cpus; i++)
+		bpf_percpu(values, i) = i + 10;
+
+	for (key = 0; key < nr_keys; key++)
+		assert(bpf_map_update_elem(fd, &key, values, BPF_ANY) == 0);
+
+	for (key = 0; key < nr_keys; key++) {
+		for (i = 0; i < nr_cpus; i++)
+			bpf_percpu(values, i) = 0;
+
+		assert(bpf_map_lookup_elem(fd, &key, values) == 0);
+
+		for (i = 0; i < nr_cpus; i++)
+			assert(bpf_percpu(values, i) == i + 10);
+	}
+
+	close(fd);
+}
+
+static void test_devmap(int task, void *data)
+{
+	int fd;
+	__u32 key, value;
+
+	fd = bpf_create_map(BPF_MAP_TYPE_DEVMAP, sizeof(key), sizeof(value),
+			    2, 0);
+	if (fd < 0) {
+		printf("Failed to create arraymap '%s'!\n", strerror(errno));
+		exit(1);
+	}
+
+	close(fd);
+}
+
+#include <sys/socket.h>
+#include <sys/ioctl.h>
+#include <arpa/inet.h>
+#include <sys/select.h>
+#include <linux/err.h>
+#define SOCKMAP_PARSE_PROG "./sockmap_parse_prog.o"
+#define SOCKMAP_VERDICT_PROG "./sockmap_verdict_prog.o"
+#define SOCKMAP_TCP_MSG_PROG "./sockmap_tcp_msg_prog.o"
+static void test_sockmap(int tasks, void *data)
+{
+	struct bpf_map *bpf_map_rx, *bpf_map_tx, *bpf_map_msg, *bpf_map_break;
+	int map_fd_msg = 0, map_fd_rx = 0, map_fd_tx = 0, map_fd_break;
+	int ports[] = {50200, 50201, 50202, 50204};
+	int err, i, fd, udp, sfd[6] = {0xdeadbeef};
+	u8 buf[20] = {0x0, 0x5, 0x3, 0x2, 0x1, 0x0};
+	int parse_prog, verdict_prog, msg_prog;
+	struct sockaddr_in addr;
+	int one = 1, s, sc, rc;
+	struct bpf_object *obj;
+	struct timeval to;
+	__u32 key, value;
+	pid_t pid[tasks];
+	fd_set w;
+
+	/* Create some sockets to use with sockmap */
+	for (i = 0; i < 2; i++) {
+		sfd[i] = socket(AF_INET, SOCK_STREAM, 0);
+		if (sfd[i] < 0)
+			goto out;
+		err = setsockopt(sfd[i], SOL_SOCKET, SO_REUSEADDR,
+				 (char *)&one, sizeof(one));
+		if (err) {
+			printf("failed to setsockopt\n");
+			goto out;
+		}
+		err = ioctl(sfd[i], FIONBIO, (char *)&one);
+		if (err < 0) {
+			printf("failed to ioctl\n");
+			goto out;
+		}
+		memset(&addr, 0, sizeof(struct sockaddr_in));
+		addr.sin_family = AF_INET;
+		addr.sin_addr.s_addr = inet_addr("127.0.0.1");
+		addr.sin_port = htons(ports[i]);
+		err = bind(sfd[i], (struct sockaddr *)&addr, sizeof(addr));
+		if (err < 0) {
+			printf("failed to bind: err %i: %i:%i\n",
+			       err, i, sfd[i]);
+			goto out;
+		}
+		err = listen(sfd[i], 32);
+		if (err < 0) {
+			printf("failed to listen\n");
+			goto out;
+		}
+	}
+
+	for (i = 2; i < 4; i++) {
+		sfd[i] = socket(AF_INET, SOCK_STREAM, 0);
+		if (sfd[i] < 0)
+			goto out;
+		err = setsockopt(sfd[i], SOL_SOCKET, SO_REUSEADDR,
+				 (char *)&one, sizeof(one));
+		if (err) {
+			printf("set sock opt\n");
+			goto out;
+		}
+		memset(&addr, 0, sizeof(struct sockaddr_in));
+		addr.sin_family = AF_INET;
+		addr.sin_addr.s_addr = inet_addr("127.0.0.1");
+		addr.sin_port = htons(ports[i - 2]);
+		err = connect(sfd[i], (struct sockaddr *)&addr, sizeof(addr));
+		if (err) {
+			printf("failed to connect\n");
+			goto out;
+		}
+	}
+
+
+	for (i = 4; i < 6; i++) {
+		sfd[i] = accept(sfd[i - 4], NULL, NULL);
+		if (sfd[i] < 0) {
+			printf("accept failed\n");
+			goto out;
+		}
+	}
+
+	/* Test sockmap with connected sockets */
+	fd = bpf_create_map(BPF_MAP_TYPE_SOCKMAP,
+			    sizeof(key), sizeof(value),
+			    6, 0);
+	if (fd < 0) {
+		printf("Failed to create sockmap %i\n", fd);
+		goto out_sockmap;
+	}
+
+	/* Test update with unsupported UDP socket */
+	udp = socket(AF_INET, SOCK_DGRAM, 0);
+	i = 0;
+	err = bpf_map_update_elem(fd, &i, &udp, BPF_ANY);
+	if (!err) {
+		printf("Failed socket SOCK_DGRAM allowed '%i:%i'\n",
+		       i, udp);
+		goto out_sockmap;
+	}
+
+	/* Test update without programs */
+	for (i = 0; i < 6; i++) {
+		err = bpf_map_update_elem(fd, &i, &sfd[i], BPF_ANY);
+		if (i < 2 && !err) {
+			printf("Allowed update sockmap '%i:%i' not in ESTABLISHED\n",
+			       i, sfd[i]);
+			goto out_sockmap;
+		} else if (i >= 2 && err) {
+			printf("Failed noprog update sockmap '%i:%i'\n",
+			       i, sfd[i]);
+			goto out_sockmap;
+		}
+	}
+
+	/* Test attaching/detaching bad fds */
+	err = bpf_prog_attach(-1, fd, BPF_SK_SKB_STREAM_PARSER, 0);
+	if (!err) {
+		printf("Failed invalid parser prog attach\n");
+		goto out_sockmap;
+	}
+
+	err = bpf_prog_attach(-1, fd, BPF_SK_SKB_STREAM_VERDICT, 0);
+	if (!err) {
+		printf("Failed invalid verdict prog attach\n");
+		goto out_sockmap;
+	}
+
+	err = bpf_prog_attach(-1, fd, BPF_SK_MSG_VERDICT, 0);
+	if (!err) {
+		printf("Failed invalid msg verdict prog attach\n");
+		goto out_sockmap;
+	}
+
+	err = bpf_prog_attach(-1, fd, __MAX_BPF_ATTACH_TYPE, 0);
+	if (!err) {
+		printf("Failed unknown prog attach\n");
+		goto out_sockmap;
+	}
+
+	err = bpf_prog_detach(fd, BPF_SK_SKB_STREAM_PARSER);
+	if (err) {
+		printf("Failed empty parser prog detach\n");
+		goto out_sockmap;
+	}
+
+	err = bpf_prog_detach(fd, BPF_SK_SKB_STREAM_VERDICT);
+	if (err) {
+		printf("Failed empty verdict prog detach\n");
+		goto out_sockmap;
+	}
+
+	err = bpf_prog_detach(fd, BPF_SK_MSG_VERDICT);
+	if (err) {
+		printf("Failed empty msg verdict prog detach\n");
+		goto out_sockmap;
+	}
+
+	err = bpf_prog_detach(fd, __MAX_BPF_ATTACH_TYPE);
+	if (!err) {
+		printf("Detach invalid prog successful\n");
+		goto out_sockmap;
+	}
+
+	/* Load SK_SKB program and Attach */
+	err = bpf_prog_load(SOCKMAP_PARSE_PROG,
+			    BPF_PROG_TYPE_SK_SKB, &obj, &parse_prog);
+	if (err) {
+		printf("Failed to load SK_SKB parse prog\n");
+		goto out_sockmap;
+	}
+
+	err = bpf_prog_load(SOCKMAP_TCP_MSG_PROG,
+			    BPF_PROG_TYPE_SK_MSG, &obj, &msg_prog);
+	if (err) {
+		printf("Failed to load SK_SKB msg prog\n");
+		goto out_sockmap;
+	}
+
+	err = bpf_prog_load(SOCKMAP_VERDICT_PROG,
+			    BPF_PROG_TYPE_SK_SKB, &obj, &verdict_prog);
+	if (err) {
+		printf("Failed to load SK_SKB verdict prog\n");
+		goto out_sockmap;
+	}
+
+	bpf_map_rx = bpf_object__find_map_by_name(obj, "sock_map_rx");
+	if (IS_ERR(bpf_map_rx)) {
+		printf("Failed to load map rx from verdict prog\n");
+		goto out_sockmap;
+	}
+
+	map_fd_rx = bpf_map__fd(bpf_map_rx);
+	if (map_fd_rx < 0) {
+		printf("Failed to get map rx fd\n");
+		goto out_sockmap;
+	}
+
+	bpf_map_tx = bpf_object__find_map_by_name(obj, "sock_map_tx");
+	if (IS_ERR(bpf_map_tx)) {
+		printf("Failed to load map tx from verdict prog\n");
+		goto out_sockmap;
+	}
+
+	map_fd_tx = bpf_map__fd(bpf_map_tx);
+	if (map_fd_tx < 0) {
+		printf("Failed to get map tx fd\n");
+		goto out_sockmap;
+	}
+
+	bpf_map_msg = bpf_object__find_map_by_name(obj, "sock_map_msg");
+	if (IS_ERR(bpf_map_msg)) {
+		printf("Failed to load map msg from msg_verdict prog\n");
+		goto out_sockmap;
+	}
+
+	map_fd_msg = bpf_map__fd(bpf_map_msg);
+	if (map_fd_msg < 0) {
+		printf("Failed to get map msg fd\n");
+		goto out_sockmap;
+	}
+
+	bpf_map_break = bpf_object__find_map_by_name(obj, "sock_map_break");
+	if (IS_ERR(bpf_map_break)) {
+		printf("Failed to load map tx from verdict prog\n");
+		goto out_sockmap;
+	}
+
+	map_fd_break = bpf_map__fd(bpf_map_break);
+	if (map_fd_break < 0) {
+		printf("Failed to get map tx fd\n");
+		goto out_sockmap;
+	}
+
+	err = bpf_prog_attach(parse_prog, map_fd_break,
+			      BPF_SK_SKB_STREAM_PARSER, 0);
+	if (!err) {
+		printf("Allowed attaching SK_SKB program to invalid map\n");
+		goto out_sockmap;
+	}
+
+	err = bpf_prog_attach(parse_prog, map_fd_rx,
+		      BPF_SK_SKB_STREAM_PARSER, 0);
+	if (err) {
+		printf("Failed stream parser bpf prog attach\n");
+		goto out_sockmap;
+	}
+
+	err = bpf_prog_attach(verdict_prog, map_fd_rx,
+			      BPF_SK_SKB_STREAM_VERDICT, 0);
+	if (err) {
+		printf("Failed stream verdict bpf prog attach\n");
+		goto out_sockmap;
+	}
+
+	err = bpf_prog_attach(msg_prog, map_fd_msg, BPF_SK_MSG_VERDICT, 0);
+	if (err) {
+		printf("Failed msg verdict bpf prog attach\n");
+		goto out_sockmap;
+	}
+
+	err = bpf_prog_attach(verdict_prog, map_fd_rx,
+			      __MAX_BPF_ATTACH_TYPE, 0);
+	if (!err) {
+		printf("Attached unknown bpf prog\n");
+		goto out_sockmap;
+	}
+
+	/* Test map update elem afterwards fd lives in fd and map_fd */
+	for (i = 2; i < 6; i++) {
+		err = bpf_map_update_elem(map_fd_rx, &i, &sfd[i], BPF_ANY);
+		if (err) {
+			printf("Failed map_fd_rx update sockmap %i '%i:%i'\n",
+			       err, i, sfd[i]);
+			goto out_sockmap;
+		}
+		err = bpf_map_update_elem(map_fd_tx, &i, &sfd[i], BPF_ANY);
+		if (err) {
+			printf("Failed map_fd_tx update sockmap %i '%i:%i'\n",
+			       err, i, sfd[i]);
+			goto out_sockmap;
+		}
+	}
+
+	/* Test map delete elem and remove send/recv sockets */
+	for (i = 2; i < 4; i++) {
+		err = bpf_map_delete_elem(map_fd_rx, &i);
+		if (err) {
+			printf("Failed delete sockmap rx %i '%i:%i'\n",
+			       err, i, sfd[i]);
+			goto out_sockmap;
+		}
+		err = bpf_map_delete_elem(map_fd_tx, &i);
+		if (err) {
+			printf("Failed delete sockmap tx %i '%i:%i'\n",
+			       err, i, sfd[i]);
+			goto out_sockmap;
+		}
+	}
+
+	/* Put sfd[2] (sending fd below) into msg map to test sendmsg bpf */
+	i = 0;
+	err = bpf_map_update_elem(map_fd_msg, &i, &sfd[2], BPF_ANY);
+	if (err) {
+		printf("Failed map_fd_msg update sockmap %i\n", err);
+		goto out_sockmap;
+	}
+
+	/* Test map send/recv */
+	for (i = 0; i < 2; i++) {
+		buf[0] = i;
+		buf[1] = 0x5;
+		sc = send(sfd[2], buf, 20, 0);
+		if (sc < 0) {
+			printf("Failed sockmap send\n");
+			goto out_sockmap;
+		}
+
+		FD_ZERO(&w);
+		FD_SET(sfd[3], &w);
+		to.tv_sec = 30;
+		to.tv_usec = 0;
+		s = select(sfd[3] + 1, &w, NULL, NULL, &to);
+		if (s == -1) {
+			perror("Failed sockmap select()");
+			goto out_sockmap;
+		} else if (!s) {
+			printf("Failed sockmap unexpected timeout\n");
+			goto out_sockmap;
+		}
+
+		if (!FD_ISSET(sfd[3], &w)) {
+			printf("Failed sockmap select/recv\n");
+			goto out_sockmap;
+		}
+
+		rc = recv(sfd[3], buf, sizeof(buf), 0);
+		if (rc < 0) {
+			printf("Failed sockmap recv\n");
+			goto out_sockmap;
+		}
+	}
+
+	/* Negative null entry lookup from datapath should be dropped */
+	buf[0] = 1;
+	buf[1] = 12;
+	sc = send(sfd[2], buf, 20, 0);
+	if (sc < 0) {
+		printf("Failed sockmap send\n");
+		goto out_sockmap;
+	}
+
+	/* Push fd into same slot */
+	i = 2;
+	err = bpf_map_update_elem(fd, &i, &sfd[i], BPF_NOEXIST);
+	if (!err) {
+		printf("Failed allowed sockmap dup slot BPF_NOEXIST\n");
+		goto out_sockmap;
+	}
+
+	err = bpf_map_update_elem(fd, &i, &sfd[i], BPF_ANY);
+	if (err) {
+		printf("Failed sockmap update new slot BPF_ANY\n");
+		goto out_sockmap;
+	}
+
+	err = bpf_map_update_elem(fd, &i, &sfd[i], BPF_EXIST);
+	if (err) {
+		printf("Failed sockmap update new slot BPF_EXIST\n");
+		goto out_sockmap;
+	}
+
+	/* Delete the elems without programs */
+	for (i = 2; i < 6; i++) {
+		err = bpf_map_delete_elem(fd, &i);
+		if (err) {
+			printf("Failed delete sockmap %i '%i:%i'\n",
+			       err, i, sfd[i]);
+		}
+	}
+
+	/* Test having multiple maps open and set with programs on same fds */
+	err = bpf_prog_attach(parse_prog, fd,
+			      BPF_SK_SKB_STREAM_PARSER, 0);
+	if (err) {
+		printf("Failed fd bpf parse prog attach\n");
+		goto out_sockmap;
+	}
+	err = bpf_prog_attach(verdict_prog, fd,
+			      BPF_SK_SKB_STREAM_VERDICT, 0);
+	if (err) {
+		printf("Failed fd bpf verdict prog attach\n");
+		goto out_sockmap;
+	}
+
+	for (i = 4; i < 6; i++) {
+		err = bpf_map_update_elem(fd, &i, &sfd[i], BPF_ANY);
+		if (!err) {
+			printf("Failed allowed duplicate programs in update ANY sockmap %i '%i:%i'\n",
+			       err, i, sfd[i]);
+			goto out_sockmap;
+		}
+		err = bpf_map_update_elem(fd, &i, &sfd[i], BPF_NOEXIST);
+		if (!err) {
+			printf("Failed allowed duplicate program in update NOEXIST sockmap  %i '%i:%i'\n",
+			       err, i, sfd[i]);
+			goto out_sockmap;
+		}
+		err = bpf_map_update_elem(fd, &i, &sfd[i], BPF_EXIST);
+		if (!err) {
+			printf("Failed allowed duplicate program in update EXIST sockmap  %i '%i:%i'\n",
+			       err, i, sfd[i]);
+			goto out_sockmap;
+		}
+	}
+
+	/* Test tasks number of forked operations */
+	for (i = 0; i < tasks; i++) {
+		pid[i] = fork();
+		if (pid[i] == 0) {
+			for (i = 0; i < 6; i++) {
+				bpf_map_delete_elem(map_fd_tx, &i);
+				bpf_map_delete_elem(map_fd_rx, &i);
+				bpf_map_update_elem(map_fd_tx, &i,
+						    &sfd[i], BPF_ANY);
+				bpf_map_update_elem(map_fd_rx, &i,
+						    &sfd[i], BPF_ANY);
+			}
+			exit(0);
+		} else if (pid[i] == -1) {
+			printf("Couldn't spawn #%d process!\n", i);
+			exit(1);
+		}
+	}
+
+	for (i = 0; i < tasks; i++) {
+		int status;
+
+		assert(waitpid(pid[i], &status, 0) == pid[i]);
+		assert(status == 0);
+	}
+
+	err = bpf_prog_detach(map_fd_rx, __MAX_BPF_ATTACH_TYPE);
+	if (!err) {
+		printf("Detached an invalid prog type.\n");
+		goto out_sockmap;
+	}
+
+	err = bpf_prog_detach(map_fd_rx, BPF_SK_SKB_STREAM_PARSER);
+	if (err) {
+		printf("Failed parser prog detach\n");
+		goto out_sockmap;
+	}
+
+	err = bpf_prog_detach(map_fd_rx, BPF_SK_SKB_STREAM_VERDICT);
+	if (err) {
+		printf("Failed parser prog detach\n");
+		goto out_sockmap;
+	}
+
+	/* Test map close sockets and empty maps */
+	for (i = 0; i < 6; i++) {
+		bpf_map_delete_elem(map_fd_tx, &i);
+		bpf_map_delete_elem(map_fd_rx, &i);
+		close(sfd[i]);
+	}
+	close(fd);
+	close(map_fd_rx);
+	bpf_object__close(obj);
+	return;
+out:
+	for (i = 0; i < 6; i++)
+		close(sfd[i]);
+	printf("Failed to create sockmap '%i:%s'!\n", i, strerror(errno));
+	exit(1);
+out_sockmap:
+	for (i = 0; i < 6; i++) {
+		if (map_fd_tx)
+			bpf_map_delete_elem(map_fd_tx, &i);
+		if (map_fd_rx)
+			bpf_map_delete_elem(map_fd_rx, &i);
+		close(sfd[i]);
+	}
+	close(fd);
+	exit(1);
+}
+
+#define MAP_SIZE (32 * 1024)
+
+static void test_map_large(void)
+{
+	struct bigkey {
+		int a;
+		char b[116];
+		long long c;
+	} key;
+	int fd, i, value;
+
+	fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(key), sizeof(value),
+			    MAP_SIZE, map_flags);
+	if (fd < 0) {
+		printf("Failed to create large map '%s'!\n", strerror(errno));
+		exit(1);
+	}
+
+	for (i = 0; i < MAP_SIZE; i++) {
+		key = (struct bigkey) { .c = i };
+		value = i;
+
+		assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) == 0);
+	}
+
+	key.c = -1;
+	assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) == -1 &&
+	       errno == E2BIG);
+
+	/* Iterate through all elements. */
+	assert(bpf_map_get_next_key(fd, NULL, &key) == 0);
+	key.c = -1;
+	for (i = 0; i < MAP_SIZE; i++)
+		assert(bpf_map_get_next_key(fd, &key, &key) == 0);
+	assert(bpf_map_get_next_key(fd, &key, &key) == -1 && errno == ENOENT);
+
+	key.c = 0;
+	assert(bpf_map_lookup_elem(fd, &key, &value) == 0 && value == 0);
+	key.a = 1;
+	assert(bpf_map_lookup_elem(fd, &key, &value) == -1 && errno == ENOENT);
+
+	close(fd);
+}
+
+#define run_parallel(N, FN, DATA) \
+	printf("Fork %d tasks to '" #FN "'\n", N); \
+	__run_parallel(N, FN, DATA)
+
+static void __run_parallel(int tasks, void (*fn)(int task, void *data),
+			   void *data)
+{
+	pid_t pid[tasks];
+	int i;
+
+	fflush(stdout);
+
+	for (i = 0; i < tasks; i++) {
+		pid[i] = fork();
+		if (pid[i] == 0) {
+			fn(i, data);
+			exit(0);
+		} else if (pid[i] == -1) {
+			printf("Couldn't spawn #%d process!\n", i);
+			exit(1);
+		}
+	}
+
+	for (i = 0; i < tasks; i++) {
+		int status;
+
+		assert(waitpid(pid[i], &status, 0) == pid[i]);
+		assert(status == 0);
+	}
+}
+
+static void test_map_stress(void)
+{
+	run_parallel(100, test_hashmap, NULL);
+	run_parallel(100, test_hashmap_percpu, NULL);
+	run_parallel(100, test_hashmap_sizes, NULL);
+	run_parallel(100, test_hashmap_walk, NULL);
+
+	run_parallel(100, test_arraymap, NULL);
+	run_parallel(100, test_arraymap_percpu, NULL);
+}
+
+#define TASKS 1024
+
+#define DO_UPDATE 1
+#define DO_DELETE 0
+
+static void test_update_delete(int fn, void *data)
+{
+	int do_update = ((int *)data)[1];
+	int fd = ((int *)data)[0];
+	int i, key, value;
+
+	for (i = fn; i < MAP_SIZE; i += TASKS) {
+		key = value = i;
+
+		if (do_update) {
+			assert(bpf_map_update_elem(fd, &key, &value,
+						   BPF_NOEXIST) == 0);
+			assert(bpf_map_update_elem(fd, &key, &value,
+						   BPF_EXIST) == 0);
+		} else {
+			assert(bpf_map_delete_elem(fd, &key) == 0);
+		}
+	}
+}
+
+static void test_map_parallel(void)
+{
+	int i, fd, key = 0, value = 0;
+	int data[2];
+
+	fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(key), sizeof(value),
+			    MAP_SIZE, map_flags);
+	if (fd < 0) {
+		printf("Failed to create map for parallel test '%s'!\n",
+		       strerror(errno));
+		exit(1);
+	}
+
+	/* Use the same fd in children to add elements to this map:
+	 * child_0 adds key=0, key=1024, key=2048, ...
+	 * child_1 adds key=1, key=1025, key=2049, ...
+	 * child_1023 adds key=1023, ...
+	 */
+	data[0] = fd;
+	data[1] = DO_UPDATE;
+	run_parallel(TASKS, test_update_delete, data);
+
+	/* Check that key=0 is already there. */
+	assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) == -1 &&
+	       errno == EEXIST);
+
+	/* Check that all elements were inserted. */
+	assert(bpf_map_get_next_key(fd, NULL, &key) == 0);
+	key = -1;
+	for (i = 0; i < MAP_SIZE; i++)
+		assert(bpf_map_get_next_key(fd, &key, &key) == 0);
+	assert(bpf_map_get_next_key(fd, &key, &key) == -1 && errno == ENOENT);
+
+	/* Another check for all elements */
+	for (i = 0; i < MAP_SIZE; i++) {
+		key = MAP_SIZE - i - 1;
+
+		assert(bpf_map_lookup_elem(fd, &key, &value) == 0 &&
+		       value == key);
+	}
+
+	/* Now let's delete all elemenets in parallel. */
+	data[1] = DO_DELETE;
+	run_parallel(TASKS, test_update_delete, data);
+
+	/* Nothing should be left. */
+	key = -1;
+	assert(bpf_map_get_next_key(fd, NULL, &key) == -1 && errno == ENOENT);
+	assert(bpf_map_get_next_key(fd, &key, &key) == -1 && errno == ENOENT);
+}
+
+static void test_map_rdonly(void)
+{
+	int fd, key = 0, value = 0;
+
+	fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(key), sizeof(value),
+			    MAP_SIZE, map_flags | BPF_F_RDONLY);
+	if (fd < 0) {
+		printf("Failed to create map for read only test '%s'!\n",
+		       strerror(errno));
+		exit(1);
+	}
+
+	key = 1;
+	value = 1234;
+	/* Insert key=1 element. */
+	assert(bpf_map_update_elem(fd, &key, &value, BPF_ANY) == -1 &&
+	       errno == EPERM);
+
+	/* Check that key=2 is not found. */
+	assert(bpf_map_lookup_elem(fd, &key, &value) == -1 && errno == ENOENT);
+	assert(bpf_map_get_next_key(fd, &key, &value) == -1 && errno == ENOENT);
+}
+
+static void test_map_wronly(void)
+{
+	int fd, key = 0, value = 0;
+
+	fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(key), sizeof(value),
+			    MAP_SIZE, map_flags | BPF_F_WRONLY);
+	if (fd < 0) {
+		printf("Failed to create map for read only test '%s'!\n",
+		       strerror(errno));
+		exit(1);
+	}
+
+	key = 1;
+	value = 1234;
+	/* Insert key=1 element. */
+	assert(bpf_map_update_elem(fd, &key, &value, BPF_ANY) == 0);
+
+	/* Check that key=2 is not found. */
+	assert(bpf_map_lookup_elem(fd, &key, &value) == -1 && errno == EPERM);
+	assert(bpf_map_get_next_key(fd, &key, &value) == -1 && errno == EPERM);
+}
+
+static void prepare_reuseport_grp(int type, int map_fd,
+				  __s64 *fds64, __u64 *sk_cookies,
+				  unsigned int n)
+{
+	socklen_t optlen, addrlen;
+	struct sockaddr_in6 s6;
+	const __u32 index0 = 0;
+	const int optval = 1;
+	unsigned int i;
+	u64 sk_cookie;
+	__s64 fd64;
+	int err;
+
+	s6.sin6_family = AF_INET6;
+	s6.sin6_addr = in6addr_any;
+	s6.sin6_port = 0;
+	addrlen = sizeof(s6);
+	optlen = sizeof(sk_cookie);
+
+	for (i = 0; i < n; i++) {
+		fd64 = socket(AF_INET6, type, 0);
+		CHECK(fd64 == -1, "socket()",
+		      "sock_type:%d fd64:%lld errno:%d\n",
+		      type, fd64, errno);
+
+		err = setsockopt(fd64, SOL_SOCKET, SO_REUSEPORT,
+				 &optval, sizeof(optval));
+		CHECK(err == -1, "setsockopt(SO_REUSEPORT)",
+		      "err:%d errno:%d\n", err, errno);
+
+		/* reuseport_array does not allow unbound sk */
+		err = bpf_map_update_elem(map_fd, &index0, &fd64,
+					  BPF_ANY);
+		CHECK(err != -1 || errno != EINVAL,
+		      "reuseport array update unbound sk",
+		      "sock_type:%d err:%d errno:%d\n",
+		      type, err, errno);
+
+		err = bind(fd64, (struct sockaddr *)&s6, sizeof(s6));
+		CHECK(err == -1, "bind()",
+		      "sock_type:%d err:%d errno:%d\n", type, err, errno);
+
+		if (i == 0) {
+			err = getsockname(fd64, (struct sockaddr *)&s6,
+					  &addrlen);
+			CHECK(err == -1, "getsockname()",
+			      "sock_type:%d err:%d errno:%d\n",
+			      type, err, errno);
+		}
+
+		err = getsockopt(fd64, SOL_SOCKET, SO_COOKIE, &sk_cookie,
+				 &optlen);
+		CHECK(err == -1, "getsockopt(SO_COOKIE)",
+		      "sock_type:%d err:%d errno:%d\n", type, err, errno);
+
+		if (type == SOCK_STREAM) {
+			/*
+			 * reuseport_array does not allow
+			 * non-listening tcp sk.
+			 */
+			err = bpf_map_update_elem(map_fd, &index0, &fd64,
+						  BPF_ANY);
+			CHECK(err != -1 || errno != EINVAL,
+			      "reuseport array update non-listening sk",
+			      "sock_type:%d err:%d errno:%d\n",
+			      type, err, errno);
+			err = listen(fd64, 0);
+			CHECK(err == -1, "listen()",
+			      "sock_type:%d, err:%d errno:%d\n",
+			      type, err, errno);
+		}
+
+		fds64[i] = fd64;
+		sk_cookies[i] = sk_cookie;
+	}
+}
+
+static void test_reuseport_array(void)
+{
+#define REUSEPORT_FD_IDX(err, last) ({ (err) ? last : !last; })
+
+	const __u32 array_size = 4, index0 = 0, index3 = 3;
+	int types[2] = { SOCK_STREAM, SOCK_DGRAM }, type;
+	__u64 grpa_cookies[2], sk_cookie, map_cookie;
+	__s64 grpa_fds64[2] = { -1, -1 }, fd64 = -1;
+	const __u32 bad_index = array_size;
+	int map_fd, err, t, f;
+	__u32 fds_idx = 0;
+	int fd;
+
+	map_fd = bpf_create_map(BPF_MAP_TYPE_REUSEPORT_SOCKARRAY,
+				sizeof(__u32), sizeof(__u64), array_size, 0);
+	CHECK(map_fd == -1, "reuseport array create",
+	      "map_fd:%d, errno:%d\n", map_fd, errno);
+
+	/* Test lookup/update/delete with invalid index */
+	err = bpf_map_delete_elem(map_fd, &bad_index);
+	CHECK(err != -1 || errno != E2BIG, "reuseport array del >=max_entries",
+	      "err:%d errno:%d\n", err, errno);
+
+	err = bpf_map_update_elem(map_fd, &bad_index, &fd64, BPF_ANY);
+	CHECK(err != -1 || errno != E2BIG,
+	      "reuseport array update >=max_entries",
+	      "err:%d errno:%d\n", err, errno);
+
+	err = bpf_map_lookup_elem(map_fd, &bad_index, &map_cookie);
+	CHECK(err != -1 || errno != ENOENT,
+	      "reuseport array update >=max_entries",
+	      "err:%d errno:%d\n", err, errno);
+
+	/* Test lookup/delete non existence elem */
+	err = bpf_map_lookup_elem(map_fd, &index3, &map_cookie);
+	CHECK(err != -1 || errno != ENOENT,
+	      "reuseport array lookup not-exist elem",
+	      "err:%d errno:%d\n", err, errno);
+	err = bpf_map_delete_elem(map_fd, &index3);
+	CHECK(err != -1 || errno != ENOENT,
+	      "reuseport array del not-exist elem",
+	      "err:%d errno:%d\n", err, errno);
+
+	for (t = 0; t < ARRAY_SIZE(types); t++) {
+		type = types[t];
+
+		prepare_reuseport_grp(type, map_fd, grpa_fds64,
+				      grpa_cookies, ARRAY_SIZE(grpa_fds64));
+
+		/* Test BPF_* update flags */
+		/* BPF_EXIST failure case */
+		err = bpf_map_update_elem(map_fd, &index3, &grpa_fds64[fds_idx],
+					  BPF_EXIST);
+		CHECK(err != -1 || errno != ENOENT,
+		      "reuseport array update empty elem BPF_EXIST",
+		      "sock_type:%d err:%d errno:%d\n",
+		      type, err, errno);
+		fds_idx = REUSEPORT_FD_IDX(err, fds_idx);
+
+		/* BPF_NOEXIST success case */
+		err = bpf_map_update_elem(map_fd, &index3, &grpa_fds64[fds_idx],
+					  BPF_NOEXIST);
+		CHECK(err == -1,
+		      "reuseport array update empty elem BPF_NOEXIST",
+		      "sock_type:%d err:%d errno:%d\n",
+		      type, err, errno);
+		fds_idx = REUSEPORT_FD_IDX(err, fds_idx);
+
+		/* BPF_EXIST success case. */
+		err = bpf_map_update_elem(map_fd, &index3, &grpa_fds64[fds_idx],
+					  BPF_EXIST);
+		CHECK(err == -1,
+		      "reuseport array update same elem BPF_EXIST",
+		      "sock_type:%d err:%d errno:%d\n", type, err, errno);
+		fds_idx = REUSEPORT_FD_IDX(err, fds_idx);
+
+		/* BPF_NOEXIST failure case */
+		err = bpf_map_update_elem(map_fd, &index3, &grpa_fds64[fds_idx],
+					  BPF_NOEXIST);
+		CHECK(err != -1 || errno != EEXIST,
+		      "reuseport array update non-empty elem BPF_NOEXIST",
+		      "sock_type:%d err:%d errno:%d\n",
+		      type, err, errno);
+		fds_idx = REUSEPORT_FD_IDX(err, fds_idx);
+
+		/* BPF_ANY case (always succeed) */
+		err = bpf_map_update_elem(map_fd, &index3, &grpa_fds64[fds_idx],
+					  BPF_ANY);
+		CHECK(err == -1,
+		      "reuseport array update same sk with BPF_ANY",
+		      "sock_type:%d err:%d errno:%d\n", type, err, errno);
+
+		fd64 = grpa_fds64[fds_idx];
+		sk_cookie = grpa_cookies[fds_idx];
+
+		/* The same sk cannot be added to reuseport_array twice */
+		err = bpf_map_update_elem(map_fd, &index3, &fd64, BPF_ANY);
+		CHECK(err != -1 || errno != EBUSY,
+		      "reuseport array update same sk with same index",
+		      "sock_type:%d err:%d errno:%d\n",
+		      type, err, errno);
+
+		err = bpf_map_update_elem(map_fd, &index0, &fd64, BPF_ANY);
+		CHECK(err != -1 || errno != EBUSY,
+		      "reuseport array update same sk with different index",
+		      "sock_type:%d err:%d errno:%d\n",
+		      type, err, errno);
+
+		/* Test delete elem */
+		err = bpf_map_delete_elem(map_fd, &index3);
+		CHECK(err == -1, "reuseport array delete sk",
+		      "sock_type:%d err:%d errno:%d\n",
+		      type, err, errno);
+
+		/* Add it back with BPF_NOEXIST */
+		err = bpf_map_update_elem(map_fd, &index3, &fd64, BPF_NOEXIST);
+		CHECK(err == -1,
+		      "reuseport array re-add with BPF_NOEXIST after del",
+		      "sock_type:%d err:%d errno:%d\n", type, err, errno);
+
+		/* Test cookie */
+		err = bpf_map_lookup_elem(map_fd, &index3, &map_cookie);
+		CHECK(err == -1 || sk_cookie != map_cookie,
+		      "reuseport array lookup re-added sk",
+		      "sock_type:%d err:%d errno:%d sk_cookie:0x%llx map_cookie:0x%llxn",
+		      type, err, errno, sk_cookie, map_cookie);
+
+		/* Test elem removed by close() */
+		for (f = 0; f < ARRAY_SIZE(grpa_fds64); f++)
+			close(grpa_fds64[f]);
+		err = bpf_map_lookup_elem(map_fd, &index3, &map_cookie);
+		CHECK(err != -1 || errno != ENOENT,
+		      "reuseport array lookup after close()",
+		      "sock_type:%d err:%d errno:%d\n",
+		      type, err, errno);
+	}
+
+	/* Test SOCK_RAW */
+	fd64 = socket(AF_INET6, SOCK_RAW, IPPROTO_UDP);
+	CHECK(fd64 == -1, "socket(SOCK_RAW)", "err:%d errno:%d\n",
+	      err, errno);
+	err = bpf_map_update_elem(map_fd, &index3, &fd64, BPF_NOEXIST);
+	CHECK(err != -1 || errno != ENOTSUPP, "reuseport array update SOCK_RAW",
+	      "err:%d errno:%d\n", err, errno);
+	close(fd64);
+
+	/* Close the 64 bit value map */
+	close(map_fd);
+
+	/* Test 32 bit fd */
+	map_fd = bpf_create_map(BPF_MAP_TYPE_REUSEPORT_SOCKARRAY,
+				sizeof(__u32), sizeof(__u32), array_size, 0);
+	CHECK(map_fd == -1, "reuseport array create",
+	      "map_fd:%d, errno:%d\n", map_fd, errno);
+	prepare_reuseport_grp(SOCK_STREAM, map_fd, &fd64, &sk_cookie, 1);
+	fd = fd64;
+	err = bpf_map_update_elem(map_fd, &index3, &fd, BPF_NOEXIST);
+	CHECK(err == -1, "reuseport array update 32 bit fd",
+	      "err:%d errno:%d\n", err, errno);
+	err = bpf_map_lookup_elem(map_fd, &index3, &map_cookie);
+	CHECK(err != -1 || errno != ENOSPC,
+	      "reuseport array lookup 32 bit fd",
+	      "err:%d errno:%d\n", err, errno);
+	close(fd);
+	close(map_fd);
+}
+
+static void run_all_tests(void)
+{
+	test_hashmap(0, NULL);
+	test_hashmap_percpu(0, NULL);
+	test_hashmap_walk(0, NULL);
+
+	test_arraymap(0, NULL);
+	test_arraymap_percpu(0, NULL);
+
+	test_arraymap_percpu_many_keys();
+
+	test_devmap(0, NULL);
+	test_sockmap(0, NULL);
+
+	test_map_large();
+	test_map_parallel();
+	test_map_stress();
+
+	test_map_rdonly();
+	test_map_wronly();
+
+	test_reuseport_array();
+}
+
+int main(void)
+{
+	map_flags = 0;
+	run_all_tests();
+
+	map_flags = BPF_F_NO_PREALLOC;
+	run_all_tests();
+
+	printf("test_maps: OK\n");
+	return 0;
+}
diff --git a/tools/testing/selftests/bpf/test_obj_id.c b/tools/testing/selftests/bpf/test_obj_id.c
new file mode 100644
index 000000000..880d2963b
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_obj_id.c
@@ -0,0 +1,35 @@
+/* Copyright (c) 2017 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <stddef.h>
+#include <linux/bpf.h>
+#include <linux/pkt_cls.h>
+#include "bpf_helpers.h"
+
+/* It is a dumb bpf program such that it must have no
+ * issue to be loaded since testing the verifier is
+ * not the focus here.
+ */
+
+int _version SEC("version") = 1;
+
+struct bpf_map_def SEC("maps") test_map_id = {
+	.type = BPF_MAP_TYPE_ARRAY,
+	.key_size = sizeof(__u32),
+	.value_size = sizeof(__u64),
+	.max_entries = 1,
+};
+
+SEC("test_obj_id_dummy")
+int test_obj_id(struct __sk_buff *skb)
+{
+	__u32 key = 0;
+	__u64 *value;
+
+	value = bpf_map_lookup_elem(&test_map_id, &key);
+
+	return TC_ACT_OK;
+}
diff --git a/tools/testing/selftests/bpf/test_offload.py b/tools/testing/selftests/bpf/test_offload.py
new file mode 100755
index 000000000..6b46db61c
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_offload.py
@@ -0,0 +1,1288 @@
+#!/usr/bin/env python3
+
+# Copyright (C) 2017 Netronome Systems, Inc.
+#
+# This software is licensed under the GNU General License Version 2,
+# June 1991 as shown in the file COPYING in the top-level directory of this
+# source tree.
+#
+# THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS"
+# WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING,
+# BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+# FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE
+# OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME
+# THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+
+from datetime import datetime
+import argparse
+import json
+import os
+import pprint
+import random
+import string
+import struct
+import subprocess
+import time
+
+logfile = None
+log_level = 1
+skip_extack = False
+bpf_test_dir = os.path.dirname(os.path.realpath(__file__))
+pp = pprint.PrettyPrinter()
+devs = [] # devices we created for clean up
+files = [] # files to be removed
+netns = [] # net namespaces to be removed
+
+def log_get_sec(level=0):
+    return "*" * (log_level + level)
+
+def log_level_inc(add=1):
+    global log_level
+    log_level += add
+
+def log_level_dec(sub=1):
+    global log_level
+    log_level -= sub
+
+def log_level_set(level):
+    global log_level
+    log_level = level
+
+def log(header, data, level=None):
+    """
+    Output to an optional log.
+    """
+    if logfile is None:
+        return
+    if level is not None:
+        log_level_set(level)
+
+    if not isinstance(data, str):
+        data = pp.pformat(data)
+
+    if len(header):
+        logfile.write("\n" + log_get_sec() + " ")
+        logfile.write(header)
+    if len(header) and len(data.strip()):
+        logfile.write("\n")
+    logfile.write(data)
+
+def skip(cond, msg):
+    if not cond:
+        return
+    print("SKIP: " + msg)
+    log("SKIP: " + msg, "", level=1)
+    os.sys.exit(0)
+
+def fail(cond, msg):
+    if not cond:
+        return
+    print("FAIL: " + msg)
+    log("FAIL: " + msg, "", level=1)
+    os.sys.exit(1)
+
+def start_test(msg):
+    log(msg, "", level=1)
+    log_level_inc()
+    print(msg)
+
+def cmd(cmd, shell=True, include_stderr=False, background=False, fail=True):
+    """
+    Run a command in subprocess and return tuple of (retval, stdout);
+    optionally return stderr as well as third value.
+    """
+    proc = subprocess.Popen(cmd, shell=shell, stdout=subprocess.PIPE,
+                            stderr=subprocess.PIPE)
+    if background:
+        msg = "%s START: %s" % (log_get_sec(1),
+                                datetime.now().strftime("%H:%M:%S.%f"))
+        log("BKG " + proc.args, msg)
+        return proc
+
+    return cmd_result(proc, include_stderr=include_stderr, fail=fail)
+
+def cmd_result(proc, include_stderr=False, fail=False):
+    stdout, stderr = proc.communicate()
+    stdout = stdout.decode("utf-8")
+    stderr = stderr.decode("utf-8")
+    proc.stdout.close()
+    proc.stderr.close()
+
+    stderr = "\n" + stderr
+    if stderr[-1] == "\n":
+        stderr = stderr[:-1]
+
+    sec = log_get_sec(1)
+    log("CMD " + proc.args,
+        "RETCODE: %d\n%s STDOUT:\n%s%s STDERR:%s\n%s END: %s" %
+        (proc.returncode, sec, stdout, sec, stderr,
+         sec, datetime.now().strftime("%H:%M:%S.%f")))
+
+    if proc.returncode != 0 and fail:
+        if len(stderr) > 0 and stderr[-1] == "\n":
+            stderr = stderr[:-1]
+        raise Exception("Command failed: %s\n%s" % (proc.args, stderr))
+
+    if include_stderr:
+        return proc.returncode, stdout, stderr
+    else:
+        return proc.returncode, stdout
+
+def rm(f):
+    cmd("rm -f %s" % (f))
+    if f in files:
+        files.remove(f)
+
+def tool(name, args, flags, JSON=True, ns="", fail=True, include_stderr=False):
+    params = ""
+    if JSON:
+        params += "%s " % (flags["json"])
+
+    if ns != "":
+        ns = "ip netns exec %s " % (ns)
+
+    if include_stderr:
+        ret, stdout, stderr = cmd(ns + name + " " + params + args,
+                                  fail=fail, include_stderr=True)
+    else:
+        ret, stdout = cmd(ns + name + " " + params + args,
+                          fail=fail, include_stderr=False)
+
+    if JSON and len(stdout.strip()) != 0:
+        out = json.loads(stdout)
+    else:
+        out = stdout
+
+    if include_stderr:
+        return ret, out, stderr
+    else:
+        return ret, out
+
+def bpftool(args, JSON=True, ns="", fail=True, include_stderr=False):
+    return tool("bpftool", args, {"json":"-p"}, JSON=JSON, ns=ns,
+                fail=fail, include_stderr=include_stderr)
+
+def bpftool_prog_list(expected=None, ns=""):
+    _, progs = bpftool("prog show", JSON=True, ns=ns, fail=True)
+    # Remove the base progs
+    for p in base_progs:
+        if p in progs:
+            progs.remove(p)
+    if expected is not None:
+        if len(progs) != expected:
+            fail(True, "%d BPF programs loaded, expected %d" %
+                 (len(progs), expected))
+    return progs
+
+def bpftool_map_list(expected=None, ns=""):
+    _, maps = bpftool("map show", JSON=True, ns=ns, fail=True)
+    # Remove the base maps
+    for m in base_maps:
+        if m in maps:
+            maps.remove(m)
+    if expected is not None:
+        if len(maps) != expected:
+            fail(True, "%d BPF maps loaded, expected %d" %
+                 (len(maps), expected))
+    return maps
+
+def bpftool_prog_list_wait(expected=0, n_retry=20):
+    for i in range(n_retry):
+        nprogs = len(bpftool_prog_list())
+        if nprogs == expected:
+            return
+        time.sleep(0.05)
+    raise Exception("Time out waiting for program counts to stabilize want %d, have %d" % (expected, nprogs))
+
+def bpftool_map_list_wait(expected=0, n_retry=20):
+    for i in range(n_retry):
+        nmaps = len(bpftool_map_list())
+        if nmaps == expected:
+            return
+        time.sleep(0.05)
+    raise Exception("Time out waiting for map counts to stabilize want %d, have %d" % (expected, nmaps))
+
+def bpftool_prog_load(sample, file_name, maps=[], prog_type="xdp", dev=None,
+                      fail=True, include_stderr=False):
+    args = "prog load %s %s" % (os.path.join(bpf_test_dir, sample), file_name)
+    if prog_type is not None:
+        args += " type " + prog_type
+    if dev is not None:
+        args += " dev " + dev
+    if len(maps):
+        args += " map " + " map ".join(maps)
+
+    res = bpftool(args, fail=fail, include_stderr=include_stderr)
+    if res[0] == 0:
+        files.append(file_name)
+    return res
+
+def ip(args, force=False, JSON=True, ns="", fail=True, include_stderr=False):
+    if force:
+        args = "-force " + args
+    return tool("ip", args, {"json":"-j"}, JSON=JSON, ns=ns,
+                fail=fail, include_stderr=include_stderr)
+
+def tc(args, JSON=True, ns="", fail=True, include_stderr=False):
+    return tool("tc", args, {"json":"-p"}, JSON=JSON, ns=ns,
+                fail=fail, include_stderr=include_stderr)
+
+def ethtool(dev, opt, args, fail=True):
+    return cmd("ethtool %s %s %s" % (opt, dev["ifname"], args), fail=fail)
+
+def bpf_obj(name, sec=".text", path=bpf_test_dir,):
+    return "obj %s sec %s" % (os.path.join(path, name), sec)
+
+def bpf_pinned(name):
+    return "pinned %s" % (name)
+
+def bpf_bytecode(bytecode):
+    return "bytecode \"%s\"" % (bytecode)
+
+def mknetns(n_retry=10):
+    for i in range(n_retry):
+        name = ''.join([random.choice(string.ascii_letters) for i in range(8)])
+        ret, _ = ip("netns add %s" % (name), fail=False)
+        if ret == 0:
+            netns.append(name)
+            return name
+    return None
+
+def int2str(fmt, val):
+    ret = []
+    for b in struct.pack(fmt, val):
+        ret.append(int(b))
+    return " ".join(map(lambda x: str(x), ret))
+
+def str2int(strtab):
+    inttab = []
+    for i in strtab:
+        inttab.append(int(i, 16))
+    ba = bytearray(inttab)
+    if len(strtab) == 4:
+        fmt = "I"
+    elif len(strtab) == 8:
+        fmt = "Q"
+    else:
+        raise Exception("String array of len %d can't be unpacked to an int" %
+                        (len(strtab)))
+    return struct.unpack(fmt, ba)[0]
+
+class DebugfsDir:
+    """
+    Class for accessing DebugFS directories as a dictionary.
+    """
+
+    def __init__(self, path):
+        self.path = path
+        self._dict = self._debugfs_dir_read(path)
+
+    def __len__(self):
+        return len(self._dict.keys())
+
+    def __getitem__(self, key):
+        if type(key) is int:
+            key = list(self._dict.keys())[key]
+        return self._dict[key]
+
+    def __setitem__(self, key, value):
+        log("DebugFS set %s = %s" % (key, value), "")
+        log_level_inc()
+
+        cmd("echo '%s' > %s/%s" % (value, self.path, key))
+        log_level_dec()
+
+        _, out = cmd('cat %s/%s' % (self.path, key))
+        self._dict[key] = out.strip()
+
+    def _debugfs_dir_read(self, path):
+        dfs = {}
+
+        log("DebugFS state for %s" % (path), "")
+        log_level_inc(add=2)
+
+        _, out = cmd('ls ' + path)
+        for f in out.split():
+            p = os.path.join(path, f)
+            if os.path.isfile(p):
+                _, out = cmd('cat %s/%s' % (path, f))
+                dfs[f] = out.strip()
+            elif os.path.isdir(p):
+                dfs[f] = DebugfsDir(p)
+            else:
+                raise Exception("%s is neither file nor directory" % (p))
+
+        log_level_dec()
+        log("DebugFS state", dfs)
+        log_level_dec()
+
+        return dfs
+
+class NetdevSim:
+    """
+    Class for netdevsim netdevice and its attributes.
+    """
+
+    def __init__(self, link=None):
+        self.link = link
+
+        self.dev = self._netdevsim_create()
+        devs.append(self)
+
+        self.ns = ""
+
+        self.dfs_dir = '/sys/kernel/debug/netdevsim/%s' % (self.dev['ifname'])
+        self.sdev_dir = self.dfs_dir + '/sdev/'
+        self.dfs_refresh()
+
+    def __getitem__(self, key):
+        return self.dev[key]
+
+    def _netdevsim_create(self):
+        link = "" if self.link is None else "link " + self.link.dev['ifname']
+        _, old  = ip("link show")
+        ip("link add sim%d {link} type netdevsim".format(link=link))
+        _, new  = ip("link show")
+
+        for dev in new:
+            f = filter(lambda x: x["ifname"] == dev["ifname"], old)
+            if len(list(f)) == 0:
+                return dev
+
+        raise Exception("failed to create netdevsim device")
+
+    def remove(self):
+        devs.remove(self)
+        ip("link del dev %s" % (self.dev["ifname"]), ns=self.ns)
+
+    def dfs_refresh(self):
+        self.dfs = DebugfsDir(self.dfs_dir)
+        return self.dfs
+
+    def dfs_read(self, f):
+        path = os.path.join(self.dfs_dir, f)
+        _, data = cmd('cat %s' % (path))
+        return data.strip()
+
+    def dfs_num_bound_progs(self):
+        path = os.path.join(self.sdev_dir, "bpf_bound_progs")
+        _, progs = cmd('ls %s' % (path))
+        return len(progs.split())
+
+    def dfs_get_bound_progs(self, expected):
+        progs = DebugfsDir(os.path.join(self.sdev_dir, "bpf_bound_progs"))
+        if expected is not None:
+            if len(progs) != expected:
+                fail(True, "%d BPF programs bound, expected %d" %
+                     (len(progs), expected))
+        return progs
+
+    def wait_for_flush(self, bound=0, total=0, n_retry=20):
+        for i in range(n_retry):
+            nbound = self.dfs_num_bound_progs()
+            nprogs = len(bpftool_prog_list())
+            if nbound == bound and nprogs == total:
+                return
+            time.sleep(0.05)
+        raise Exception("Time out waiting for program counts to stabilize want %d/%d, have %d bound, %d loaded" % (bound, total, nbound, nprogs))
+
+    def set_ns(self, ns):
+        name = "1" if ns == "" else ns
+        ip("link set dev %s netns %s" % (self.dev["ifname"], name), ns=self.ns)
+        self.ns = ns
+
+    def set_mtu(self, mtu, fail=True):
+        return ip("link set dev %s mtu %d" % (self.dev["ifname"], mtu),
+                  fail=fail)
+
+    def set_xdp(self, bpf, mode, force=False, JSON=True, verbose=False,
+                fail=True, include_stderr=False):
+        if verbose:
+            bpf += " verbose"
+        return ip("link set dev %s xdp%s %s" % (self.dev["ifname"], mode, bpf),
+                  force=force, JSON=JSON,
+                  fail=fail, include_stderr=include_stderr)
+
+    def unset_xdp(self, mode, force=False, JSON=True,
+                  fail=True, include_stderr=False):
+        return ip("link set dev %s xdp%s off" % (self.dev["ifname"], mode),
+                  force=force, JSON=JSON,
+                  fail=fail, include_stderr=include_stderr)
+
+    def ip_link_show(self, xdp):
+        _, link = ip("link show dev %s" % (self['ifname']))
+        if len(link) > 1:
+            raise Exception("Multiple objects on ip link show")
+        if len(link) < 1:
+            return {}
+        fail(xdp != "xdp" in link,
+             "XDP program not reporting in iplink (reported %s, expected %s)" %
+             ("xdp" in link, xdp))
+        return link[0]
+
+    def tc_add_ingress(self):
+        tc("qdisc add dev %s ingress" % (self['ifname']))
+
+    def tc_del_ingress(self):
+        tc("qdisc del dev %s ingress" % (self['ifname']))
+
+    def tc_flush_filters(self, bound=0, total=0):
+        self.tc_del_ingress()
+        self.tc_add_ingress()
+        self.wait_for_flush(bound=bound, total=total)
+
+    def tc_show_ingress(self, expected=None):
+        # No JSON support, oh well...
+        flags = ["skip_sw", "skip_hw", "in_hw"]
+        named = ["protocol", "pref", "chain", "handle", "id", "tag"]
+
+        args = "-s filter show dev %s ingress" % (self['ifname'])
+        _, out = tc(args, JSON=False)
+
+        filters = []
+        lines = out.split('\n')
+        for line in lines:
+            words = line.split()
+            if "handle" not in words:
+                continue
+            fltr = {}
+            for flag in flags:
+                fltr[flag] = flag in words
+            for name in named:
+                try:
+                    idx = words.index(name)
+                    fltr[name] = words[idx + 1]
+                except ValueError:
+                    pass
+            filters.append(fltr)
+
+        if expected is not None:
+            fail(len(filters) != expected,
+                 "%d ingress filters loaded, expected %d" %
+                 (len(filters), expected))
+        return filters
+
+    def cls_filter_op(self, op, qdisc="ingress", prio=None, handle=None,
+                      chain=None, cls="", params="",
+                      fail=True, include_stderr=False):
+        spec = ""
+        if prio is not None:
+            spec += " prio %d" % (prio)
+        if handle:
+            spec += " handle %s" % (handle)
+        if chain is not None:
+            spec += " chain %d" % (chain)
+
+        return tc("filter {op} dev {dev} {qdisc} {spec} {cls} {params}"\
+                  .format(op=op, dev=self['ifname'], qdisc=qdisc, spec=spec,
+                          cls=cls, params=params),
+                  fail=fail, include_stderr=include_stderr)
+
+    def cls_bpf_add_filter(self, bpf, op="add", prio=None, handle=None,
+                           chain=None, da=False, verbose=False,
+                           skip_sw=False, skip_hw=False,
+                           fail=True, include_stderr=False):
+        cls = "bpf " + bpf
+
+        params = ""
+        if da:
+            params += " da"
+        if verbose:
+            params += " verbose"
+        if skip_sw:
+            params += " skip_sw"
+        if skip_hw:
+            params += " skip_hw"
+
+        return self.cls_filter_op(op=op, prio=prio, handle=handle, cls=cls,
+                                  chain=chain, params=params,
+                                  fail=fail, include_stderr=include_stderr)
+
+    def set_ethtool_tc_offloads(self, enable, fail=True):
+        args = "hw-tc-offload %s" % ("on" if enable else "off")
+        return ethtool(self, "-K", args, fail=fail)
+
+################################################################################
+def clean_up():
+    global files, netns, devs
+
+    for dev in devs:
+        dev.remove()
+    for f in files:
+        cmd("rm -f %s" % (f))
+    for ns in netns:
+        cmd("ip netns delete %s" % (ns))
+    files = []
+    netns = []
+
+def pin_prog(file_name, idx=0):
+    progs = bpftool_prog_list(expected=(idx + 1))
+    prog = progs[idx]
+    bpftool("prog pin id %d %s" % (prog["id"], file_name))
+    files.append(file_name)
+
+    return file_name, bpf_pinned(file_name)
+
+def pin_map(file_name, idx=0, expected=1):
+    maps = bpftool_map_list(expected=expected)
+    m = maps[idx]
+    bpftool("map pin id %d %s" % (m["id"], file_name))
+    files.append(file_name)
+
+    return file_name, bpf_pinned(file_name)
+
+def check_dev_info_removed(prog_file=None, map_file=None):
+    bpftool_prog_list(expected=0)
+    ret, err = bpftool("prog show pin %s" % (prog_file), fail=False)
+    fail(ret == 0, "Showing prog with removed device did not fail")
+    fail(err["error"].find("No such device") == -1,
+         "Showing prog with removed device expected ENODEV, error is %s" %
+         (err["error"]))
+
+    bpftool_map_list(expected=0)
+    ret, err = bpftool("map show pin %s" % (map_file), fail=False)
+    fail(ret == 0, "Showing map with removed device did not fail")
+    fail(err["error"].find("No such device") == -1,
+         "Showing map with removed device expected ENODEV, error is %s" %
+         (err["error"]))
+
+def check_dev_info(other_ns, ns, prog_file=None, map_file=None, removed=False):
+    progs = bpftool_prog_list(expected=1, ns=ns)
+    prog = progs[0]
+
+    fail("dev" not in prog.keys(), "Device parameters not reported")
+    dev = prog["dev"]
+    fail("ifindex" not in dev.keys(), "Device parameters not reported")
+    fail("ns_dev" not in dev.keys(), "Device parameters not reported")
+    fail("ns_inode" not in dev.keys(), "Device parameters not reported")
+
+    if not other_ns:
+        fail("ifname" not in dev.keys(), "Ifname not reported")
+        fail(dev["ifname"] != sim["ifname"],
+             "Ifname incorrect %s vs %s" % (dev["ifname"], sim["ifname"]))
+    else:
+        fail("ifname" in dev.keys(), "Ifname is reported for other ns")
+
+    maps = bpftool_map_list(expected=2, ns=ns)
+    for m in maps:
+        fail("dev" not in m.keys(), "Device parameters not reported")
+        fail(dev != m["dev"], "Map's device different than program's")
+
+def check_extack(output, reference, args):
+    if skip_extack:
+        return
+    lines = output.split("\n")
+    comp = len(lines) >= 2 and lines[1] == 'Error: ' + reference
+    fail(not comp, "Missing or incorrect netlink extack message")
+
+def check_extack_nsim(output, reference, args):
+    check_extack(output, "netdevsim: " + reference, args)
+
+def check_no_extack(res, needle):
+    fail((res[1] + res[2]).count(needle) or (res[1] + res[2]).count("Warning:"),
+         "Found '%s' in command output, leaky extack?" % (needle))
+
+def check_verifier_log(output, reference):
+    lines = output.split("\n")
+    for l in reversed(lines):
+        if l == reference:
+            return
+    fail(True, "Missing or incorrect message from netdevsim in verifier log")
+
+def test_spurios_extack(sim, obj, skip_hw, needle):
+    res = sim.cls_bpf_add_filter(obj, prio=1, handle=1, skip_hw=skip_hw,
+                                 include_stderr=True)
+    check_no_extack(res, needle)
+    res = sim.cls_bpf_add_filter(obj, op="replace", prio=1, handle=1,
+                                 skip_hw=skip_hw, include_stderr=True)
+    check_no_extack(res, needle)
+    res = sim.cls_filter_op(op="delete", prio=1, handle=1, cls="bpf",
+                            include_stderr=True)
+    check_no_extack(res, needle)
+
+
+# Parse command line
+parser = argparse.ArgumentParser()
+parser.add_argument("--log", help="output verbose log to given file")
+args = parser.parse_args()
+if args.log:
+    logfile = open(args.log, 'w+')
+    logfile.write("# -*-Org-*-")
+
+log("Prepare...", "", level=1)
+log_level_inc()
+
+# Check permissions
+skip(os.getuid() != 0, "test must be run as root")
+
+# Check tools
+ret, progs = bpftool("prog", fail=False)
+skip(ret != 0, "bpftool not installed")
+base_progs = progs
+_, base_maps = bpftool("map")
+
+# Check netdevsim
+ret, out = cmd("modprobe netdevsim", fail=False)
+skip(ret != 0, "netdevsim module could not be loaded")
+
+# Check debugfs
+_, out = cmd("mount")
+if out.find("/sys/kernel/debug type debugfs") == -1:
+    cmd("mount -t debugfs none /sys/kernel/debug")
+
+# Check samples are compiled
+samples = ["sample_ret0.o", "sample_map_ret0.o"]
+for s in samples:
+    ret, out = cmd("ls %s/%s" % (bpf_test_dir, s), fail=False)
+    skip(ret != 0, "sample %s/%s not found, please compile it" %
+         (bpf_test_dir, s))
+
+# Check if iproute2 is built with libmnl (needed by extack support)
+_, _, err = cmd("tc qdisc delete dev lo handle 0",
+                fail=False, include_stderr=True)
+if err.find("Error: Failed to find qdisc with specified handle.") == -1:
+    print("Warning: no extack message in iproute2 output, libmnl missing?")
+    log("Warning: no extack message in iproute2 output, libmnl missing?", "")
+    skip_extack = True
+
+# Check if net namespaces seem to work
+ns = mknetns()
+skip(ns is None, "Could not create a net namespace")
+cmd("ip netns delete %s" % (ns))
+netns = []
+
+try:
+    obj = bpf_obj("sample_ret0.o")
+    bytecode = bpf_bytecode("1,6 0 0 4294967295,")
+
+    start_test("Test destruction of generic XDP...")
+    sim = NetdevSim()
+    sim.set_xdp(obj, "generic")
+    sim.remove()
+    bpftool_prog_list_wait(expected=0)
+
+    sim = NetdevSim()
+    sim.tc_add_ingress()
+
+    start_test("Test TC non-offloaded...")
+    ret, _ = sim.cls_bpf_add_filter(obj, skip_hw=True, fail=False)
+    fail(ret != 0, "Software TC filter did not load")
+
+    start_test("Test TC non-offloaded isn't getting bound...")
+    ret, _ = sim.cls_bpf_add_filter(obj, fail=False)
+    fail(ret != 0, "Software TC filter did not load")
+    sim.dfs_get_bound_progs(expected=0)
+
+    sim.tc_flush_filters()
+
+    start_test("Test TC offloads are off by default...")
+    ret, _, err = sim.cls_bpf_add_filter(obj, skip_sw=True,
+                                         fail=False, include_stderr=True)
+    fail(ret == 0, "TC filter loaded without enabling TC offloads")
+    check_extack(err, "TC offload is disabled on net device.", args)
+    sim.wait_for_flush()
+
+    sim.set_ethtool_tc_offloads(True)
+    sim.dfs["bpf_tc_non_bound_accept"] = "Y"
+
+    start_test("Test TC offload by default...")
+    ret, _ = sim.cls_bpf_add_filter(obj, fail=False)
+    fail(ret != 0, "Software TC filter did not load")
+    sim.dfs_get_bound_progs(expected=0)
+    ingress = sim.tc_show_ingress(expected=1)
+    fltr = ingress[0]
+    fail(not fltr["in_hw"], "Filter not offloaded by default")
+
+    sim.tc_flush_filters()
+
+    start_test("Test TC cBPF bytcode tries offload by default...")
+    ret, _ = sim.cls_bpf_add_filter(bytecode, fail=False)
+    fail(ret != 0, "Software TC filter did not load")
+    sim.dfs_get_bound_progs(expected=0)
+    ingress = sim.tc_show_ingress(expected=1)
+    fltr = ingress[0]
+    fail(not fltr["in_hw"], "Bytecode not offloaded by default")
+
+    sim.tc_flush_filters()
+    sim.dfs["bpf_tc_non_bound_accept"] = "N"
+
+    start_test("Test TC cBPF unbound bytecode doesn't offload...")
+    ret, _, err = sim.cls_bpf_add_filter(bytecode, skip_sw=True,
+                                         fail=False, include_stderr=True)
+    fail(ret == 0, "TC bytecode loaded for offload")
+    check_extack_nsim(err, "netdevsim configured to reject unbound programs.",
+                      args)
+    sim.wait_for_flush()
+
+    start_test("Test non-0 chain offload...")
+    ret, _, err = sim.cls_bpf_add_filter(obj, chain=1, prio=1, handle=1,
+                                         skip_sw=True,
+                                         fail=False, include_stderr=True)
+    fail(ret == 0, "Offloaded a filter to chain other than 0")
+    check_extack(err, "Driver supports only offload of chain 0.", args)
+    sim.tc_flush_filters()
+
+    start_test("Test TC replace...")
+    sim.cls_bpf_add_filter(obj, prio=1, handle=1)
+    sim.cls_bpf_add_filter(obj, op="replace", prio=1, handle=1)
+    sim.cls_filter_op(op="delete", prio=1, handle=1, cls="bpf")
+
+    sim.cls_bpf_add_filter(obj, prio=1, handle=1, skip_sw=True)
+    sim.cls_bpf_add_filter(obj, op="replace", prio=1, handle=1, skip_sw=True)
+    sim.cls_filter_op(op="delete", prio=1, handle=1, cls="bpf")
+
+    sim.cls_bpf_add_filter(obj, prio=1, handle=1, skip_hw=True)
+    sim.cls_bpf_add_filter(obj, op="replace", prio=1, handle=1, skip_hw=True)
+    sim.cls_filter_op(op="delete", prio=1, handle=1, cls="bpf")
+
+    start_test("Test TC replace bad flags...")
+    for i in range(3):
+        for j in range(3):
+            ret, _ = sim.cls_bpf_add_filter(obj, op="replace", prio=1, handle=1,
+                                            skip_sw=(j == 1), skip_hw=(j == 2),
+                                            fail=False)
+            fail(bool(ret) != bool(j),
+                 "Software TC incorrect load in replace test, iteration %d" %
+                 (j))
+        sim.cls_filter_op(op="delete", prio=1, handle=1, cls="bpf")
+
+    start_test("Test spurious extack from the driver...")
+    test_spurios_extack(sim, obj, False, "netdevsim")
+    test_spurios_extack(sim, obj, True, "netdevsim")
+
+    sim.set_ethtool_tc_offloads(False)
+
+    test_spurios_extack(sim, obj, False, "TC offload is disabled")
+    test_spurios_extack(sim, obj, True, "TC offload is disabled")
+
+    sim.set_ethtool_tc_offloads(True)
+
+    sim.tc_flush_filters()
+
+    start_test("Test TC offloads work...")
+    ret, _, err = sim.cls_bpf_add_filter(obj, verbose=True, skip_sw=True,
+                                         fail=False, include_stderr=True)
+    fail(ret != 0, "TC filter did not load with TC offloads enabled")
+    check_verifier_log(err, "[netdevsim] Hello from netdevsim!")
+
+    start_test("Test TC offload basics...")
+    dfs = sim.dfs_get_bound_progs(expected=1)
+    progs = bpftool_prog_list(expected=1)
+    ingress = sim.tc_show_ingress(expected=1)
+
+    dprog = dfs[0]
+    prog = progs[0]
+    fltr = ingress[0]
+    fail(fltr["skip_hw"], "TC does reports 'skip_hw' on offloaded filter")
+    fail(not fltr["in_hw"], "TC does not report 'in_hw' for offloaded filter")
+    fail(not fltr["skip_sw"], "TC does not report 'skip_sw' back")
+
+    start_test("Test TC offload is device-bound...")
+    fail(str(prog["id"]) != fltr["id"], "Program IDs don't match")
+    fail(prog["tag"] != fltr["tag"], "Program tags don't match")
+    fail(fltr["id"] != dprog["id"], "Program IDs don't match")
+    fail(dprog["state"] != "xlated", "Offloaded program state not translated")
+    fail(dprog["loaded"] != "Y", "Offloaded program is not loaded")
+
+    start_test("Test disabling TC offloads is rejected while filters installed...")
+    ret, _ = sim.set_ethtool_tc_offloads(False, fail=False)
+    fail(ret == 0, "Driver should refuse to disable TC offloads with filters installed...")
+    sim.set_ethtool_tc_offloads(True)
+
+    start_test("Test qdisc removal frees things...")
+    sim.tc_flush_filters()
+    sim.tc_show_ingress(expected=0)
+
+    start_test("Test disabling TC offloads is OK without filters...")
+    ret, _ = sim.set_ethtool_tc_offloads(False, fail=False)
+    fail(ret != 0,
+         "Driver refused to disable TC offloads without filters installed...")
+
+    sim.set_ethtool_tc_offloads(True)
+
+    start_test("Test destroying device gets rid of TC filters...")
+    sim.cls_bpf_add_filter(obj, skip_sw=True)
+    sim.remove()
+    bpftool_prog_list_wait(expected=0)
+
+    sim = NetdevSim()
+    sim.set_ethtool_tc_offloads(True)
+
+    start_test("Test destroying device gets rid of XDP...")
+    sim.set_xdp(obj, "offload")
+    sim.remove()
+    bpftool_prog_list_wait(expected=0)
+
+    sim = NetdevSim()
+    sim.set_ethtool_tc_offloads(True)
+
+    start_test("Test XDP prog reporting...")
+    sim.set_xdp(obj, "drv")
+    ipl = sim.ip_link_show(xdp=True)
+    progs = bpftool_prog_list(expected=1)
+    fail(ipl["xdp"]["prog"]["id"] != progs[0]["id"],
+         "Loaded program has wrong ID")
+
+    start_test("Test XDP prog replace without force...")
+    ret, _ = sim.set_xdp(obj, "drv", fail=False)
+    fail(ret == 0, "Replaced XDP program without -force")
+    sim.wait_for_flush(total=1)
+
+    start_test("Test XDP prog replace with force...")
+    ret, _ = sim.set_xdp(obj, "drv", force=True, fail=False)
+    fail(ret != 0, "Could not replace XDP program with -force")
+    bpftool_prog_list_wait(expected=1)
+    ipl = sim.ip_link_show(xdp=True)
+    progs = bpftool_prog_list(expected=1)
+    fail(ipl["xdp"]["prog"]["id"] != progs[0]["id"],
+         "Loaded program has wrong ID")
+    fail("dev" in progs[0].keys(),
+         "Device parameters reported for non-offloaded program")
+
+    start_test("Test XDP prog replace with bad flags...")
+    ret, _, err = sim.set_xdp(obj, "generic", force=True,
+                              fail=False, include_stderr=True)
+    fail(ret == 0, "Replaced XDP program with a program in different mode")
+    fail(err.count("File exists") != 1, "Replaced driver XDP with generic")
+    ret, _, err = sim.set_xdp(obj, "", force=True,
+                              fail=False, include_stderr=True)
+    fail(ret == 0, "Replaced XDP program with a program in different mode")
+    check_extack(err, "program loaded with different flags.", args)
+
+    start_test("Test XDP prog remove with bad flags...")
+    ret, _, err = sim.unset_xdp("", force=True,
+                                fail=False, include_stderr=True)
+    fail(ret == 0, "Removed program with a bad mode")
+    check_extack(err, "program loaded with different flags.", args)
+
+    start_test("Test MTU restrictions...")
+    ret, _ = sim.set_mtu(9000, fail=False)
+    fail(ret == 0,
+         "Driver should refuse to increase MTU to 9000 with XDP loaded...")
+    sim.unset_xdp("drv")
+    bpftool_prog_list_wait(expected=0)
+    sim.set_mtu(9000)
+    ret, _, err = sim.set_xdp(obj, "drv", fail=False, include_stderr=True)
+    fail(ret == 0, "Driver should refuse to load program with MTU of 9000...")
+    check_extack_nsim(err, "MTU too large w/ XDP enabled.", args)
+    sim.set_mtu(1500)
+
+    sim.wait_for_flush()
+    start_test("Test non-offload XDP attaching to HW...")
+    bpftool_prog_load("sample_ret0.o", "/sys/fs/bpf/nooffload")
+    nooffload = bpf_pinned("/sys/fs/bpf/nooffload")
+    ret, _, err = sim.set_xdp(nooffload, "offload",
+                              fail=False, include_stderr=True)
+    fail(ret == 0, "attached non-offloaded XDP program to HW")
+    check_extack_nsim(err, "xdpoffload of non-bound program.", args)
+    rm("/sys/fs/bpf/nooffload")
+
+    start_test("Test offload XDP attaching to drv...")
+    bpftool_prog_load("sample_ret0.o", "/sys/fs/bpf/offload",
+                      dev=sim['ifname'])
+    offload = bpf_pinned("/sys/fs/bpf/offload")
+    ret, _, err = sim.set_xdp(offload, "drv", fail=False, include_stderr=True)
+    fail(ret == 0, "attached offloaded XDP program to drv")
+    check_extack(err, "using device-bound program without HW_MODE flag is not supported.", args)
+    rm("/sys/fs/bpf/offload")
+    sim.wait_for_flush()
+
+    start_test("Test XDP offload...")
+    _, _, err = sim.set_xdp(obj, "offload", verbose=True, include_stderr=True)
+    ipl = sim.ip_link_show(xdp=True)
+    link_xdp = ipl["xdp"]["prog"]
+    progs = bpftool_prog_list(expected=1)
+    prog = progs[0]
+    fail(link_xdp["id"] != prog["id"], "Loaded program has wrong ID")
+    check_verifier_log(err, "[netdevsim] Hello from netdevsim!")
+
+    start_test("Test XDP offload is device bound...")
+    dfs = sim.dfs_get_bound_progs(expected=1)
+    dprog = dfs[0]
+
+    fail(prog["id"] != link_xdp["id"], "Program IDs don't match")
+    fail(prog["tag"] != link_xdp["tag"], "Program tags don't match")
+    fail(str(link_xdp["id"]) != dprog["id"], "Program IDs don't match")
+    fail(dprog["state"] != "xlated", "Offloaded program state not translated")
+    fail(dprog["loaded"] != "Y", "Offloaded program is not loaded")
+
+    start_test("Test removing XDP program many times...")
+    sim.unset_xdp("offload")
+    sim.unset_xdp("offload")
+    sim.unset_xdp("drv")
+    sim.unset_xdp("drv")
+    sim.unset_xdp("")
+    sim.unset_xdp("")
+    bpftool_prog_list_wait(expected=0)
+
+    start_test("Test attempt to use a program for a wrong device...")
+    sim2 = NetdevSim()
+    sim2.set_xdp(obj, "offload")
+    pin_file, pinned = pin_prog("/sys/fs/bpf/tmp")
+
+    ret, _, err = sim.set_xdp(pinned, "offload",
+                              fail=False, include_stderr=True)
+    fail(ret == 0, "Pinned program loaded for a different device accepted")
+    check_extack_nsim(err, "program bound to different dev.", args)
+    sim2.remove()
+    ret, _, err = sim.set_xdp(pinned, "offload",
+                              fail=False, include_stderr=True)
+    fail(ret == 0, "Pinned program loaded for a removed device accepted")
+    check_extack_nsim(err, "xdpoffload of non-bound program.", args)
+    rm(pin_file)
+    bpftool_prog_list_wait(expected=0)
+
+    start_test("Test multi-attachment XDP - attach...")
+    sim.set_xdp(obj, "offload")
+    xdp = sim.ip_link_show(xdp=True)["xdp"]
+    offloaded = sim.dfs_read("bpf_offloaded_id")
+    fail("prog" not in xdp, "Base program not reported in single program mode")
+    fail(len(ipl["xdp"]["attached"]) != 1,
+         "Wrong attached program count with one program")
+
+    sim.set_xdp(obj, "")
+    two_xdps = sim.ip_link_show(xdp=True)["xdp"]
+    offloaded2 = sim.dfs_read("bpf_offloaded_id")
+
+    fail(two_xdps["mode"] != 4, "Bad mode reported with multiple programs")
+    fail("prog" in two_xdps, "Base program reported in multi program mode")
+    fail(xdp["attached"][0] not in two_xdps["attached"],
+         "Offload program not reported after driver activated")
+    fail(len(two_xdps["attached"]) != 2,
+         "Wrong attached program count with two programs")
+    fail(two_xdps["attached"][0]["prog"]["id"] ==
+         two_xdps["attached"][1]["prog"]["id"],
+         "offloaded and drv programs have the same id")
+    fail(offloaded != offloaded2,
+         "offload ID changed after loading driver program")
+
+    start_test("Test multi-attachment XDP - replace...")
+    ret, _, err = sim.set_xdp(obj, "offload", fail=False, include_stderr=True)
+    fail(err.count("busy") != 1, "Replaced one of programs without -force")
+
+    start_test("Test multi-attachment XDP - detach...")
+    ret, _, err = sim.unset_xdp("drv", force=True,
+                                fail=False, include_stderr=True)
+    fail(ret == 0, "Removed program with a bad mode")
+    check_extack(err, "program loaded with different flags.", args)
+
+    sim.unset_xdp("offload")
+    xdp = sim.ip_link_show(xdp=True)["xdp"]
+    offloaded = sim.dfs_read("bpf_offloaded_id")
+
+    fail(xdp["mode"] != 1, "Bad mode reported after multiple programs")
+    fail("prog" not in xdp,
+         "Base program not reported after multi program mode")
+    fail(xdp["attached"][0] not in two_xdps["attached"],
+         "Offload program not reported after driver activated")
+    fail(len(ipl["xdp"]["attached"]) != 1,
+         "Wrong attached program count with remaining programs")
+    fail(offloaded != "0", "offload ID reported with only driver program left")
+
+    start_test("Test multi-attachment XDP - device remove...")
+    sim.set_xdp(obj, "offload")
+    sim.remove()
+
+    sim = NetdevSim()
+    sim.set_ethtool_tc_offloads(True)
+
+    start_test("Test mixing of TC and XDP...")
+    sim.tc_add_ingress()
+    sim.set_xdp(obj, "offload")
+    ret, _, err = sim.cls_bpf_add_filter(obj, skip_sw=True,
+                                         fail=False, include_stderr=True)
+    fail(ret == 0, "Loading TC when XDP active should fail")
+    check_extack_nsim(err, "driver and netdev offload states mismatch.", args)
+    sim.unset_xdp("offload")
+    sim.wait_for_flush()
+
+    sim.cls_bpf_add_filter(obj, skip_sw=True)
+    ret, _, err = sim.set_xdp(obj, "offload", fail=False, include_stderr=True)
+    fail(ret == 0, "Loading XDP when TC active should fail")
+    check_extack_nsim(err, "TC program is already loaded.", args)
+
+    start_test("Test binding TC from pinned...")
+    pin_file, pinned = pin_prog("/sys/fs/bpf/tmp")
+    sim.tc_flush_filters(bound=1, total=1)
+    sim.cls_bpf_add_filter(pinned, da=True, skip_sw=True)
+    sim.tc_flush_filters(bound=1, total=1)
+
+    start_test("Test binding XDP from pinned...")
+    sim.set_xdp(obj, "offload")
+    pin_file, pinned = pin_prog("/sys/fs/bpf/tmp2", idx=1)
+
+    sim.set_xdp(pinned, "offload", force=True)
+    sim.unset_xdp("offload")
+    sim.set_xdp(pinned, "offload", force=True)
+    sim.unset_xdp("offload")
+
+    start_test("Test offload of wrong type fails...")
+    ret, _ = sim.cls_bpf_add_filter(pinned, da=True, skip_sw=True, fail=False)
+    fail(ret == 0, "Managed to attach XDP program to TC")
+
+    start_test("Test asking for TC offload of two filters...")
+    sim.cls_bpf_add_filter(obj, da=True, skip_sw=True)
+    ret, _, err = sim.cls_bpf_add_filter(obj, da=True, skip_sw=True,
+                                         fail=False, include_stderr=True)
+    fail(ret == 0, "Managed to offload two TC filters at the same time")
+    check_extack_nsim(err, "driver and netdev offload states mismatch.", args)
+
+    sim.tc_flush_filters(bound=2, total=2)
+
+    start_test("Test if netdev removal waits for translation...")
+    delay_msec = 500
+    sim.dfs["bpf_bind_verifier_delay"] = delay_msec
+    start = time.time()
+    cmd_line = "tc filter add dev %s ingress bpf %s da skip_sw" % \
+               (sim['ifname'], obj)
+    tc_proc = cmd(cmd_line, background=True, fail=False)
+    # Wait for the verifier to start
+    while sim.dfs_num_bound_progs() <= 2:
+        pass
+    sim.remove()
+    end = time.time()
+    ret, _ = cmd_result(tc_proc, fail=False)
+    time_diff = end - start
+    log("Time", "start:\t%s\nend:\t%s\ndiff:\t%s" % (start, end, time_diff))
+
+    fail(ret == 0, "Managed to load TC filter on a unregistering device")
+    delay_sec = delay_msec * 0.001
+    fail(time_diff < delay_sec, "Removal process took %s, expected %s" %
+         (time_diff, delay_sec))
+
+    # Remove all pinned files and reinstantiate the netdev
+    clean_up()
+    bpftool_prog_list_wait(expected=0)
+
+    sim = NetdevSim()
+    map_obj = bpf_obj("sample_map_ret0.o")
+    start_test("Test loading program with maps...")
+    sim.set_xdp(map_obj, "offload", JSON=False) # map fixup msg breaks JSON
+
+    start_test("Test bpftool bound info reporting (own ns)...")
+    check_dev_info(False, "")
+
+    start_test("Test bpftool bound info reporting (other ns)...")
+    ns = mknetns()
+    sim.set_ns(ns)
+    check_dev_info(True, "")
+
+    start_test("Test bpftool bound info reporting (remote ns)...")
+    check_dev_info(False, ns)
+
+    start_test("Test bpftool bound info reporting (back to own ns)...")
+    sim.set_ns("")
+    check_dev_info(False, "")
+
+    prog_file, _ = pin_prog("/sys/fs/bpf/tmp_prog")
+    map_file, _ = pin_map("/sys/fs/bpf/tmp_map", idx=1, expected=2)
+    sim.remove()
+
+    start_test("Test bpftool bound info reporting (removed dev)...")
+    check_dev_info_removed(prog_file=prog_file, map_file=map_file)
+
+    # Remove all pinned files and reinstantiate the netdev
+    clean_up()
+    bpftool_prog_list_wait(expected=0)
+
+    sim = NetdevSim()
+
+    start_test("Test map update (no flags)...")
+    sim.set_xdp(map_obj, "offload", JSON=False) # map fixup msg breaks JSON
+    maps = bpftool_map_list(expected=2)
+    array = maps[0] if maps[0]["type"] == "array" else maps[1]
+    htab = maps[0] if maps[0]["type"] == "hash" else maps[1]
+    for m in maps:
+        for i in range(2):
+            bpftool("map update id %d key %s value %s" %
+                    (m["id"], int2str("I", i), int2str("Q", i * 3)))
+
+    for m in maps:
+        ret, _ = bpftool("map update id %d key %s value %s" %
+                         (m["id"], int2str("I", 3), int2str("Q", 3 * 3)),
+                         fail=False)
+        fail(ret == 0, "added too many entries")
+
+    start_test("Test map update (exists)...")
+    for m in maps:
+        for i in range(2):
+            bpftool("map update id %d key %s value %s exist" %
+                    (m["id"], int2str("I", i), int2str("Q", i * 3)))
+
+    for m in maps:
+        ret, err = bpftool("map update id %d key %s value %s exist" %
+                           (m["id"], int2str("I", 3), int2str("Q", 3 * 3)),
+                           fail=False)
+        fail(ret == 0, "updated non-existing key")
+        fail(err["error"].find("No such file or directory") == -1,
+             "expected ENOENT, error is '%s'" % (err["error"]))
+
+    start_test("Test map update (noexist)...")
+    for m in maps:
+        for i in range(2):
+            ret, err = bpftool("map update id %d key %s value %s noexist" %
+                               (m["id"], int2str("I", i), int2str("Q", i * 3)),
+                               fail=False)
+        fail(ret == 0, "updated existing key")
+        fail(err["error"].find("File exists") == -1,
+             "expected EEXIST, error is '%s'" % (err["error"]))
+
+    start_test("Test map dump...")
+    for m in maps:
+        _, entries = bpftool("map dump id %d" % (m["id"]))
+        for i in range(2):
+            key = str2int(entries[i]["key"])
+            fail(key != i, "expected key %d, got %d" % (key, i))
+            val = str2int(entries[i]["value"])
+            fail(val != i * 3, "expected value %d, got %d" % (val, i * 3))
+
+    start_test("Test map getnext...")
+    for m in maps:
+        _, entry = bpftool("map getnext id %d" % (m["id"]))
+        key = str2int(entry["next_key"])
+        fail(key != 0, "next key %d, expected %d" % (key, 0))
+        _, entry = bpftool("map getnext id %d key %s" %
+                           (m["id"], int2str("I", 0)))
+        key = str2int(entry["next_key"])
+        fail(key != 1, "next key %d, expected %d" % (key, 1))
+        ret, err = bpftool("map getnext id %d key %s" %
+                           (m["id"], int2str("I", 1)), fail=False)
+        fail(ret == 0, "got next key past the end of map")
+        fail(err["error"].find("No such file or directory") == -1,
+             "expected ENOENT, error is '%s'" % (err["error"]))
+
+    start_test("Test map delete (htab)...")
+    for i in range(2):
+        bpftool("map delete id %d key %s" % (htab["id"], int2str("I", i)))
+
+    start_test("Test map delete (array)...")
+    for i in range(2):
+        ret, err = bpftool("map delete id %d key %s" %
+                           (htab["id"], int2str("I", i)), fail=False)
+        fail(ret == 0, "removed entry from an array")
+        fail(err["error"].find("No such file or directory") == -1,
+             "expected ENOENT, error is '%s'" % (err["error"]))
+
+    start_test("Test map remove...")
+    sim.unset_xdp("offload")
+    bpftool_map_list_wait(expected=0)
+    sim.remove()
+
+    sim = NetdevSim()
+    sim.set_xdp(map_obj, "offload", JSON=False) # map fixup msg breaks JSON
+    sim.remove()
+    bpftool_map_list_wait(expected=0)
+
+    start_test("Test map creation fail path...")
+    sim = NetdevSim()
+    sim.dfs["bpf_map_accept"] = "N"
+    ret, _ = sim.set_xdp(map_obj, "offload", JSON=False, fail=False)
+    fail(ret == 0,
+         "netdevsim didn't refuse to create a map with offload disabled")
+
+    sim.remove()
+
+    start_test("Test multi-dev ASIC program reuse...")
+    simA = NetdevSim()
+    simB1 = NetdevSim()
+    simB2 = NetdevSim(link=simB1)
+    simB3 = NetdevSim(link=simB1)
+    sims = (simA, simB1, simB2, simB3)
+    simB = (simB1, simB2, simB3)
+
+    bpftool_prog_load("sample_map_ret0.o", "/sys/fs/bpf/nsimA",
+                      dev=simA['ifname'])
+    progA = bpf_pinned("/sys/fs/bpf/nsimA")
+    bpftool_prog_load("sample_map_ret0.o", "/sys/fs/bpf/nsimB",
+                      dev=simB1['ifname'])
+    progB = bpf_pinned("/sys/fs/bpf/nsimB")
+
+    simA.set_xdp(progA, "offload", JSON=False)
+    for d in simB:
+        d.set_xdp(progB, "offload", JSON=False)
+
+    start_test("Test multi-dev ASIC cross-dev replace...")
+    ret, _ = simA.set_xdp(progB, "offload", force=True, JSON=False, fail=False)
+    fail(ret == 0, "cross-ASIC program allowed")
+    for d in simB:
+        ret, _ = d.set_xdp(progA, "offload", force=True, JSON=False, fail=False)
+        fail(ret == 0, "cross-ASIC program allowed")
+
+    start_test("Test multi-dev ASIC cross-dev install...")
+    for d in sims:
+        d.unset_xdp("offload")
+
+    ret, _, err = simA.set_xdp(progB, "offload", force=True, JSON=False,
+                               fail=False, include_stderr=True)
+    fail(ret == 0, "cross-ASIC program allowed")
+    check_extack_nsim(err, "program bound to different dev.", args)
+    for d in simB:
+        ret, _, err = d.set_xdp(progA, "offload", force=True, JSON=False,
+                                fail=False, include_stderr=True)
+        fail(ret == 0, "cross-ASIC program allowed")
+        check_extack_nsim(err, "program bound to different dev.", args)
+
+    start_test("Test multi-dev ASIC cross-dev map reuse...")
+
+    mapA = bpftool("prog show %s" % (progA))[1]["map_ids"][0]
+    mapB = bpftool("prog show %s" % (progB))[1]["map_ids"][0]
+
+    ret, _ = bpftool_prog_load("sample_map_ret0.o", "/sys/fs/bpf/nsimB_",
+                               dev=simB3['ifname'],
+                               maps=["idx 0 id %d" % (mapB)],
+                               fail=False)
+    fail(ret != 0, "couldn't reuse a map on the same ASIC")
+    rm("/sys/fs/bpf/nsimB_")
+
+    ret, _, err = bpftool_prog_load("sample_map_ret0.o", "/sys/fs/bpf/nsimA_",
+                                    dev=simA['ifname'],
+                                    maps=["idx 0 id %d" % (mapB)],
+                                    fail=False, include_stderr=True)
+    fail(ret == 0, "could reuse a map on a different ASIC")
+    fail(err.count("offload device mismatch between prog and map") == 0,
+         "error message missing for cross-ASIC map")
+
+    ret, _, err = bpftool_prog_load("sample_map_ret0.o", "/sys/fs/bpf/nsimB_",
+                                    dev=simB1['ifname'],
+                                    maps=["idx 0 id %d" % (mapA)],
+                                    fail=False, include_stderr=True)
+    fail(ret == 0, "could reuse a map on a different ASIC")
+    fail(err.count("offload device mismatch between prog and map") == 0,
+         "error message missing for cross-ASIC map")
+
+    start_test("Test multi-dev ASIC cross-dev destruction...")
+    bpftool_prog_list_wait(expected=2)
+
+    simA.remove()
+    bpftool_prog_list_wait(expected=1)
+
+    ifnameB = bpftool("prog show %s" % (progB))[1]["dev"]["ifname"]
+    fail(ifnameB != simB1['ifname'], "program not bound to originial device")
+    simB1.remove()
+    bpftool_prog_list_wait(expected=1)
+
+    start_test("Test multi-dev ASIC cross-dev destruction - move...")
+    ifnameB = bpftool("prog show %s" % (progB))[1]["dev"]["ifname"]
+    fail(ifnameB not in (simB2['ifname'], simB3['ifname']),
+         "program not bound to remaining devices")
+
+    simB2.remove()
+    ifnameB = bpftool("prog show %s" % (progB))[1]["dev"]["ifname"]
+    fail(ifnameB != simB3['ifname'], "program not bound to remaining device")
+
+    simB3.remove()
+    bpftool_prog_list_wait(expected=0)
+
+    start_test("Test multi-dev ASIC cross-dev destruction - orphaned...")
+    ret, out = bpftool("prog show %s" % (progB), fail=False)
+    fail(ret == 0, "got information about orphaned program")
+    fail("error" not in out, "no error reported for get info on orphaned")
+    fail(out["error"] != "can't get prog info: No such device",
+         "wrong error for get info on orphaned")
+
+    print("%s: OK" % (os.path.basename(__file__)))
+
+finally:
+    log("Clean up...", "", level=1)
+    log_level_inc()
+    clean_up()
diff --git a/tools/testing/selftests/bpf/test_pkt_access.c b/tools/testing/selftests/bpf/test_pkt_access.c
new file mode 100644
index 000000000..6e11ba117
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_pkt_access.c
@@ -0,0 +1,65 @@
+/* Copyright (c) 2017 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <stddef.h>
+#include <string.h>
+#include <linux/bpf.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/in.h>
+#include <linux/tcp.h>
+#include <linux/pkt_cls.h>
+#include "bpf_helpers.h"
+#include "bpf_endian.h"
+
+#define barrier() __asm__ __volatile__("": : :"memory")
+int _version SEC("version") = 1;
+
+SEC("test1")
+int process(struct __sk_buff *skb)
+{
+	void *data_end = (void *)(long)skb->data_end;
+	void *data = (void *)(long)skb->data;
+	struct ethhdr *eth = (struct ethhdr *)(data);
+	struct tcphdr *tcp = NULL;
+	__u8 proto = 255;
+	__u64 ihl_len;
+
+	if (eth + 1 > data_end)
+		return TC_ACT_SHOT;
+
+	if (eth->h_proto == bpf_htons(ETH_P_IP)) {
+		struct iphdr *iph = (struct iphdr *)(eth + 1);
+
+		if (iph + 1 > data_end)
+			return TC_ACT_SHOT;
+		ihl_len = iph->ihl * 4;
+		proto = iph->protocol;
+		tcp = (struct tcphdr *)((void *)(iph) + ihl_len);
+	} else if (eth->h_proto == bpf_htons(ETH_P_IPV6)) {
+		struct ipv6hdr *ip6h = (struct ipv6hdr *)(eth + 1);
+
+		if (ip6h + 1 > data_end)
+			return TC_ACT_SHOT;
+		ihl_len = sizeof(*ip6h);
+		proto = ip6h->nexthdr;
+		tcp = (struct tcphdr *)((void *)(ip6h) + ihl_len);
+	}
+
+	if (tcp) {
+		if (((void *)(tcp) + 20) > data_end || proto != 6)
+			return TC_ACT_SHOT;
+		barrier(); /* to force ordering of checks */
+		if (((void *)(tcp) + 18) > data_end)
+			return TC_ACT_SHOT;
+		if (tcp->urg_ptr == 123)
+			return TC_ACT_OK;
+	}
+
+	return TC_ACT_UNSPEC;
+}
diff --git a/tools/testing/selftests/bpf/test_pkt_md_access.c b/tools/testing/selftests/bpf/test_pkt_md_access.c
new file mode 100644
index 000000000..7956302ec
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_pkt_md_access.c
@@ -0,0 +1,46 @@
+/* Copyright (c) 2017 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <stddef.h>
+#include <string.h>
+#include <linux/bpf.h>
+#include <linux/pkt_cls.h>
+#include "bpf_helpers.h"
+
+int _version SEC("version") = 1;
+
+#if  __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+#define TEST_FIELD(TYPE, FIELD, MASK)					\
+	{								\
+		TYPE tmp = *(volatile TYPE *)&skb->FIELD;		\
+		if (tmp != ((*(volatile __u32 *)&skb->FIELD) & MASK))	\
+			return TC_ACT_SHOT;				\
+	}
+#else
+#define TEST_FIELD_OFFSET(a, b)	((sizeof(a) - sizeof(b)) / sizeof(b))
+#define TEST_FIELD(TYPE, FIELD, MASK)					\
+	{								\
+		TYPE tmp = *((volatile TYPE *)&skb->FIELD +		\
+			      TEST_FIELD_OFFSET(skb->FIELD, TYPE));	\
+		if (tmp != ((*(volatile __u32 *)&skb->FIELD) & MASK))	\
+			return TC_ACT_SHOT;				\
+	}
+#endif
+
+SEC("test1")
+int process(struct __sk_buff *skb)
+{
+	TEST_FIELD(__u8,  len, 0xFF);
+	TEST_FIELD(__u16, len, 0xFFFF);
+	TEST_FIELD(__u32, len, 0xFFFFFFFF);
+	TEST_FIELD(__u16, protocol, 0xFFFF);
+	TEST_FIELD(__u32, protocol, 0xFFFFFFFF);
+	TEST_FIELD(__u8,  hash, 0xFF);
+	TEST_FIELD(__u16, hash, 0xFFFF);
+	TEST_FIELD(__u32, hash, 0xFFFFFFFF);
+
+	return TC_ACT_OK;
+}
diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
new file mode 100644
index 000000000..0fcd38ffc
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -0,0 +1,1756 @@
+/* Copyright (c) 2017 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <stdio.h>
+#include <unistd.h>
+#include <errno.h>
+#include <string.h>
+#include <assert.h>
+#include <stdlib.h>
+#include <time.h>
+
+#include <linux/types.h>
+typedef __u16 __sum16;
+#include <arpa/inet.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/tcp.h>
+#include <linux/filter.h>
+#include <linux/perf_event.h>
+#include <linux/unistd.h>
+
+#include <sys/ioctl.h>
+#include <sys/wait.h>
+#include <sys/types.h>
+#include <fcntl.h>
+
+#include <linux/bpf.h>
+#include <linux/err.h>
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+
+#include "test_iptunnel_common.h"
+#include "bpf_util.h"
+#include "bpf_endian.h"
+#include "bpf_rlimit.h"
+#include "trace_helpers.h"
+
+static int error_cnt, pass_cnt;
+static bool jit_enabled;
+
+#define MAGIC_BYTES 123
+
+/* ipv4 test vector */
+static struct {
+	struct ethhdr eth;
+	struct iphdr iph;
+	struct tcphdr tcp;
+} __packed pkt_v4 = {
+	.eth.h_proto = __bpf_constant_htons(ETH_P_IP),
+	.iph.ihl = 5,
+	.iph.protocol = 6,
+	.iph.tot_len = __bpf_constant_htons(MAGIC_BYTES),
+	.tcp.urg_ptr = 123,
+};
+
+/* ipv6 test vector */
+static struct {
+	struct ethhdr eth;
+	struct ipv6hdr iph;
+	struct tcphdr tcp;
+} __packed pkt_v6 = {
+	.eth.h_proto = __bpf_constant_htons(ETH_P_IPV6),
+	.iph.nexthdr = 6,
+	.iph.payload_len = __bpf_constant_htons(MAGIC_BYTES),
+	.tcp.urg_ptr = 123,
+};
+
+#define CHECK(condition, tag, format...) ({				\
+	int __ret = !!(condition);					\
+	if (__ret) {							\
+		error_cnt++;						\
+		printf("%s:FAIL:%s ", __func__, tag);			\
+		printf(format);						\
+	} else {							\
+		pass_cnt++;						\
+		printf("%s:PASS:%s %d nsec\n", __func__, tag, duration);\
+	}								\
+	__ret;								\
+})
+
+static int bpf_find_map(const char *test, struct bpf_object *obj,
+			const char *name)
+{
+	struct bpf_map *map;
+
+	map = bpf_object__find_map_by_name(obj, name);
+	if (!map) {
+		printf("%s:FAIL:map '%s' not found\n", test, name);
+		error_cnt++;
+		return -1;
+	}
+	return bpf_map__fd(map);
+}
+
+static void test_pkt_access(void)
+{
+	const char *file = "./test_pkt_access.o";
+	struct bpf_object *obj;
+	__u32 duration, retval;
+	int err, prog_fd;
+
+	err = bpf_prog_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd);
+	if (err) {
+		error_cnt++;
+		return;
+	}
+
+	err = bpf_prog_test_run(prog_fd, 100000, &pkt_v4, sizeof(pkt_v4),
+				NULL, NULL, &retval, &duration);
+	CHECK(err || errno || retval, "ipv4",
+	      "err %d errno %d retval %d duration %d\n",
+	      err, errno, retval, duration);
+
+	err = bpf_prog_test_run(prog_fd, 100000, &pkt_v6, sizeof(pkt_v6),
+				NULL, NULL, &retval, &duration);
+	CHECK(err || errno || retval, "ipv6",
+	      "err %d errno %d retval %d duration %d\n",
+	      err, errno, retval, duration);
+	bpf_object__close(obj);
+}
+
+static void test_xdp(void)
+{
+	struct vip key4 = {.protocol = 6, .family = AF_INET};
+	struct vip key6 = {.protocol = 6, .family = AF_INET6};
+	struct iptnl_info value4 = {.family = AF_INET};
+	struct iptnl_info value6 = {.family = AF_INET6};
+	const char *file = "./test_xdp.o";
+	struct bpf_object *obj;
+	char buf[128];
+	struct ipv6hdr *iph6 = (void *)buf + sizeof(struct ethhdr);
+	struct iphdr *iph = (void *)buf + sizeof(struct ethhdr);
+	__u32 duration, retval, size;
+	int err, prog_fd, map_fd;
+
+	err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd);
+	if (err) {
+		error_cnt++;
+		return;
+	}
+
+	map_fd = bpf_find_map(__func__, obj, "vip2tnl");
+	if (map_fd < 0)
+		goto out;
+	bpf_map_update_elem(map_fd, &key4, &value4, 0);
+	bpf_map_update_elem(map_fd, &key6, &value6, 0);
+
+	err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4),
+				buf, &size, &retval, &duration);
+
+	CHECK(err || errno || retval != XDP_TX || size != 74 ||
+	      iph->protocol != IPPROTO_IPIP, "ipv4",
+	      "err %d errno %d retval %d size %d\n",
+	      err, errno, retval, size);
+
+	err = bpf_prog_test_run(prog_fd, 1, &pkt_v6, sizeof(pkt_v6),
+				buf, &size, &retval, &duration);
+	CHECK(err || errno || retval != XDP_TX || size != 114 ||
+	      iph6->nexthdr != IPPROTO_IPV6, "ipv6",
+	      "err %d errno %d retval %d size %d\n",
+	      err, errno, retval, size);
+out:
+	bpf_object__close(obj);
+}
+
+static void test_xdp_adjust_tail(void)
+{
+	const char *file = "./test_adjust_tail.o";
+	struct bpf_object *obj;
+	char buf[128];
+	__u32 duration, retval, size;
+	int err, prog_fd;
+
+	err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd);
+	if (err) {
+		error_cnt++;
+		return;
+	}
+
+	err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4),
+				buf, &size, &retval, &duration);
+
+	CHECK(err || errno || retval != XDP_DROP,
+	      "ipv4", "err %d errno %d retval %d size %d\n",
+	      err, errno, retval, size);
+
+	err = bpf_prog_test_run(prog_fd, 1, &pkt_v6, sizeof(pkt_v6),
+				buf, &size, &retval, &duration);
+	CHECK(err || errno || retval != XDP_TX || size != 54,
+	      "ipv6", "err %d errno %d retval %d size %d\n",
+	      err, errno, retval, size);
+	bpf_object__close(obj);
+}
+
+
+
+#define MAGIC_VAL 0x1234
+#define NUM_ITER 100000
+#define VIP_NUM 5
+
+static void test_l4lb(const char *file)
+{
+	unsigned int nr_cpus = bpf_num_possible_cpus();
+	struct vip key = {.protocol = 6};
+	struct vip_meta {
+		__u32 flags;
+		__u32 vip_num;
+	} value = {.vip_num = VIP_NUM};
+	__u32 stats_key = VIP_NUM;
+	struct vip_stats {
+		__u64 bytes;
+		__u64 pkts;
+	} stats[nr_cpus];
+	struct real_definition {
+		union {
+			__be32 dst;
+			__be32 dstv6[4];
+		};
+		__u8 flags;
+	} real_def = {.dst = MAGIC_VAL};
+	__u32 ch_key = 11, real_num = 3;
+	__u32 duration, retval, size;
+	int err, i, prog_fd, map_fd;
+	__u64 bytes = 0, pkts = 0;
+	struct bpf_object *obj;
+	char buf[128];
+	u32 *magic = (u32 *)buf;
+
+	err = bpf_prog_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd);
+	if (err) {
+		error_cnt++;
+		return;
+	}
+
+	map_fd = bpf_find_map(__func__, obj, "vip_map");
+	if (map_fd < 0)
+		goto out;
+	bpf_map_update_elem(map_fd, &key, &value, 0);
+
+	map_fd = bpf_find_map(__func__, obj, "ch_rings");
+	if (map_fd < 0)
+		goto out;
+	bpf_map_update_elem(map_fd, &ch_key, &real_num, 0);
+
+	map_fd = bpf_find_map(__func__, obj, "reals");
+	if (map_fd < 0)
+		goto out;
+	bpf_map_update_elem(map_fd, &real_num, &real_def, 0);
+
+	err = bpf_prog_test_run(prog_fd, NUM_ITER, &pkt_v4, sizeof(pkt_v4),
+				buf, &size, &retval, &duration);
+	CHECK(err || errno || retval != 7/*TC_ACT_REDIRECT*/ || size != 54 ||
+	      *magic != MAGIC_VAL, "ipv4",
+	      "err %d errno %d retval %d size %d magic %x\n",
+	      err, errno, retval, size, *magic);
+
+	err = bpf_prog_test_run(prog_fd, NUM_ITER, &pkt_v6, sizeof(pkt_v6),
+				buf, &size, &retval, &duration);
+	CHECK(err || errno || retval != 7/*TC_ACT_REDIRECT*/ || size != 74 ||
+	      *magic != MAGIC_VAL, "ipv6",
+	      "err %d errno %d retval %d size %d magic %x\n",
+	      err, errno, retval, size, *magic);
+
+	map_fd = bpf_find_map(__func__, obj, "stats");
+	if (map_fd < 0)
+		goto out;
+	bpf_map_lookup_elem(map_fd, &stats_key, stats);
+	for (i = 0; i < nr_cpus; i++) {
+		bytes += stats[i].bytes;
+		pkts += stats[i].pkts;
+	}
+	if (bytes != MAGIC_BYTES * NUM_ITER * 2 || pkts != NUM_ITER * 2) {
+		error_cnt++;
+		printf("test_l4lb:FAIL:stats %lld %lld\n", bytes, pkts);
+	}
+out:
+	bpf_object__close(obj);
+}
+
+static void test_l4lb_all(void)
+{
+	const char *file1 = "./test_l4lb.o";
+	const char *file2 = "./test_l4lb_noinline.o";
+
+	test_l4lb(file1);
+	test_l4lb(file2);
+}
+
+static void test_xdp_noinline(void)
+{
+	const char *file = "./test_xdp_noinline.o";
+	unsigned int nr_cpus = bpf_num_possible_cpus();
+	struct vip key = {.protocol = 6};
+	struct vip_meta {
+		__u32 flags;
+		__u32 vip_num;
+	} value = {.vip_num = VIP_NUM};
+	__u32 stats_key = VIP_NUM;
+	struct vip_stats {
+		__u64 bytes;
+		__u64 pkts;
+	} stats[nr_cpus];
+	struct real_definition {
+		union {
+			__be32 dst;
+			__be32 dstv6[4];
+		};
+		__u8 flags;
+	} real_def = {.dst = MAGIC_VAL};
+	__u32 ch_key = 11, real_num = 3;
+	__u32 duration, retval, size;
+	int err, i, prog_fd, map_fd;
+	__u64 bytes = 0, pkts = 0;
+	struct bpf_object *obj;
+	char buf[128];
+	u32 *magic = (u32 *)buf;
+
+	err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd);
+	if (err) {
+		error_cnt++;
+		return;
+	}
+
+	map_fd = bpf_find_map(__func__, obj, "vip_map");
+	if (map_fd < 0)
+		goto out;
+	bpf_map_update_elem(map_fd, &key, &value, 0);
+
+	map_fd = bpf_find_map(__func__, obj, "ch_rings");
+	if (map_fd < 0)
+		goto out;
+	bpf_map_update_elem(map_fd, &ch_key, &real_num, 0);
+
+	map_fd = bpf_find_map(__func__, obj, "reals");
+	if (map_fd < 0)
+		goto out;
+	bpf_map_update_elem(map_fd, &real_num, &real_def, 0);
+
+	err = bpf_prog_test_run(prog_fd, NUM_ITER, &pkt_v4, sizeof(pkt_v4),
+				buf, &size, &retval, &duration);
+	CHECK(err || errno || retval != 1 || size != 54 ||
+	      *magic != MAGIC_VAL, "ipv4",
+	      "err %d errno %d retval %d size %d magic %x\n",
+	      err, errno, retval, size, *magic);
+
+	err = bpf_prog_test_run(prog_fd, NUM_ITER, &pkt_v6, sizeof(pkt_v6),
+				buf, &size, &retval, &duration);
+	CHECK(err || errno || retval != 1 || size != 74 ||
+	      *magic != MAGIC_VAL, "ipv6",
+	      "err %d errno %d retval %d size %d magic %x\n",
+	      err, errno, retval, size, *magic);
+
+	map_fd = bpf_find_map(__func__, obj, "stats");
+	if (map_fd < 0)
+		goto out;
+	bpf_map_lookup_elem(map_fd, &stats_key, stats);
+	for (i = 0; i < nr_cpus; i++) {
+		bytes += stats[i].bytes;
+		pkts += stats[i].pkts;
+	}
+	if (bytes != MAGIC_BYTES * NUM_ITER * 2 || pkts != NUM_ITER * 2) {
+		error_cnt++;
+		printf("test_xdp_noinline:FAIL:stats %lld %lld\n", bytes, pkts);
+	}
+out:
+	bpf_object__close(obj);
+}
+
+static void test_tcp_estats(void)
+{
+	const char *file = "./test_tcp_estats.o";
+	int err, prog_fd;
+	struct bpf_object *obj;
+	__u32 duration = 0;
+
+	err = bpf_prog_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd);
+	CHECK(err, "", "err %d errno %d\n", err, errno);
+	if (err) {
+		error_cnt++;
+		return;
+	}
+
+	bpf_object__close(obj);
+}
+
+static inline __u64 ptr_to_u64(const void *ptr)
+{
+	return (__u64) (unsigned long) ptr;
+}
+
+static bool is_jit_enabled(void)
+{
+	const char *jit_sysctl = "/proc/sys/net/core/bpf_jit_enable";
+	bool enabled = false;
+	int sysctl_fd;
+
+	sysctl_fd = open(jit_sysctl, 0, O_RDONLY);
+	if (sysctl_fd != -1) {
+		char tmpc;
+
+		if (read(sysctl_fd, &tmpc, sizeof(tmpc)) == 1)
+			enabled = (tmpc != '0');
+		close(sysctl_fd);
+	}
+
+	return enabled;
+}
+
+static void test_bpf_obj_id(void)
+{
+	const __u64 array_magic_value = 0xfaceb00c;
+	const __u32 array_key = 0;
+	const int nr_iters = 2;
+	const char *file = "./test_obj_id.o";
+	const char *expected_prog_name = "test_obj_id";
+	const char *expected_map_name = "test_map_id";
+	const __u64 nsec_per_sec = 1000000000;
+
+	struct bpf_object *objs[nr_iters];
+	int prog_fds[nr_iters], map_fds[nr_iters];
+	/* +1 to test for the info_len returned by kernel */
+	struct bpf_prog_info prog_infos[nr_iters + 1];
+	struct bpf_map_info map_infos[nr_iters + 1];
+	/* Each prog only uses one map. +1 to test nr_map_ids
+	 * returned by kernel.
+	 */
+	__u32 map_ids[nr_iters + 1];
+	char jited_insns[128], xlated_insns[128], zeros[128];
+	__u32 i, next_id, info_len, nr_id_found, duration = 0;
+	struct timespec real_time_ts, boot_time_ts;
+	int err = 0;
+	__u64 array_value;
+	uid_t my_uid = getuid();
+	time_t now, load_time;
+
+	err = bpf_prog_get_fd_by_id(0);
+	CHECK(err >= 0 || errno != ENOENT,
+	      "get-fd-by-notexist-prog-id", "err %d errno %d\n", err, errno);
+
+	err = bpf_map_get_fd_by_id(0);
+	CHECK(err >= 0 || errno != ENOENT,
+	      "get-fd-by-notexist-map-id", "err %d errno %d\n", err, errno);
+
+	for (i = 0; i < nr_iters; i++)
+		objs[i] = NULL;
+
+	/* Check bpf_obj_get_info_by_fd() */
+	bzero(zeros, sizeof(zeros));
+	for (i = 0; i < nr_iters; i++) {
+		now = time(NULL);
+		err = bpf_prog_load(file, BPF_PROG_TYPE_SOCKET_FILTER,
+				    &objs[i], &prog_fds[i]);
+		/* test_obj_id.o is a dumb prog. It should never fail
+		 * to load.
+		 */
+		if (err)
+			error_cnt++;
+		assert(!err);
+
+		/* Insert a magic value to the map */
+		map_fds[i] = bpf_find_map(__func__, objs[i], "test_map_id");
+		assert(map_fds[i] >= 0);
+		err = bpf_map_update_elem(map_fds[i], &array_key,
+					  &array_magic_value, 0);
+		assert(!err);
+
+		/* Check getting map info */
+		info_len = sizeof(struct bpf_map_info) * 2;
+		bzero(&map_infos[i], info_len);
+		err = bpf_obj_get_info_by_fd(map_fds[i], &map_infos[i],
+					     &info_len);
+		if (CHECK(err ||
+			  map_infos[i].type != BPF_MAP_TYPE_ARRAY ||
+			  map_infos[i].key_size != sizeof(__u32) ||
+			  map_infos[i].value_size != sizeof(__u64) ||
+			  map_infos[i].max_entries != 1 ||
+			  map_infos[i].map_flags != 0 ||
+			  info_len != sizeof(struct bpf_map_info) ||
+			  strcmp((char *)map_infos[i].name, expected_map_name),
+			  "get-map-info(fd)",
+			  "err %d errno %d type %d(%d) info_len %u(%Zu) key_size %u value_size %u max_entries %u map_flags %X name %s(%s)\n",
+			  err, errno,
+			  map_infos[i].type, BPF_MAP_TYPE_ARRAY,
+			  info_len, sizeof(struct bpf_map_info),
+			  map_infos[i].key_size,
+			  map_infos[i].value_size,
+			  map_infos[i].max_entries,
+			  map_infos[i].map_flags,
+			  map_infos[i].name, expected_map_name))
+			goto done;
+
+		/* Check getting prog info */
+		info_len = sizeof(struct bpf_prog_info) * 2;
+		bzero(&prog_infos[i], info_len);
+		bzero(jited_insns, sizeof(jited_insns));
+		bzero(xlated_insns, sizeof(xlated_insns));
+		prog_infos[i].jited_prog_insns = ptr_to_u64(jited_insns);
+		prog_infos[i].jited_prog_len = sizeof(jited_insns);
+		prog_infos[i].xlated_prog_insns = ptr_to_u64(xlated_insns);
+		prog_infos[i].xlated_prog_len = sizeof(xlated_insns);
+		prog_infos[i].map_ids = ptr_to_u64(map_ids + i);
+		prog_infos[i].nr_map_ids = 2;
+		err = clock_gettime(CLOCK_REALTIME, &real_time_ts);
+		assert(!err);
+		err = clock_gettime(CLOCK_BOOTTIME, &boot_time_ts);
+		assert(!err);
+		err = bpf_obj_get_info_by_fd(prog_fds[i], &prog_infos[i],
+					     &info_len);
+		load_time = (real_time_ts.tv_sec - boot_time_ts.tv_sec)
+			+ (prog_infos[i].load_time / nsec_per_sec);
+		if (CHECK(err ||
+			  prog_infos[i].type != BPF_PROG_TYPE_SOCKET_FILTER ||
+			  info_len != sizeof(struct bpf_prog_info) ||
+			  (jit_enabled && !prog_infos[i].jited_prog_len) ||
+			  (jit_enabled &&
+			   !memcmp(jited_insns, zeros, sizeof(zeros))) ||
+			  !prog_infos[i].xlated_prog_len ||
+			  !memcmp(xlated_insns, zeros, sizeof(zeros)) ||
+			  load_time < now - 60 || load_time > now + 60 ||
+			  prog_infos[i].created_by_uid != my_uid ||
+			  prog_infos[i].nr_map_ids != 1 ||
+			  *(int *)prog_infos[i].map_ids != map_infos[i].id ||
+			  strcmp((char *)prog_infos[i].name, expected_prog_name),
+			  "get-prog-info(fd)",
+			  "err %d errno %d i %d type %d(%d) info_len %u(%Zu) jit_enabled %d jited_prog_len %u xlated_prog_len %u jited_prog %d xlated_prog %d load_time %lu(%lu) uid %u(%u) nr_map_ids %u(%u) map_id %u(%u) name %s(%s)\n",
+			  err, errno, i,
+			  prog_infos[i].type, BPF_PROG_TYPE_SOCKET_FILTER,
+			  info_len, sizeof(struct bpf_prog_info),
+			  jit_enabled,
+			  prog_infos[i].jited_prog_len,
+			  prog_infos[i].xlated_prog_len,
+			  !!memcmp(jited_insns, zeros, sizeof(zeros)),
+			  !!memcmp(xlated_insns, zeros, sizeof(zeros)),
+			  load_time, now,
+			  prog_infos[i].created_by_uid, my_uid,
+			  prog_infos[i].nr_map_ids, 1,
+			  *(int *)prog_infos[i].map_ids, map_infos[i].id,
+			  prog_infos[i].name, expected_prog_name))
+			goto done;
+	}
+
+	/* Check bpf_prog_get_next_id() */
+	nr_id_found = 0;
+	next_id = 0;
+	while (!bpf_prog_get_next_id(next_id, &next_id)) {
+		struct bpf_prog_info prog_info = {};
+		__u32 saved_map_id;
+		int prog_fd;
+
+		info_len = sizeof(prog_info);
+
+		prog_fd = bpf_prog_get_fd_by_id(next_id);
+		if (prog_fd < 0 && errno == ENOENT)
+			/* The bpf_prog is in the dead row */
+			continue;
+		if (CHECK(prog_fd < 0, "get-prog-fd(next_id)",
+			  "prog_fd %d next_id %d errno %d\n",
+			  prog_fd, next_id, errno))
+			break;
+
+		for (i = 0; i < nr_iters; i++)
+			if (prog_infos[i].id == next_id)
+				break;
+
+		if (i == nr_iters)
+			continue;
+
+		nr_id_found++;
+
+		/* Negative test:
+		 * prog_info.nr_map_ids = 1
+		 * prog_info.map_ids = NULL
+		 */
+		prog_info.nr_map_ids = 1;
+		err = bpf_obj_get_info_by_fd(prog_fd, &prog_info, &info_len);
+		if (CHECK(!err || errno != EFAULT,
+			  "get-prog-fd-bad-nr-map-ids", "err %d errno %d(%d)",
+			  err, errno, EFAULT))
+			break;
+		bzero(&prog_info, sizeof(prog_info));
+		info_len = sizeof(prog_info);
+
+		saved_map_id = *(int *)(prog_infos[i].map_ids);
+		prog_info.map_ids = prog_infos[i].map_ids;
+		prog_info.nr_map_ids = 2;
+		err = bpf_obj_get_info_by_fd(prog_fd, &prog_info, &info_len);
+		prog_infos[i].jited_prog_insns = 0;
+		prog_infos[i].xlated_prog_insns = 0;
+		CHECK(err || info_len != sizeof(struct bpf_prog_info) ||
+		      memcmp(&prog_info, &prog_infos[i], info_len) ||
+		      *(int *)prog_info.map_ids != saved_map_id,
+		      "get-prog-info(next_id->fd)",
+		      "err %d errno %d info_len %u(%Zu) memcmp %d map_id %u(%u)\n",
+		      err, errno, info_len, sizeof(struct bpf_prog_info),
+		      memcmp(&prog_info, &prog_infos[i], info_len),
+		      *(int *)prog_info.map_ids, saved_map_id);
+		close(prog_fd);
+	}
+	CHECK(nr_id_found != nr_iters,
+	      "check total prog id found by get_next_id",
+	      "nr_id_found %u(%u)\n",
+	      nr_id_found, nr_iters);
+
+	/* Check bpf_map_get_next_id() */
+	nr_id_found = 0;
+	next_id = 0;
+	while (!bpf_map_get_next_id(next_id, &next_id)) {
+		struct bpf_map_info map_info = {};
+		int map_fd;
+
+		info_len = sizeof(map_info);
+
+		map_fd = bpf_map_get_fd_by_id(next_id);
+		if (map_fd < 0 && errno == ENOENT)
+			/* The bpf_map is in the dead row */
+			continue;
+		if (CHECK(map_fd < 0, "get-map-fd(next_id)",
+			  "map_fd %d next_id %u errno %d\n",
+			  map_fd, next_id, errno))
+			break;
+
+		for (i = 0; i < nr_iters; i++)
+			if (map_infos[i].id == next_id)
+				break;
+
+		if (i == nr_iters)
+			continue;
+
+		nr_id_found++;
+
+		err = bpf_map_lookup_elem(map_fd, &array_key, &array_value);
+		assert(!err);
+
+		err = bpf_obj_get_info_by_fd(map_fd, &map_info, &info_len);
+		CHECK(err || info_len != sizeof(struct bpf_map_info) ||
+		      memcmp(&map_info, &map_infos[i], info_len) ||
+		      array_value != array_magic_value,
+		      "check get-map-info(next_id->fd)",
+		      "err %d errno %d info_len %u(%Zu) memcmp %d array_value %llu(%llu)\n",
+		      err, errno, info_len, sizeof(struct bpf_map_info),
+		      memcmp(&map_info, &map_infos[i], info_len),
+		      array_value, array_magic_value);
+
+		close(map_fd);
+	}
+	CHECK(nr_id_found != nr_iters,
+	      "check total map id found by get_next_id",
+	      "nr_id_found %u(%u)\n",
+	      nr_id_found, nr_iters);
+
+done:
+	for (i = 0; i < nr_iters; i++)
+		bpf_object__close(objs[i]);
+}
+
+static void test_pkt_md_access(void)
+{
+	const char *file = "./test_pkt_md_access.o";
+	struct bpf_object *obj;
+	__u32 duration, retval;
+	int err, prog_fd;
+
+	err = bpf_prog_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd);
+	if (err) {
+		error_cnt++;
+		return;
+	}
+
+	err = bpf_prog_test_run(prog_fd, 10, &pkt_v4, sizeof(pkt_v4),
+				NULL, NULL, &retval, &duration);
+	CHECK(err || retval, "",
+	      "err %d errno %d retval %d duration %d\n",
+	      err, errno, retval, duration);
+
+	bpf_object__close(obj);
+}
+
+static void test_obj_name(void)
+{
+	struct {
+		const char *name;
+		int success;
+		int expected_errno;
+	} tests[] = {
+		{ "", 1, 0 },
+		{ "_123456789ABCDE", 1, 0 },
+		{ "_123456789ABCDEF", 0, EINVAL },
+		{ "_123456789ABCD\n", 0, EINVAL },
+	};
+	struct bpf_insn prog[] = {
+		BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 0),
+		BPF_EXIT_INSN(),
+	};
+	__u32 duration = 0;
+	int i;
+
+	for (i = 0; i < sizeof(tests) / sizeof(tests[0]); i++) {
+		size_t name_len = strlen(tests[i].name) + 1;
+		union bpf_attr attr;
+		size_t ncopy;
+		int fd;
+
+		/* test different attr.prog_name during BPF_PROG_LOAD */
+		ncopy = name_len < sizeof(attr.prog_name) ?
+			name_len : sizeof(attr.prog_name);
+		bzero(&attr, sizeof(attr));
+		attr.prog_type = BPF_PROG_TYPE_SCHED_CLS;
+		attr.insn_cnt = 2;
+		attr.insns = ptr_to_u64(prog);
+		attr.license = ptr_to_u64("");
+		memcpy(attr.prog_name, tests[i].name, ncopy);
+
+		fd = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
+		CHECK((tests[i].success && fd < 0) ||
+		      (!tests[i].success && fd != -1) ||
+		      (!tests[i].success && errno != tests[i].expected_errno),
+		      "check-bpf-prog-name",
+		      "fd %d(%d) errno %d(%d)\n",
+		       fd, tests[i].success, errno, tests[i].expected_errno);
+
+		if (fd != -1)
+			close(fd);
+
+		/* test different attr.map_name during BPF_MAP_CREATE */
+		ncopy = name_len < sizeof(attr.map_name) ?
+			name_len : sizeof(attr.map_name);
+		bzero(&attr, sizeof(attr));
+		attr.map_type = BPF_MAP_TYPE_ARRAY;
+		attr.key_size = 4;
+		attr.value_size = 4;
+		attr.max_entries = 1;
+		attr.map_flags = 0;
+		memcpy(attr.map_name, tests[i].name, ncopy);
+		fd = syscall(__NR_bpf, BPF_MAP_CREATE, &attr, sizeof(attr));
+		CHECK((tests[i].success && fd < 0) ||
+		      (!tests[i].success && fd != -1) ||
+		      (!tests[i].success && errno != tests[i].expected_errno),
+		      "check-bpf-map-name",
+		      "fd %d(%d) errno %d(%d)\n",
+		      fd, tests[i].success, errno, tests[i].expected_errno);
+
+		if (fd != -1)
+			close(fd);
+	}
+}
+
+static void test_tp_attach_query(void)
+{
+	const int num_progs = 3;
+	int i, j, bytes, efd, err, prog_fd[num_progs], pmu_fd[num_progs];
+	__u32 duration = 0, info_len, saved_prog_ids[num_progs];
+	const char *file = "./test_tracepoint.o";
+	struct perf_event_query_bpf *query;
+	struct perf_event_attr attr = {};
+	struct bpf_object *obj[num_progs];
+	struct bpf_prog_info prog_info;
+	char buf[256];
+
+	snprintf(buf, sizeof(buf),
+		 "/sys/kernel/debug/tracing/events/sched/sched_switch/id");
+	efd = open(buf, O_RDONLY, 0);
+	if (CHECK(efd < 0, "open", "err %d errno %d\n", efd, errno))
+		return;
+	bytes = read(efd, buf, sizeof(buf));
+	close(efd);
+	if (CHECK(bytes <= 0 || bytes >= sizeof(buf),
+		  "read", "bytes %d errno %d\n", bytes, errno))
+		return;
+
+	attr.config = strtol(buf, NULL, 0);
+	attr.type = PERF_TYPE_TRACEPOINT;
+	attr.sample_type = PERF_SAMPLE_RAW | PERF_SAMPLE_CALLCHAIN;
+	attr.sample_period = 1;
+	attr.wakeup_events = 1;
+
+	query = malloc(sizeof(*query) + sizeof(__u32) * num_progs);
+	for (i = 0; i < num_progs; i++) {
+		err = bpf_prog_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj[i],
+				    &prog_fd[i]);
+		if (CHECK(err, "prog_load", "err %d errno %d\n", err, errno))
+			goto cleanup1;
+
+		bzero(&prog_info, sizeof(prog_info));
+		prog_info.jited_prog_len = 0;
+		prog_info.xlated_prog_len = 0;
+		prog_info.nr_map_ids = 0;
+		info_len = sizeof(prog_info);
+		err = bpf_obj_get_info_by_fd(prog_fd[i], &prog_info, &info_len);
+		if (CHECK(err, "bpf_obj_get_info_by_fd", "err %d errno %d\n",
+			  err, errno))
+			goto cleanup1;
+		saved_prog_ids[i] = prog_info.id;
+
+		pmu_fd[i] = syscall(__NR_perf_event_open, &attr, -1 /* pid */,
+				    0 /* cpu 0 */, -1 /* group id */,
+				    0 /* flags */);
+		if (CHECK(pmu_fd[i] < 0, "perf_event_open", "err %d errno %d\n",
+			  pmu_fd[i], errno))
+			goto cleanup2;
+		err = ioctl(pmu_fd[i], PERF_EVENT_IOC_ENABLE, 0);
+		if (CHECK(err, "perf_event_ioc_enable", "err %d errno %d\n",
+			  err, errno))
+			goto cleanup3;
+
+		if (i == 0) {
+			/* check NULL prog array query */
+			query->ids_len = num_progs;
+			err = ioctl(pmu_fd[i], PERF_EVENT_IOC_QUERY_BPF, query);
+			if (CHECK(err || query->prog_cnt != 0,
+				  "perf_event_ioc_query_bpf",
+				  "err %d errno %d query->prog_cnt %u\n",
+				  err, errno, query->prog_cnt))
+				goto cleanup3;
+		}
+
+		err = ioctl(pmu_fd[i], PERF_EVENT_IOC_SET_BPF, prog_fd[i]);
+		if (CHECK(err, "perf_event_ioc_set_bpf", "err %d errno %d\n",
+			  err, errno))
+			goto cleanup3;
+
+		if (i == 1) {
+			/* try to get # of programs only */
+			query->ids_len = 0;
+			err = ioctl(pmu_fd[i], PERF_EVENT_IOC_QUERY_BPF, query);
+			if (CHECK(err || query->prog_cnt != 2,
+				  "perf_event_ioc_query_bpf",
+				  "err %d errno %d query->prog_cnt %u\n",
+				  err, errno, query->prog_cnt))
+				goto cleanup3;
+
+			/* try a few negative tests */
+			/* invalid query pointer */
+			err = ioctl(pmu_fd[i], PERF_EVENT_IOC_QUERY_BPF,
+				    (struct perf_event_query_bpf *)0x1);
+			if (CHECK(!err || errno != EFAULT,
+				  "perf_event_ioc_query_bpf",
+				  "err %d errno %d\n", err, errno))
+				goto cleanup3;
+
+			/* no enough space */
+			query->ids_len = 1;
+			err = ioctl(pmu_fd[i], PERF_EVENT_IOC_QUERY_BPF, query);
+			if (CHECK(!err || errno != ENOSPC || query->prog_cnt != 2,
+				  "perf_event_ioc_query_bpf",
+				  "err %d errno %d query->prog_cnt %u\n",
+				  err, errno, query->prog_cnt))
+				goto cleanup3;
+		}
+
+		query->ids_len = num_progs;
+		err = ioctl(pmu_fd[i], PERF_EVENT_IOC_QUERY_BPF, query);
+		if (CHECK(err || query->prog_cnt != (i + 1),
+			  "perf_event_ioc_query_bpf",
+			  "err %d errno %d query->prog_cnt %u\n",
+			  err, errno, query->prog_cnt))
+			goto cleanup3;
+		for (j = 0; j < i + 1; j++)
+			if (CHECK(saved_prog_ids[j] != query->ids[j],
+				  "perf_event_ioc_query_bpf",
+				  "#%d saved_prog_id %x query prog_id %x\n",
+				  j, saved_prog_ids[j], query->ids[j]))
+				goto cleanup3;
+	}
+
+	i = num_progs - 1;
+	for (; i >= 0; i--) {
+ cleanup3:
+		ioctl(pmu_fd[i], PERF_EVENT_IOC_DISABLE);
+ cleanup2:
+		close(pmu_fd[i]);
+ cleanup1:
+		bpf_object__close(obj[i]);
+	}
+	free(query);
+}
+
+static int compare_map_keys(int map1_fd, int map2_fd)
+{
+	__u32 key, next_key;
+	char val_buf[PERF_MAX_STACK_DEPTH *
+		     sizeof(struct bpf_stack_build_id)];
+	int err;
+
+	err = bpf_map_get_next_key(map1_fd, NULL, &key);
+	if (err)
+		return err;
+	err = bpf_map_lookup_elem(map2_fd, &key, val_buf);
+	if (err)
+		return err;
+
+	while (bpf_map_get_next_key(map1_fd, &key, &next_key) == 0) {
+		err = bpf_map_lookup_elem(map2_fd, &next_key, val_buf);
+		if (err)
+			return err;
+
+		key = next_key;
+	}
+	if (errno != ENOENT)
+		return -1;
+
+	return 0;
+}
+
+static int compare_stack_ips(int smap_fd, int amap_fd, int stack_trace_len)
+{
+	__u32 key, next_key, *cur_key_p, *next_key_p;
+	char *val_buf1, *val_buf2;
+	int i, err = 0;
+
+	val_buf1 = malloc(stack_trace_len);
+	val_buf2 = malloc(stack_trace_len);
+	cur_key_p = NULL;
+	next_key_p = &key;
+	while (bpf_map_get_next_key(smap_fd, cur_key_p, next_key_p) == 0) {
+		err = bpf_map_lookup_elem(smap_fd, next_key_p, val_buf1);
+		if (err)
+			goto out;
+		err = bpf_map_lookup_elem(amap_fd, next_key_p, val_buf2);
+		if (err)
+			goto out;
+		for (i = 0; i < stack_trace_len; i++) {
+			if (val_buf1[i] != val_buf2[i]) {
+				err = -1;
+				goto out;
+			}
+		}
+		key = *next_key_p;
+		cur_key_p = &key;
+		next_key_p = &next_key;
+	}
+	if (errno != ENOENT)
+		err = -1;
+
+out:
+	free(val_buf1);
+	free(val_buf2);
+	return err;
+}
+
+static void test_stacktrace_map()
+{
+	int control_map_fd, stackid_hmap_fd, stackmap_fd, stack_amap_fd;
+	const char *file = "./test_stacktrace_map.o";
+	int bytes, efd, err, pmu_fd, prog_fd, stack_trace_len;
+	struct perf_event_attr attr = {};
+	__u32 key, val, duration = 0;
+	struct bpf_object *obj;
+	char buf[256];
+
+	err = bpf_prog_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd);
+	if (CHECK(err, "prog_load", "err %d errno %d\n", err, errno))
+		return;
+
+	/* Get the ID for the sched/sched_switch tracepoint */
+	snprintf(buf, sizeof(buf),
+		 "/sys/kernel/debug/tracing/events/sched/sched_switch/id");
+	efd = open(buf, O_RDONLY, 0);
+	if (CHECK(efd < 0, "open", "err %d errno %d\n", efd, errno))
+		goto close_prog;
+
+	bytes = read(efd, buf, sizeof(buf));
+	close(efd);
+	if (bytes <= 0 || bytes >= sizeof(buf))
+		goto close_prog;
+
+	/* Open the perf event and attach bpf progrram */
+	attr.config = strtol(buf, NULL, 0);
+	attr.type = PERF_TYPE_TRACEPOINT;
+	attr.sample_type = PERF_SAMPLE_RAW | PERF_SAMPLE_CALLCHAIN;
+	attr.sample_period = 1;
+	attr.wakeup_events = 1;
+	pmu_fd = syscall(__NR_perf_event_open, &attr, -1 /* pid */,
+			 0 /* cpu 0 */, -1 /* group id */,
+			 0 /* flags */);
+	if (CHECK(pmu_fd < 0, "perf_event_open", "err %d errno %d\n",
+		  pmu_fd, errno))
+		goto close_prog;
+
+	err = ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0);
+	if (err)
+		goto disable_pmu;
+
+	err = ioctl(pmu_fd, PERF_EVENT_IOC_SET_BPF, prog_fd);
+	if (err)
+		goto disable_pmu;
+
+	/* find map fds */
+	control_map_fd = bpf_find_map(__func__, obj, "control_map");
+	if (control_map_fd < 0)
+		goto disable_pmu;
+
+	stackid_hmap_fd = bpf_find_map(__func__, obj, "stackid_hmap");
+	if (stackid_hmap_fd < 0)
+		goto disable_pmu;
+
+	stackmap_fd = bpf_find_map(__func__, obj, "stackmap");
+	if (stackmap_fd < 0)
+		goto disable_pmu;
+
+	stack_amap_fd = bpf_find_map(__func__, obj, "stack_amap");
+	if (stack_amap_fd < 0)
+		goto disable_pmu;
+
+	/* give some time for bpf program run */
+	sleep(1);
+
+	/* disable stack trace collection */
+	key = 0;
+	val = 1;
+	bpf_map_update_elem(control_map_fd, &key, &val, 0);
+
+	/* for every element in stackid_hmap, we can find a corresponding one
+	 * in stackmap, and vise versa.
+	 */
+	err = compare_map_keys(stackid_hmap_fd, stackmap_fd);
+	if (CHECK(err, "compare_map_keys stackid_hmap vs. stackmap",
+		  "err %d errno %d\n", err, errno))
+		goto disable_pmu_noerr;
+
+	err = compare_map_keys(stackmap_fd, stackid_hmap_fd);
+	if (CHECK(err, "compare_map_keys stackmap vs. stackid_hmap",
+		  "err %d errno %d\n", err, errno))
+		goto disable_pmu_noerr;
+
+	stack_trace_len = PERF_MAX_STACK_DEPTH * sizeof(__u64);
+	err = compare_stack_ips(stackmap_fd, stack_amap_fd, stack_trace_len);
+	if (CHECK(err, "compare_stack_ips stackmap vs. stack_amap",
+		  "err %d errno %d\n", err, errno))
+		goto disable_pmu_noerr;
+
+	goto disable_pmu_noerr;
+disable_pmu:
+	error_cnt++;
+disable_pmu_noerr:
+	ioctl(pmu_fd, PERF_EVENT_IOC_DISABLE);
+	close(pmu_fd);
+close_prog:
+	bpf_object__close(obj);
+}
+
+static void test_stacktrace_map_raw_tp()
+{
+	int control_map_fd, stackid_hmap_fd, stackmap_fd;
+	const char *file = "./test_stacktrace_map.o";
+	int efd, err, prog_fd;
+	__u32 key, val, duration = 0;
+	struct bpf_object *obj;
+
+	err = bpf_prog_load(file, BPF_PROG_TYPE_RAW_TRACEPOINT, &obj, &prog_fd);
+	if (CHECK(err, "prog_load raw tp", "err %d errno %d\n", err, errno))
+		return;
+
+	efd = bpf_raw_tracepoint_open("sched_switch", prog_fd);
+	if (CHECK(efd < 0, "raw_tp_open", "err %d errno %d\n", efd, errno))
+		goto close_prog;
+
+	/* find map fds */
+	control_map_fd = bpf_find_map(__func__, obj, "control_map");
+	if (control_map_fd < 0)
+		goto close_prog;
+
+	stackid_hmap_fd = bpf_find_map(__func__, obj, "stackid_hmap");
+	if (stackid_hmap_fd < 0)
+		goto close_prog;
+
+	stackmap_fd = bpf_find_map(__func__, obj, "stackmap");
+	if (stackmap_fd < 0)
+		goto close_prog;
+
+	/* give some time for bpf program run */
+	sleep(1);
+
+	/* disable stack trace collection */
+	key = 0;
+	val = 1;
+	bpf_map_update_elem(control_map_fd, &key, &val, 0);
+
+	/* for every element in stackid_hmap, we can find a corresponding one
+	 * in stackmap, and vise versa.
+	 */
+	err = compare_map_keys(stackid_hmap_fd, stackmap_fd);
+	if (CHECK(err, "compare_map_keys stackid_hmap vs. stackmap",
+		  "err %d errno %d\n", err, errno))
+		goto close_prog;
+
+	err = compare_map_keys(stackmap_fd, stackid_hmap_fd);
+	if (CHECK(err, "compare_map_keys stackmap vs. stackid_hmap",
+		  "err %d errno %d\n", err, errno))
+		goto close_prog;
+
+	goto close_prog_noerr;
+close_prog:
+	error_cnt++;
+close_prog_noerr:
+	bpf_object__close(obj);
+}
+
+static int extract_build_id(char *build_id, size_t size)
+{
+	FILE *fp;
+	char *line = NULL;
+	size_t len = 0;
+
+	fp = popen("readelf -n ./urandom_read | grep 'Build ID'", "r");
+	if (fp == NULL)
+		return -1;
+
+	if (getline(&line, &len, fp) == -1)
+		goto err;
+	pclose(fp);
+
+	if (len > size)
+		len = size;
+	memcpy(build_id, line, len);
+	build_id[len] = '\0';
+	free(line);
+	return 0;
+err:
+	pclose(fp);
+	return -1;
+}
+
+static void test_stacktrace_build_id(void)
+{
+	int control_map_fd, stackid_hmap_fd, stackmap_fd, stack_amap_fd;
+	const char *file = "./test_stacktrace_build_id.o";
+	int bytes, efd, err, pmu_fd, prog_fd, stack_trace_len;
+	struct perf_event_attr attr = {};
+	__u32 key, previous_key, val, duration = 0;
+	struct bpf_object *obj;
+	char buf[256];
+	int i, j;
+	struct bpf_stack_build_id id_offs[PERF_MAX_STACK_DEPTH];
+	int build_id_matches = 0;
+	int retry = 1;
+
+retry:
+	err = bpf_prog_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd);
+	if (CHECK(err, "prog_load", "err %d errno %d\n", err, errno))
+		goto out;
+
+	/* Get the ID for the sched/sched_switch tracepoint */
+	snprintf(buf, sizeof(buf),
+		 "/sys/kernel/debug/tracing/events/random/urandom_read/id");
+	efd = open(buf, O_RDONLY, 0);
+	if (CHECK(efd < 0, "open", "err %d errno %d\n", efd, errno))
+		goto close_prog;
+
+	bytes = read(efd, buf, sizeof(buf));
+	close(efd);
+	if (CHECK(bytes <= 0 || bytes >= sizeof(buf),
+		  "read", "bytes %d errno %d\n", bytes, errno))
+		goto close_prog;
+
+	/* Open the perf event and attach bpf progrram */
+	attr.config = strtol(buf, NULL, 0);
+	attr.type = PERF_TYPE_TRACEPOINT;
+	attr.sample_type = PERF_SAMPLE_RAW | PERF_SAMPLE_CALLCHAIN;
+	attr.sample_period = 1;
+	attr.wakeup_events = 1;
+	pmu_fd = syscall(__NR_perf_event_open, &attr, -1 /* pid */,
+			 0 /* cpu 0 */, -1 /* group id */,
+			 0 /* flags */);
+	if (CHECK(pmu_fd < 0, "perf_event_open", "err %d errno %d\n",
+		  pmu_fd, errno))
+		goto close_prog;
+
+	err = ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0);
+	if (CHECK(err, "perf_event_ioc_enable", "err %d errno %d\n",
+		  err, errno))
+		goto close_pmu;
+
+	err = ioctl(pmu_fd, PERF_EVENT_IOC_SET_BPF, prog_fd);
+	if (CHECK(err, "perf_event_ioc_set_bpf", "err %d errno %d\n",
+		  err, errno))
+		goto disable_pmu;
+
+	/* find map fds */
+	control_map_fd = bpf_find_map(__func__, obj, "control_map");
+	if (CHECK(control_map_fd < 0, "bpf_find_map control_map",
+		  "err %d errno %d\n", err, errno))
+		goto disable_pmu;
+
+	stackid_hmap_fd = bpf_find_map(__func__, obj, "stackid_hmap");
+	if (CHECK(stackid_hmap_fd < 0, "bpf_find_map stackid_hmap",
+		  "err %d errno %d\n", err, errno))
+		goto disable_pmu;
+
+	stackmap_fd = bpf_find_map(__func__, obj, "stackmap");
+	if (CHECK(stackmap_fd < 0, "bpf_find_map stackmap", "err %d errno %d\n",
+		  err, errno))
+		goto disable_pmu;
+
+	stack_amap_fd = bpf_find_map(__func__, obj, "stack_amap");
+	if (CHECK(stack_amap_fd < 0, "bpf_find_map stack_amap",
+		  "err %d errno %d\n", err, errno))
+		goto disable_pmu;
+
+	assert(system("dd if=/dev/urandom of=/dev/zero count=4 2> /dev/null")
+	       == 0);
+	assert(system("./urandom_read") == 0);
+	/* disable stack trace collection */
+	key = 0;
+	val = 1;
+	bpf_map_update_elem(control_map_fd, &key, &val, 0);
+
+	/* for every element in stackid_hmap, we can find a corresponding one
+	 * in stackmap, and vise versa.
+	 */
+	err = compare_map_keys(stackid_hmap_fd, stackmap_fd);
+	if (CHECK(err, "compare_map_keys stackid_hmap vs. stackmap",
+		  "err %d errno %d\n", err, errno))
+		goto disable_pmu;
+
+	err = compare_map_keys(stackmap_fd, stackid_hmap_fd);
+	if (CHECK(err, "compare_map_keys stackmap vs. stackid_hmap",
+		  "err %d errno %d\n", err, errno))
+		goto disable_pmu;
+
+	err = extract_build_id(buf, 256);
+
+	if (CHECK(err, "get build_id with readelf",
+		  "err %d errno %d\n", err, errno))
+		goto disable_pmu;
+
+	err = bpf_map_get_next_key(stackmap_fd, NULL, &key);
+	if (CHECK(err, "get_next_key from stackmap",
+		  "err %d, errno %d\n", err, errno))
+		goto disable_pmu;
+
+	do {
+		char build_id[64];
+
+		err = bpf_map_lookup_elem(stackmap_fd, &key, id_offs);
+		if (CHECK(err, "lookup_elem from stackmap",
+			  "err %d, errno %d\n", err, errno))
+			goto disable_pmu;
+		for (i = 0; i < PERF_MAX_STACK_DEPTH; ++i)
+			if (id_offs[i].status == BPF_STACK_BUILD_ID_VALID &&
+			    id_offs[i].offset != 0) {
+				for (j = 0; j < 20; ++j)
+					sprintf(build_id + 2 * j, "%02x",
+						id_offs[i].build_id[j] & 0xff);
+				if (strstr(buf, build_id) != NULL)
+					build_id_matches = 1;
+			}
+		previous_key = key;
+	} while (bpf_map_get_next_key(stackmap_fd, &previous_key, &key) == 0);
+
+	/* stack_map_get_build_id_offset() is racy and sometimes can return
+	 * BPF_STACK_BUILD_ID_IP instead of BPF_STACK_BUILD_ID_VALID;
+	 * try it one more time.
+	 */
+	if (build_id_matches < 1 && retry--) {
+		ioctl(pmu_fd, PERF_EVENT_IOC_DISABLE);
+		close(pmu_fd);
+		bpf_object__close(obj);
+		printf("%s:WARN:Didn't find expected build ID from the map, retrying\n",
+		       __func__);
+		goto retry;
+	}
+
+	if (CHECK(build_id_matches < 1, "build id match",
+		  "Didn't find expected build ID from the map\n"))
+		goto disable_pmu;
+
+	stack_trace_len = PERF_MAX_STACK_DEPTH
+		* sizeof(struct bpf_stack_build_id);
+	err = compare_stack_ips(stackmap_fd, stack_amap_fd, stack_trace_len);
+	CHECK(err, "compare_stack_ips stackmap vs. stack_amap",
+	      "err %d errno %d\n", err, errno);
+
+disable_pmu:
+	ioctl(pmu_fd, PERF_EVENT_IOC_DISABLE);
+
+close_pmu:
+	close(pmu_fd);
+
+close_prog:
+	bpf_object__close(obj);
+
+out:
+	return;
+}
+
+static void test_stacktrace_build_id_nmi(void)
+{
+	int control_map_fd, stackid_hmap_fd, stackmap_fd, stack_amap_fd;
+	const char *file = "./test_stacktrace_build_id.o";
+	int err, pmu_fd, prog_fd;
+	struct perf_event_attr attr = {
+		.sample_freq = 5000,
+		.freq = 1,
+		.type = PERF_TYPE_HARDWARE,
+		.config = PERF_COUNT_HW_CPU_CYCLES,
+	};
+	__u32 key, previous_key, val, duration = 0;
+	struct bpf_object *obj;
+	char buf[256];
+	int i, j;
+	struct bpf_stack_build_id id_offs[PERF_MAX_STACK_DEPTH];
+	int build_id_matches = 0;
+	int retry = 1;
+
+retry:
+	err = bpf_prog_load(file, BPF_PROG_TYPE_PERF_EVENT, &obj, &prog_fd);
+	if (CHECK(err, "prog_load", "err %d errno %d\n", err, errno))
+		return;
+
+	pmu_fd = syscall(__NR_perf_event_open, &attr, -1 /* pid */,
+			 0 /* cpu 0 */, -1 /* group id */,
+			 0 /* flags */);
+	if (CHECK(pmu_fd < 0, "perf_event_open",
+		  "err %d errno %d. Does the test host support PERF_COUNT_HW_CPU_CYCLES?\n",
+		  pmu_fd, errno))
+		goto close_prog;
+
+	err = ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0);
+	if (CHECK(err, "perf_event_ioc_enable", "err %d errno %d\n",
+		  err, errno))
+		goto close_pmu;
+
+	err = ioctl(pmu_fd, PERF_EVENT_IOC_SET_BPF, prog_fd);
+	if (CHECK(err, "perf_event_ioc_set_bpf", "err %d errno %d\n",
+		  err, errno))
+		goto disable_pmu;
+
+	/* find map fds */
+	control_map_fd = bpf_find_map(__func__, obj, "control_map");
+	if (CHECK(control_map_fd < 0, "bpf_find_map control_map",
+		  "err %d errno %d\n", err, errno))
+		goto disable_pmu;
+
+	stackid_hmap_fd = bpf_find_map(__func__, obj, "stackid_hmap");
+	if (CHECK(stackid_hmap_fd < 0, "bpf_find_map stackid_hmap",
+		  "err %d errno %d\n", err, errno))
+		goto disable_pmu;
+
+	stackmap_fd = bpf_find_map(__func__, obj, "stackmap");
+	if (CHECK(stackmap_fd < 0, "bpf_find_map stackmap", "err %d errno %d\n",
+		  err, errno))
+		goto disable_pmu;
+
+	stack_amap_fd = bpf_find_map(__func__, obj, "stack_amap");
+	if (CHECK(stack_amap_fd < 0, "bpf_find_map stack_amap",
+		  "err %d errno %d\n", err, errno))
+		goto disable_pmu;
+
+	assert(system("dd if=/dev/urandom of=/dev/zero count=4 2> /dev/null")
+	       == 0);
+	assert(system("taskset 0x1 ./urandom_read 100000") == 0);
+	/* disable stack trace collection */
+	key = 0;
+	val = 1;
+	bpf_map_update_elem(control_map_fd, &key, &val, 0);
+
+	/* for every element in stackid_hmap, we can find a corresponding one
+	 * in stackmap, and vise versa.
+	 */
+	err = compare_map_keys(stackid_hmap_fd, stackmap_fd);
+	if (CHECK(err, "compare_map_keys stackid_hmap vs. stackmap",
+		  "err %d errno %d\n", err, errno))
+		goto disable_pmu;
+
+	err = compare_map_keys(stackmap_fd, stackid_hmap_fd);
+	if (CHECK(err, "compare_map_keys stackmap vs. stackid_hmap",
+		  "err %d errno %d\n", err, errno))
+		goto disable_pmu;
+
+	err = extract_build_id(buf, 256);
+
+	if (CHECK(err, "get build_id with readelf",
+		  "err %d errno %d\n", err, errno))
+		goto disable_pmu;
+
+	err = bpf_map_get_next_key(stackmap_fd, NULL, &key);
+	if (CHECK(err, "get_next_key from stackmap",
+		  "err %d, errno %d\n", err, errno))
+		goto disable_pmu;
+
+	do {
+		char build_id[64];
+
+		err = bpf_map_lookup_elem(stackmap_fd, &key, id_offs);
+		if (CHECK(err, "lookup_elem from stackmap",
+			  "err %d, errno %d\n", err, errno))
+			goto disable_pmu;
+		for (i = 0; i < PERF_MAX_STACK_DEPTH; ++i)
+			if (id_offs[i].status == BPF_STACK_BUILD_ID_VALID &&
+			    id_offs[i].offset != 0) {
+				for (j = 0; j < 20; ++j)
+					sprintf(build_id + 2 * j, "%02x",
+						id_offs[i].build_id[j] & 0xff);
+				if (strstr(buf, build_id) != NULL)
+					build_id_matches = 1;
+			}
+		previous_key = key;
+	} while (bpf_map_get_next_key(stackmap_fd, &previous_key, &key) == 0);
+
+	/* stack_map_get_build_id_offset() is racy and sometimes can return
+	 * BPF_STACK_BUILD_ID_IP instead of BPF_STACK_BUILD_ID_VALID;
+	 * try it one more time.
+	 */
+	if (build_id_matches < 1 && retry--) {
+		ioctl(pmu_fd, PERF_EVENT_IOC_DISABLE);
+		close(pmu_fd);
+		bpf_object__close(obj);
+		printf("%s:WARN:Didn't find expected build ID from the map, retrying\n",
+		       __func__);
+		goto retry;
+	}
+
+	if (CHECK(build_id_matches < 1, "build id match",
+		  "Didn't find expected build ID from the map\n"))
+		goto disable_pmu;
+
+	/*
+	 * We intentionally skip compare_stack_ips(). This is because we
+	 * only support one in_nmi() ips-to-build_id translation per cpu
+	 * at any time, thus stack_amap here will always fallback to
+	 * BPF_STACK_BUILD_ID_IP;
+	 */
+
+disable_pmu:
+	ioctl(pmu_fd, PERF_EVENT_IOC_DISABLE);
+
+close_pmu:
+	close(pmu_fd);
+
+close_prog:
+	bpf_object__close(obj);
+}
+
+#define MAX_CNT_RAWTP	10ull
+#define MAX_STACK_RAWTP	100
+struct get_stack_trace_t {
+	int pid;
+	int kern_stack_size;
+	int user_stack_size;
+	int user_stack_buildid_size;
+	__u64 kern_stack[MAX_STACK_RAWTP];
+	__u64 user_stack[MAX_STACK_RAWTP];
+	struct bpf_stack_build_id user_stack_buildid[MAX_STACK_RAWTP];
+};
+
+static int get_stack_print_output(void *data, int size)
+{
+	bool good_kern_stack = false, good_user_stack = false;
+	const char *nonjit_func = "___bpf_prog_run";
+	struct get_stack_trace_t *e = data;
+	int i, num_stack;
+	static __u64 cnt;
+	struct ksym *ks;
+
+	cnt++;
+
+	if (size < sizeof(struct get_stack_trace_t)) {
+		__u64 *raw_data = data;
+		bool found = false;
+
+		num_stack = size / sizeof(__u64);
+		/* If jit is enabled, we do not have a good way to
+		 * verify the sanity of the kernel stack. So we
+		 * just assume it is good if the stack is not empty.
+		 * This could be improved in the future.
+		 */
+		if (jit_enabled) {
+			found = num_stack > 0;
+		} else {
+			for (i = 0; i < num_stack; i++) {
+				ks = ksym_search(raw_data[i]);
+				if (strcmp(ks->name, nonjit_func) == 0) {
+					found = true;
+					break;
+				}
+			}
+		}
+		if (found) {
+			good_kern_stack = true;
+			good_user_stack = true;
+		}
+	} else {
+		num_stack = e->kern_stack_size / sizeof(__u64);
+		if (jit_enabled) {
+			good_kern_stack = num_stack > 0;
+		} else {
+			for (i = 0; i < num_stack; i++) {
+				ks = ksym_search(e->kern_stack[i]);
+				if (strcmp(ks->name, nonjit_func) == 0) {
+					good_kern_stack = true;
+					break;
+				}
+			}
+		}
+		if (e->user_stack_size > 0 && e->user_stack_buildid_size > 0)
+			good_user_stack = true;
+	}
+	if (!good_kern_stack || !good_user_stack)
+		return LIBBPF_PERF_EVENT_ERROR;
+
+	if (cnt == MAX_CNT_RAWTP)
+		return LIBBPF_PERF_EVENT_DONE;
+
+	return LIBBPF_PERF_EVENT_CONT;
+}
+
+static void test_get_stack_raw_tp(void)
+{
+	const char *file = "./test_get_stack_rawtp.o";
+	int i, efd, err, prog_fd, pmu_fd, perfmap_fd;
+	struct perf_event_attr attr = {};
+	struct timespec tv = {0, 10};
+	__u32 key = 0, duration = 0;
+	struct bpf_object *obj;
+
+	err = bpf_prog_load(file, BPF_PROG_TYPE_RAW_TRACEPOINT, &obj, &prog_fd);
+	if (CHECK(err, "prog_load raw tp", "err %d errno %d\n", err, errno))
+		return;
+
+	efd = bpf_raw_tracepoint_open("sys_enter", prog_fd);
+	if (CHECK(efd < 0, "raw_tp_open", "err %d errno %d\n", efd, errno))
+		goto close_prog;
+
+	perfmap_fd = bpf_find_map(__func__, obj, "perfmap");
+	if (CHECK(perfmap_fd < 0, "bpf_find_map", "err %d errno %d\n",
+		  perfmap_fd, errno))
+		goto close_prog;
+
+	err = load_kallsyms();
+	if (CHECK(err < 0, "load_kallsyms", "err %d errno %d\n", err, errno))
+		goto close_prog;
+
+	attr.sample_type = PERF_SAMPLE_RAW;
+	attr.type = PERF_TYPE_SOFTWARE;
+	attr.config = PERF_COUNT_SW_BPF_OUTPUT;
+	pmu_fd = syscall(__NR_perf_event_open, &attr, getpid()/*pid*/, -1/*cpu*/,
+			 -1/*group_fd*/, 0);
+	if (CHECK(pmu_fd < 0, "perf_event_open", "err %d errno %d\n", pmu_fd,
+		  errno))
+		goto close_prog;
+
+	err = bpf_map_update_elem(perfmap_fd, &key, &pmu_fd, BPF_ANY);
+	if (CHECK(err < 0, "bpf_map_update_elem", "err %d errno %d\n", err,
+		  errno))
+		goto close_prog;
+
+	err = ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0);
+	if (CHECK(err < 0, "ioctl PERF_EVENT_IOC_ENABLE", "err %d errno %d\n",
+		  err, errno))
+		goto close_prog;
+
+	err = perf_event_mmap(pmu_fd);
+	if (CHECK(err < 0, "perf_event_mmap", "err %d errno %d\n", err, errno))
+		goto close_prog;
+
+	/* trigger some syscall action */
+	for (i = 0; i < MAX_CNT_RAWTP; i++)
+		nanosleep(&tv, NULL);
+
+	err = perf_event_poller(pmu_fd, get_stack_print_output);
+	if (CHECK(err < 0, "perf_event_poller", "err %d errno %d\n", err, errno))
+		goto close_prog;
+
+	goto close_prog_noerr;
+close_prog:
+	error_cnt++;
+close_prog_noerr:
+	bpf_object__close(obj);
+}
+
+static void test_task_fd_query_rawtp(void)
+{
+	const char *file = "./test_get_stack_rawtp.o";
+	__u64 probe_offset, probe_addr;
+	__u32 len, prog_id, fd_type;
+	struct bpf_object *obj;
+	int efd, err, prog_fd;
+	__u32 duration = 0;
+	char buf[256];
+
+	err = bpf_prog_load(file, BPF_PROG_TYPE_RAW_TRACEPOINT, &obj, &prog_fd);
+	if (CHECK(err, "prog_load raw tp", "err %d errno %d\n", err, errno))
+		return;
+
+	efd = bpf_raw_tracepoint_open("sys_enter", prog_fd);
+	if (CHECK(efd < 0, "raw_tp_open", "err %d errno %d\n", efd, errno))
+		goto close_prog;
+
+	/* query (getpid(), efd) */
+	len = sizeof(buf);
+	err = bpf_task_fd_query(getpid(), efd, 0, buf, &len, &prog_id,
+				&fd_type, &probe_offset, &probe_addr);
+	if (CHECK(err < 0, "bpf_task_fd_query", "err %d errno %d\n", err,
+		  errno))
+		goto close_prog;
+
+	err = fd_type == BPF_FD_TYPE_RAW_TRACEPOINT &&
+	      strcmp(buf, "sys_enter") == 0;
+	if (CHECK(!err, "check_results", "fd_type %d tp_name %s\n",
+		  fd_type, buf))
+		goto close_prog;
+
+	/* test zero len */
+	len = 0;
+	err = bpf_task_fd_query(getpid(), efd, 0, buf, &len, &prog_id,
+				&fd_type, &probe_offset, &probe_addr);
+	if (CHECK(err < 0, "bpf_task_fd_query (len = 0)", "err %d errno %d\n",
+		  err, errno))
+		goto close_prog;
+	err = fd_type == BPF_FD_TYPE_RAW_TRACEPOINT &&
+	      len == strlen("sys_enter");
+	if (CHECK(!err, "check_results", "fd_type %d len %u\n", fd_type, len))
+		goto close_prog;
+
+	/* test empty buffer */
+	len = sizeof(buf);
+	err = bpf_task_fd_query(getpid(), efd, 0, 0, &len, &prog_id,
+				&fd_type, &probe_offset, &probe_addr);
+	if (CHECK(err < 0, "bpf_task_fd_query (buf = 0)", "err %d errno %d\n",
+		  err, errno))
+		goto close_prog;
+	err = fd_type == BPF_FD_TYPE_RAW_TRACEPOINT &&
+	      len == strlen("sys_enter");
+	if (CHECK(!err, "check_results", "fd_type %d len %u\n", fd_type, len))
+		goto close_prog;
+
+	/* test smaller buffer */
+	len = 3;
+	err = bpf_task_fd_query(getpid(), efd, 0, buf, &len, &prog_id,
+				&fd_type, &probe_offset, &probe_addr);
+	if (CHECK(err >= 0 || errno != ENOSPC, "bpf_task_fd_query (len = 3)",
+		  "err %d errno %d\n", err, errno))
+		goto close_prog;
+	err = fd_type == BPF_FD_TYPE_RAW_TRACEPOINT &&
+	      len == strlen("sys_enter") &&
+	      strcmp(buf, "sy") == 0;
+	if (CHECK(!err, "check_results", "fd_type %d len %u\n", fd_type, len))
+		goto close_prog;
+
+	goto close_prog_noerr;
+close_prog:
+	error_cnt++;
+close_prog_noerr:
+	bpf_object__close(obj);
+}
+
+static void test_task_fd_query_tp_core(const char *probe_name,
+				       const char *tp_name)
+{
+	const char *file = "./test_tracepoint.o";
+	int err, bytes, efd, prog_fd, pmu_fd;
+	struct perf_event_attr attr = {};
+	__u64 probe_offset, probe_addr;
+	__u32 len, prog_id, fd_type;
+	struct bpf_object *obj;
+	__u32 duration = 0;
+	char buf[256];
+
+	err = bpf_prog_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd);
+	if (CHECK(err, "bpf_prog_load", "err %d errno %d\n", err, errno))
+		goto close_prog;
+
+	snprintf(buf, sizeof(buf),
+		 "/sys/kernel/debug/tracing/events/%s/id", probe_name);
+	efd = open(buf, O_RDONLY, 0);
+	if (CHECK(efd < 0, "open", "err %d errno %d\n", efd, errno))
+		goto close_prog;
+	bytes = read(efd, buf, sizeof(buf));
+	close(efd);
+	if (CHECK(bytes <= 0 || bytes >= sizeof(buf), "read",
+		  "bytes %d errno %d\n", bytes, errno))
+		goto close_prog;
+
+	attr.config = strtol(buf, NULL, 0);
+	attr.type = PERF_TYPE_TRACEPOINT;
+	attr.sample_type = PERF_SAMPLE_RAW;
+	attr.sample_period = 1;
+	attr.wakeup_events = 1;
+	pmu_fd = syscall(__NR_perf_event_open, &attr, -1 /* pid */,
+			 0 /* cpu 0 */, -1 /* group id */,
+			 0 /* flags */);
+	if (CHECK(err, "perf_event_open", "err %d errno %d\n", err, errno))
+		goto close_pmu;
+
+	err = ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0);
+	if (CHECK(err, "perf_event_ioc_enable", "err %d errno %d\n", err,
+		  errno))
+		goto close_pmu;
+
+	err = ioctl(pmu_fd, PERF_EVENT_IOC_SET_BPF, prog_fd);
+	if (CHECK(err, "perf_event_ioc_set_bpf", "err %d errno %d\n", err,
+		  errno))
+		goto close_pmu;
+
+	/* query (getpid(), pmu_fd) */
+	len = sizeof(buf);
+	err = bpf_task_fd_query(getpid(), pmu_fd, 0, buf, &len, &prog_id,
+				&fd_type, &probe_offset, &probe_addr);
+	if (CHECK(err < 0, "bpf_task_fd_query", "err %d errno %d\n", err,
+		  errno))
+		goto close_pmu;
+
+	err = (fd_type == BPF_FD_TYPE_TRACEPOINT) && !strcmp(buf, tp_name);
+	if (CHECK(!err, "check_results", "fd_type %d tp_name %s\n",
+		  fd_type, buf))
+		goto close_pmu;
+
+	close(pmu_fd);
+	goto close_prog_noerr;
+
+close_pmu:
+	close(pmu_fd);
+close_prog:
+	error_cnt++;
+close_prog_noerr:
+	bpf_object__close(obj);
+}
+
+static void test_task_fd_query_tp(void)
+{
+	test_task_fd_query_tp_core("sched/sched_switch",
+				   "sched_switch");
+	test_task_fd_query_tp_core("syscalls/sys_enter_read",
+				   "sys_enter_read");
+}
+
+int main(void)
+{
+	jit_enabled = is_jit_enabled();
+
+	test_pkt_access();
+	test_xdp();
+	test_xdp_adjust_tail();
+	test_l4lb_all();
+	test_xdp_noinline();
+	test_tcp_estats();
+	test_bpf_obj_id();
+	test_pkt_md_access();
+	test_obj_name();
+	test_tp_attach_query();
+	test_stacktrace_map();
+	test_stacktrace_build_id();
+	test_stacktrace_build_id_nmi();
+	test_stacktrace_map_raw_tp();
+	test_get_stack_raw_tp();
+	test_task_fd_query_rawtp();
+	test_task_fd_query_tp();
+
+	printf("Summary: %d PASSED, %d FAILED\n", pass_cnt, error_cnt);
+	return error_cnt ? EXIT_FAILURE : EXIT_SUCCESS;
+}
diff --git a/tools/testing/selftests/bpf/test_select_reuseport.c b/tools/testing/selftests/bpf/test_select_reuseport.c
new file mode 100644
index 000000000..b14d25bfa
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_select_reuseport.c
@@ -0,0 +1,700 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2018 Facebook */
+
+#include <stdlib.h>
+#include <unistd.h>
+#include <stdbool.h>
+#include <string.h>
+#include <errno.h>
+#include <assert.h>
+#include <fcntl.h>
+#include <linux/bpf.h>
+#include <linux/err.h>
+#include <linux/types.h>
+#include <linux/if_ether.h>
+#include <sys/types.h>
+#include <sys/epoll.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+#include "bpf_rlimit.h"
+#include "bpf_util.h"
+#include "test_select_reuseport_common.h"
+
+#define MIN_TCPHDR_LEN 20
+#define UDPHDR_LEN 8
+
+#define TCP_SYNCOOKIE_SYSCTL "/proc/sys/net/ipv4/tcp_syncookies"
+#define TCP_FO_SYSCTL "/proc/sys/net/ipv4/tcp_fastopen"
+#define REUSEPORT_ARRAY_SIZE 32
+
+static int result_map, tmp_index_ovr_map, linum_map, data_check_map;
+static __u32 expected_results[NR_RESULTS];
+static int sk_fds[REUSEPORT_ARRAY_SIZE];
+static int reuseport_array, outer_map;
+static int select_by_skb_data_prog;
+static int saved_tcp_syncookie;
+static struct bpf_object *obj;
+static int saved_tcp_fo;
+static __u32 index_zero;
+static int epfd;
+
+static union sa46 {
+	struct sockaddr_in6 v6;
+	struct sockaddr_in v4;
+	sa_family_t family;
+} srv_sa;
+
+#define CHECK(condition, tag, format...) ({				\
+	int __ret = !!(condition);					\
+	if (__ret) {							\
+		printf("%s(%d):FAIL:%s ", __func__, __LINE__, tag);	\
+		printf(format);						\
+		exit(-1);						\
+	}								\
+})
+
+static void create_maps(void)
+{
+	struct bpf_create_map_attr attr = {};
+
+	/* Creating reuseport_array */
+	attr.name = "reuseport_array";
+	attr.map_type = BPF_MAP_TYPE_REUSEPORT_SOCKARRAY;
+	attr.key_size = sizeof(__u32);
+	attr.value_size = sizeof(__u32);
+	attr.max_entries = REUSEPORT_ARRAY_SIZE;
+
+	reuseport_array = bpf_create_map_xattr(&attr);
+	CHECK(reuseport_array == -1, "creating reuseport_array",
+	      "reuseport_array:%d errno:%d\n", reuseport_array, errno);
+
+	/* Creating outer_map */
+	attr.name = "outer_map";
+	attr.map_type = BPF_MAP_TYPE_ARRAY_OF_MAPS;
+	attr.key_size = sizeof(__u32);
+	attr.value_size = sizeof(__u32);
+	attr.max_entries = 1;
+	attr.inner_map_fd = reuseport_array;
+	outer_map = bpf_create_map_xattr(&attr);
+	CHECK(outer_map == -1, "creating outer_map",
+	      "outer_map:%d errno:%d\n", outer_map, errno);
+}
+
+static void prepare_bpf_obj(void)
+{
+	struct bpf_program *prog;
+	struct bpf_map *map;
+	int err;
+	struct bpf_object_open_attr attr = {
+		.file = "test_select_reuseport_kern.o",
+		.prog_type = BPF_PROG_TYPE_SK_REUSEPORT,
+	};
+
+	obj = bpf_object__open_xattr(&attr);
+	CHECK(IS_ERR_OR_NULL(obj), "open test_select_reuseport_kern.o",
+	      "obj:%p PTR_ERR(obj):%ld\n", obj, PTR_ERR(obj));
+
+	prog = bpf_program__next(NULL, obj);
+	CHECK(!prog, "get first bpf_program", "!prog\n");
+	bpf_program__set_type(prog, attr.prog_type);
+
+	map = bpf_object__find_map_by_name(obj, "outer_map");
+	CHECK(!map, "find outer_map", "!map\n");
+	err = bpf_map__reuse_fd(map, outer_map);
+	CHECK(err, "reuse outer_map", "err:%d\n", err);
+
+	err = bpf_object__load(obj);
+	CHECK(err, "load bpf_object", "err:%d\n", err);
+
+	select_by_skb_data_prog = bpf_program__fd(prog);
+	CHECK(select_by_skb_data_prog == -1, "get prog fd",
+	      "select_by_skb_data_prog:%d\n", select_by_skb_data_prog);
+
+	map = bpf_object__find_map_by_name(obj, "result_map");
+	CHECK(!map, "find result_map", "!map\n");
+	result_map = bpf_map__fd(map);
+	CHECK(result_map == -1, "get result_map fd",
+	      "result_map:%d\n", result_map);
+
+	map = bpf_object__find_map_by_name(obj, "tmp_index_ovr_map");
+	CHECK(!map, "find tmp_index_ovr_map", "!map\n");
+	tmp_index_ovr_map = bpf_map__fd(map);
+	CHECK(tmp_index_ovr_map == -1, "get tmp_index_ovr_map fd",
+	      "tmp_index_ovr_map:%d\n", tmp_index_ovr_map);
+
+	map = bpf_object__find_map_by_name(obj, "linum_map");
+	CHECK(!map, "find linum_map", "!map\n");
+	linum_map = bpf_map__fd(map);
+	CHECK(linum_map == -1, "get linum_map fd",
+	      "linum_map:%d\n", linum_map);
+
+	map = bpf_object__find_map_by_name(obj, "data_check_map");
+	CHECK(!map, "find data_check_map", "!map\n");
+	data_check_map = bpf_map__fd(map);
+	CHECK(data_check_map == -1, "get data_check_map fd",
+	      "data_check_map:%d\n", data_check_map);
+}
+
+static void sa46_init_loopback(union sa46 *sa, sa_family_t family)
+{
+	memset(sa, 0, sizeof(*sa));
+	sa->family = family;
+	if (sa->family == AF_INET6)
+		sa->v6.sin6_addr = in6addr_loopback;
+	else
+		sa->v4.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+}
+
+static void sa46_init_inany(union sa46 *sa, sa_family_t family)
+{
+	memset(sa, 0, sizeof(*sa));
+	sa->family = family;
+	if (sa->family == AF_INET6)
+		sa->v6.sin6_addr = in6addr_any;
+	else
+		sa->v4.sin_addr.s_addr = INADDR_ANY;
+}
+
+static int read_int_sysctl(const char *sysctl)
+{
+	char buf[16];
+	int fd, ret;
+
+	fd = open(sysctl, 0);
+	CHECK(fd == -1, "open(sysctl)", "sysctl:%s fd:%d errno:%d\n",
+	      sysctl, fd, errno);
+
+	ret = read(fd, buf, sizeof(buf));
+	CHECK(ret <= 0, "read(sysctl)", "sysctl:%s ret:%d errno:%d\n",
+	      sysctl, ret, errno);
+	close(fd);
+
+	return atoi(buf);
+}
+
+static void write_int_sysctl(const char *sysctl, int v)
+{
+	int fd, ret, size;
+	char buf[16];
+
+	fd = open(sysctl, O_RDWR);
+	CHECK(fd == -1, "open(sysctl)", "sysctl:%s fd:%d errno:%d\n",
+	      sysctl, fd, errno);
+
+	size = snprintf(buf, sizeof(buf), "%d", v);
+	ret = write(fd, buf, size);
+	CHECK(ret != size, "write(sysctl)",
+	      "sysctl:%s ret:%d size:%d errno:%d\n", sysctl, ret, size, errno);
+	close(fd);
+}
+
+static void restore_sysctls(void)
+{
+	write_int_sysctl(TCP_FO_SYSCTL, saved_tcp_fo);
+	write_int_sysctl(TCP_SYNCOOKIE_SYSCTL, saved_tcp_syncookie);
+}
+
+static void enable_fastopen(void)
+{
+	int fo;
+
+	fo = read_int_sysctl(TCP_FO_SYSCTL);
+	write_int_sysctl(TCP_FO_SYSCTL, fo | 7);
+}
+
+static void enable_syncookie(void)
+{
+	write_int_sysctl(TCP_SYNCOOKIE_SYSCTL, 2);
+}
+
+static void disable_syncookie(void)
+{
+	write_int_sysctl(TCP_SYNCOOKIE_SYSCTL, 0);
+}
+
+static __u32 get_linum(void)
+{
+	__u32 linum;
+	int err;
+
+	err = bpf_map_lookup_elem(linum_map, &index_zero, &linum);
+	CHECK(err == -1, "lookup_elem(linum_map)", "err:%d errno:%d\n",
+	      err, errno);
+
+	return linum;
+}
+
+static void check_data(int type, sa_family_t family, const struct cmd *cmd,
+		       int cli_fd)
+{
+	struct data_check expected = {}, result;
+	union sa46 cli_sa;
+	socklen_t addrlen;
+	int err;
+
+	addrlen = sizeof(cli_sa);
+	err = getsockname(cli_fd, (struct sockaddr *)&cli_sa,
+			  &addrlen);
+	CHECK(err == -1, "getsockname(cli_fd)", "err:%d errno:%d\n",
+	      err, errno);
+
+	err = bpf_map_lookup_elem(data_check_map, &index_zero, &result);
+	CHECK(err == -1, "lookup_elem(data_check_map)", "err:%d errno:%d\n",
+	      err, errno);
+
+	if (type == SOCK_STREAM) {
+		expected.len = MIN_TCPHDR_LEN;
+		expected.ip_protocol = IPPROTO_TCP;
+	} else {
+		expected.len = UDPHDR_LEN;
+		expected.ip_protocol = IPPROTO_UDP;
+	}
+
+	if (family == AF_INET6) {
+		expected.eth_protocol = htons(ETH_P_IPV6);
+		expected.bind_inany = !srv_sa.v6.sin6_addr.s6_addr32[3] &&
+			!srv_sa.v6.sin6_addr.s6_addr32[2] &&
+			!srv_sa.v6.sin6_addr.s6_addr32[1] &&
+			!srv_sa.v6.sin6_addr.s6_addr32[0];
+
+		memcpy(&expected.skb_addrs[0], cli_sa.v6.sin6_addr.s6_addr32,
+		       sizeof(cli_sa.v6.sin6_addr));
+		memcpy(&expected.skb_addrs[4], &in6addr_loopback,
+		       sizeof(in6addr_loopback));
+		expected.skb_ports[0] = cli_sa.v6.sin6_port;
+		expected.skb_ports[1] = srv_sa.v6.sin6_port;
+	} else {
+		expected.eth_protocol = htons(ETH_P_IP);
+		expected.bind_inany = !srv_sa.v4.sin_addr.s_addr;
+
+		expected.skb_addrs[0] = cli_sa.v4.sin_addr.s_addr;
+		expected.skb_addrs[1] = htonl(INADDR_LOOPBACK);
+		expected.skb_ports[0] = cli_sa.v4.sin_port;
+		expected.skb_ports[1] = srv_sa.v4.sin_port;
+	}
+
+	if (memcmp(&result, &expected, offsetof(struct data_check,
+						equal_check_end))) {
+		printf("unexpected data_check\n");
+		printf("  result: (0x%x, %u, %u)\n",
+		       result.eth_protocol, result.ip_protocol,
+		       result.bind_inany);
+		printf("expected: (0x%x, %u, %u)\n",
+		       expected.eth_protocol, expected.ip_protocol,
+		       expected.bind_inany);
+		CHECK(1, "data_check result != expected",
+		      "bpf_prog_linum:%u\n", get_linum());
+	}
+
+	CHECK(!result.hash, "data_check result.hash empty",
+	      "result.hash:%u", result.hash);
+
+	expected.len += cmd ? sizeof(*cmd) : 0;
+	if (type == SOCK_STREAM)
+		CHECK(expected.len > result.len, "expected.len > result.len",
+		      "expected.len:%u result.len:%u bpf_prog_linum:%u\n",
+		      expected.len, result.len, get_linum());
+	else
+		CHECK(expected.len != result.len, "expected.len != result.len",
+		      "expected.len:%u result.len:%u bpf_prog_linum:%u\n",
+		      expected.len, result.len, get_linum());
+}
+
+static void check_results(void)
+{
+	__u32 results[NR_RESULTS];
+	__u32 i, broken = 0;
+	int err;
+
+	for (i = 0; i < NR_RESULTS; i++) {
+		err = bpf_map_lookup_elem(result_map, &i, &results[i]);
+		CHECK(err == -1, "lookup_elem(result_map)",
+		      "i:%u err:%d errno:%d\n", i, err, errno);
+	}
+
+	for (i = 0; i < NR_RESULTS; i++) {
+		if (results[i] != expected_results[i]) {
+			broken = i;
+			break;
+		}
+	}
+
+	if (i == NR_RESULTS)
+		return;
+
+	printf("unexpected result\n");
+	printf(" result: [");
+	printf("%u", results[0]);
+	for (i = 1; i < NR_RESULTS; i++)
+		printf(", %u", results[i]);
+	printf("]\n");
+
+	printf("expected: [");
+	printf("%u", expected_results[0]);
+	for (i = 1; i < NR_RESULTS; i++)
+		printf(", %u", expected_results[i]);
+	printf("]\n");
+
+	CHECK(expected_results[broken] != results[broken],
+	      "unexpected result",
+	      "expected_results[%u] != results[%u] bpf_prog_linum:%u\n",
+	      broken, broken, get_linum());
+}
+
+static int send_data(int type, sa_family_t family, void *data, size_t len,
+		     enum result expected)
+{
+	union sa46 cli_sa;
+	int fd, err;
+
+	fd = socket(family, type, 0);
+	CHECK(fd == -1, "socket()", "fd:%d errno:%d\n", fd, errno);
+
+	sa46_init_loopback(&cli_sa, family);
+	err = bind(fd, (struct sockaddr *)&cli_sa, sizeof(cli_sa));
+	CHECK(fd == -1, "bind(cli_sa)", "err:%d errno:%d\n", err, errno);
+
+	err = sendto(fd, data, len, MSG_FASTOPEN, (struct sockaddr *)&srv_sa,
+		     sizeof(srv_sa));
+	CHECK(err != len && expected >= PASS,
+	      "sendto()", "family:%u err:%d errno:%d expected:%d\n",
+	      family, err, errno, expected);
+
+	return fd;
+}
+
+static void do_test(int type, sa_family_t family, struct cmd *cmd,
+		    enum result expected)
+{
+	int nev, srv_fd, cli_fd;
+	struct epoll_event ev;
+	struct cmd rcv_cmd;
+	ssize_t nread;
+
+	cli_fd = send_data(type, family, cmd, cmd ? sizeof(*cmd) : 0,
+			   expected);
+	nev = epoll_wait(epfd, &ev, 1, expected >= PASS ? 5 : 0);
+	CHECK((nev <= 0 && expected >= PASS) ||
+	      (nev > 0 && expected < PASS),
+	      "nev <> expected",
+	      "nev:%d expected:%d type:%d family:%d data:(%d, %d)\n",
+	      nev, expected, type, family,
+	      cmd ? cmd->reuseport_index : -1,
+	      cmd ? cmd->pass_on_failure : -1);
+	check_results();
+	check_data(type, family, cmd, cli_fd);
+
+	if (expected < PASS)
+		return;
+
+	CHECK(expected != PASS_ERR_SK_SELECT_REUSEPORT &&
+	      cmd->reuseport_index != ev.data.u32,
+	      "check cmd->reuseport_index",
+	      "cmd:(%u, %u) ev.data.u32:%u\n",
+	      cmd->pass_on_failure, cmd->reuseport_index, ev.data.u32);
+
+	srv_fd = sk_fds[ev.data.u32];
+	if (type == SOCK_STREAM) {
+		int new_fd = accept(srv_fd, NULL, 0);
+
+		CHECK(new_fd == -1, "accept(srv_fd)",
+		      "ev.data.u32:%u new_fd:%d errno:%d\n",
+		      ev.data.u32, new_fd, errno);
+
+		nread = recv(new_fd, &rcv_cmd, sizeof(rcv_cmd), MSG_DONTWAIT);
+		CHECK(nread != sizeof(rcv_cmd),
+		      "recv(new_fd)",
+		      "ev.data.u32:%u nread:%zd sizeof(rcv_cmd):%zu errno:%d\n",
+		      ev.data.u32, nread, sizeof(rcv_cmd), errno);
+
+		close(new_fd);
+	} else {
+		nread = recv(srv_fd, &rcv_cmd, sizeof(rcv_cmd), MSG_DONTWAIT);
+		CHECK(nread != sizeof(rcv_cmd),
+		      "recv(sk_fds)",
+		      "ev.data.u32:%u nread:%zd sizeof(rcv_cmd):%zu errno:%d\n",
+		      ev.data.u32, nread, sizeof(rcv_cmd), errno);
+	}
+
+	close(cli_fd);
+}
+
+static void test_err_inner_map(int type, sa_family_t family)
+{
+	struct cmd cmd = {
+		.reuseport_index = 0,
+		.pass_on_failure = 0,
+	};
+
+	printf("%s: ", __func__);
+	expected_results[DROP_ERR_INNER_MAP]++;
+	do_test(type, family, &cmd, DROP_ERR_INNER_MAP);
+	printf("OK\n");
+}
+
+static void test_err_skb_data(int type, sa_family_t family)
+{
+	printf("%s: ", __func__);
+	expected_results[DROP_ERR_SKB_DATA]++;
+	do_test(type, family, NULL, DROP_ERR_SKB_DATA);
+	printf("OK\n");
+}
+
+static void test_err_sk_select_port(int type, sa_family_t family)
+{
+	struct cmd cmd = {
+		.reuseport_index = REUSEPORT_ARRAY_SIZE,
+		.pass_on_failure = 0,
+	};
+
+	printf("%s: ", __func__);
+	expected_results[DROP_ERR_SK_SELECT_REUSEPORT]++;
+	do_test(type, family, &cmd, DROP_ERR_SK_SELECT_REUSEPORT);
+	printf("OK\n");
+}
+
+static void test_pass(int type, sa_family_t family)
+{
+	struct cmd cmd;
+	int i;
+
+	printf("%s: ", __func__);
+	cmd.pass_on_failure = 0;
+	for (i = 0; i < REUSEPORT_ARRAY_SIZE; i++) {
+		expected_results[PASS]++;
+		cmd.reuseport_index = i;
+		do_test(type, family, &cmd, PASS);
+	}
+	printf("OK\n");
+}
+
+static void test_syncookie(int type, sa_family_t family)
+{
+	int err, tmp_index = 1;
+	struct cmd cmd = {
+		.reuseport_index = 0,
+		.pass_on_failure = 0,
+	};
+
+	if (type != SOCK_STREAM)
+		return;
+
+	printf("%s: ", __func__);
+	/*
+	 * +1 for TCP-SYN and
+	 * +1 for the TCP-ACK (ack the syncookie)
+	 */
+	expected_results[PASS] += 2;
+	enable_syncookie();
+	/*
+	 * Simulate TCP-SYN and TCP-ACK are handled by two different sk:
+	 * TCP-SYN: select sk_fds[tmp_index = 1] tmp_index is from the
+	 *          tmp_index_ovr_map
+	 * TCP-ACK: select sk_fds[reuseport_index = 0] reuseport_index
+	 *          is from the cmd.reuseport_index
+	 */
+	err = bpf_map_update_elem(tmp_index_ovr_map, &index_zero,
+				  &tmp_index, BPF_ANY);
+	CHECK(err == -1, "update_elem(tmp_index_ovr_map, 0, 1)",
+	      "err:%d errno:%d\n", err, errno);
+	do_test(type, family, &cmd, PASS);
+	err = bpf_map_lookup_elem(tmp_index_ovr_map, &index_zero,
+				  &tmp_index);
+	CHECK(err == -1 || tmp_index != -1,
+	      "lookup_elem(tmp_index_ovr_map)",
+	      "err:%d errno:%d tmp_index:%d\n",
+	      err, errno, tmp_index);
+	disable_syncookie();
+	printf("OK\n");
+}
+
+static void test_pass_on_err(int type, sa_family_t family)
+{
+	struct cmd cmd = {
+		.reuseport_index = REUSEPORT_ARRAY_SIZE,
+		.pass_on_failure = 1,
+	};
+
+	printf("%s: ", __func__);
+	expected_results[PASS_ERR_SK_SELECT_REUSEPORT] += 1;
+	do_test(type, family, &cmd, PASS_ERR_SK_SELECT_REUSEPORT);
+	printf("OK\n");
+}
+
+static void prepare_sk_fds(int type, sa_family_t family, bool inany)
+{
+	const int first = REUSEPORT_ARRAY_SIZE - 1;
+	int i, err, optval = 1;
+	struct epoll_event ev;
+	socklen_t addrlen;
+
+	if (inany)
+		sa46_init_inany(&srv_sa, family);
+	else
+		sa46_init_loopback(&srv_sa, family);
+	addrlen = sizeof(srv_sa);
+
+	/*
+	 * The sk_fds[] is filled from the back such that the order
+	 * is exactly opposite to the (struct sock_reuseport *)reuse->socks[].
+	 */
+	for (i = first; i >= 0; i--) {
+		sk_fds[i] = socket(family, type, 0);
+		CHECK(sk_fds[i] == -1, "socket()", "sk_fds[%d]:%d errno:%d\n",
+		      i, sk_fds[i], errno);
+		err = setsockopt(sk_fds[i], SOL_SOCKET, SO_REUSEPORT,
+				 &optval, sizeof(optval));
+		CHECK(err == -1, "setsockopt(SO_REUSEPORT)",
+		      "sk_fds[%d] err:%d errno:%d\n",
+		      i, err, errno);
+
+		if (i == first) {
+			err = setsockopt(sk_fds[i], SOL_SOCKET,
+					 SO_ATTACH_REUSEPORT_EBPF,
+					 &select_by_skb_data_prog,
+					 sizeof(select_by_skb_data_prog));
+			CHECK(err == -1, "setsockopt(SO_ATTACH_REUEPORT_EBPF)",
+			      "err:%d errno:%d\n", err, errno);
+		}
+
+		err = bind(sk_fds[i], (struct sockaddr *)&srv_sa, addrlen);
+		CHECK(err == -1, "bind()", "sk_fds[%d] err:%d errno:%d\n",
+		      i, err, errno);
+
+		if (type == SOCK_STREAM) {
+			err = listen(sk_fds[i], 10);
+			CHECK(err == -1, "listen()",
+			      "sk_fds[%d] err:%d errno:%d\n",
+			      i, err, errno);
+		}
+
+		err = bpf_map_update_elem(reuseport_array, &i, &sk_fds[i],
+					  BPF_NOEXIST);
+		CHECK(err == -1, "update_elem(reuseport_array)",
+		      "sk_fds[%d] err:%d errno:%d\n", i, err, errno);
+
+		if (i == first) {
+			socklen_t addrlen = sizeof(srv_sa);
+
+			err = getsockname(sk_fds[i], (struct sockaddr *)&srv_sa,
+					  &addrlen);
+			CHECK(err == -1, "getsockname()",
+			      "sk_fds[%d] err:%d errno:%d\n", i, err, errno);
+		}
+	}
+
+	epfd = epoll_create(1);
+	CHECK(epfd == -1, "epoll_create(1)",
+	      "epfd:%d errno:%d\n", epfd, errno);
+
+	ev.events = EPOLLIN;
+	for (i = 0; i < REUSEPORT_ARRAY_SIZE; i++) {
+		ev.data.u32 = i;
+		err = epoll_ctl(epfd, EPOLL_CTL_ADD, sk_fds[i], &ev);
+		CHECK(err, "epoll_ctl(EPOLL_CTL_ADD)", "sk_fds[%d]\n", i);
+	}
+}
+
+static void setup_per_test(int type, unsigned short family, bool inany)
+{
+	int ovr = -1, err;
+
+	prepare_sk_fds(type, family, inany);
+	err = bpf_map_update_elem(tmp_index_ovr_map, &index_zero, &ovr,
+				  BPF_ANY);
+	CHECK(err == -1, "update_elem(tmp_index_ovr_map, 0, -1)",
+	      "err:%d errno:%d\n", err, errno);
+}
+
+static void cleanup_per_test(void)
+{
+	int i, err, zero = 0;
+
+	memset(expected_results, 0, sizeof(expected_results));
+
+	for (i = 0; i < NR_RESULTS; i++) {
+		err = bpf_map_update_elem(result_map, &i, &zero, BPF_ANY);
+		CHECK(err, "reset elem in result_map",
+		      "i:%u err:%d errno:%d\n", i, err, errno);
+	}
+
+	err = bpf_map_update_elem(linum_map, &zero, &zero, BPF_ANY);
+	CHECK(err, "reset line number in linum_map", "err:%d errno:%d\n",
+	      err, errno);
+
+	for (i = 0; i < REUSEPORT_ARRAY_SIZE; i++)
+		close(sk_fds[i]);
+	close(epfd);
+
+	err = bpf_map_delete_elem(outer_map, &index_zero);
+	CHECK(err == -1, "delete_elem(outer_map)",
+	      "err:%d errno:%d\n", err, errno);
+}
+
+static void cleanup(void)
+{
+	close(outer_map);
+	close(reuseport_array);
+	bpf_object__close(obj);
+}
+
+static void test_all(void)
+{
+	/* Extra SOCK_STREAM to test bind_inany==true */
+	const int types[] = { SOCK_STREAM, SOCK_DGRAM, SOCK_STREAM };
+	const char * const type_strings[] = { "TCP", "UDP", "TCP" };
+	const char * const family_strings[] = { "IPv6", "IPv4" };
+	const unsigned short families[] = { AF_INET6, AF_INET };
+	const bool bind_inany[] = { false, false, true };
+	int t, f, err;
+
+	for (f = 0; f < ARRAY_SIZE(families); f++) {
+		unsigned short family = families[f];
+
+		for (t = 0; t < ARRAY_SIZE(types); t++) {
+			bool inany = bind_inany[t];
+			int type = types[t];
+
+			printf("######## %s/%s %s ########\n",
+			       family_strings[f], type_strings[t],
+				inany ? " INANY  " : "LOOPBACK");
+
+			setup_per_test(type, family, inany);
+
+			test_err_inner_map(type, family);
+
+			/* Install reuseport_array to the outer_map */
+			err = bpf_map_update_elem(outer_map, &index_zero,
+						  &reuseport_array, BPF_ANY);
+			CHECK(err == -1, "update_elem(outer_map)",
+			      "err:%d errno:%d\n", err, errno);
+
+			test_err_skb_data(type, family);
+			test_err_sk_select_port(type, family);
+			test_pass(type, family);
+			test_syncookie(type, family);
+			test_pass_on_err(type, family);
+
+			cleanup_per_test();
+			printf("\n");
+		}
+	}
+}
+
+int main(int argc, const char **argv)
+{
+	create_maps();
+	prepare_bpf_obj();
+	saved_tcp_fo = read_int_sysctl(TCP_FO_SYSCTL);
+	saved_tcp_syncookie = read_int_sysctl(TCP_SYNCOOKIE_SYSCTL);
+	enable_fastopen();
+	disable_syncookie();
+	atexit(restore_sysctls);
+
+	test_all();
+
+	cleanup();
+	return 0;
+}
diff --git a/tools/testing/selftests/bpf/test_select_reuseport_common.h b/tools/testing/selftests/bpf/test_select_reuseport_common.h
new file mode 100644
index 000000000..08eb2a9f1
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_select_reuseport_common.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018 Facebook */
+
+#ifndef __TEST_SELECT_REUSEPORT_COMMON_H
+#define __TEST_SELECT_REUSEPORT_COMMON_H
+
+#include <linux/types.h>
+
+enum result {
+	DROP_ERR_INNER_MAP,
+	DROP_ERR_SKB_DATA,
+	DROP_ERR_SK_SELECT_REUSEPORT,
+	DROP_MISC,
+	PASS,
+	PASS_ERR_SK_SELECT_REUSEPORT,
+	NR_RESULTS,
+};
+
+struct cmd {
+	__u32 reuseport_index;
+	__u32 pass_on_failure;
+};
+
+struct data_check {
+	__u32 ip_protocol;
+	__u32 skb_addrs[8];
+	__u16 skb_ports[2];
+	__u16 eth_protocol;
+	__u8  bind_inany;
+	__u8  equal_check_end[0];
+
+	__u32 len;
+	__u32 hash;
+};
+
+#endif
diff --git a/tools/testing/selftests/bpf/test_select_reuseport_kern.c b/tools/testing/selftests/bpf/test_select_reuseport_kern.c
new file mode 100644
index 000000000..5b54ec637
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_select_reuseport_kern.c
@@ -0,0 +1,180 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2018 Facebook */
+
+#include <stdlib.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+#include <linux/bpf.h>
+#include <linux/types.h>
+#include <linux/if_ether.h>
+
+#include "bpf_endian.h"
+#include "bpf_helpers.h"
+#include "test_select_reuseport_common.h"
+
+int _version SEC("version") = 1;
+
+#ifndef offsetof
+#define offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER)
+#endif
+
+struct bpf_map_def SEC("maps") outer_map = {
+	.type = BPF_MAP_TYPE_ARRAY_OF_MAPS,
+	.key_size = sizeof(__u32),
+	.value_size = sizeof(__u32),
+	.max_entries = 1,
+};
+
+struct bpf_map_def SEC("maps") result_map = {
+	.type = BPF_MAP_TYPE_ARRAY,
+	.key_size = sizeof(__u32),
+	.value_size = sizeof(__u32),
+	.max_entries = NR_RESULTS,
+};
+
+struct bpf_map_def SEC("maps") tmp_index_ovr_map = {
+	.type = BPF_MAP_TYPE_ARRAY,
+	.key_size = sizeof(__u32),
+	.value_size = sizeof(int),
+	.max_entries = 1,
+};
+
+struct bpf_map_def SEC("maps") linum_map = {
+	.type = BPF_MAP_TYPE_ARRAY,
+	.key_size = sizeof(__u32),
+	.value_size = sizeof(__u32),
+	.max_entries = 1,
+};
+
+struct bpf_map_def SEC("maps") data_check_map = {
+	.type = BPF_MAP_TYPE_ARRAY,
+	.key_size = sizeof(__u32),
+	.value_size = sizeof(struct data_check),
+	.max_entries = 1,
+};
+
+#define GOTO_DONE(_result) ({			\
+	result = (_result);			\
+	linum = __LINE__;			\
+	goto done;				\
+})
+
+SEC("select_by_skb_data")
+int _select_by_skb_data(struct sk_reuseport_md *reuse_md)
+{
+	__u32 linum, index = 0, flags = 0, index_zero = 0;
+	__u32 *result_cnt, *linum_value;
+	struct data_check data_check = {};
+	struct cmd *cmd, cmd_copy;
+	void *data, *data_end;
+	void *reuseport_array;
+	enum result result;
+	int *index_ovr;
+	int err;
+
+	data = reuse_md->data;
+	data_end = reuse_md->data_end;
+	data_check.len = reuse_md->len;
+	data_check.eth_protocol = reuse_md->eth_protocol;
+	data_check.ip_protocol = reuse_md->ip_protocol;
+	data_check.hash = reuse_md->hash;
+	data_check.bind_inany = reuse_md->bind_inany;
+	if (data_check.eth_protocol == bpf_htons(ETH_P_IP)) {
+		if (bpf_skb_load_bytes_relative(reuse_md,
+						offsetof(struct iphdr, saddr),
+						data_check.skb_addrs, 8,
+						BPF_HDR_START_NET))
+			GOTO_DONE(DROP_MISC);
+	} else {
+		if (bpf_skb_load_bytes_relative(reuse_md,
+						offsetof(struct ipv6hdr, saddr),
+						data_check.skb_addrs, 32,
+						BPF_HDR_START_NET))
+			GOTO_DONE(DROP_MISC);
+	}
+
+	/*
+	 * The ip_protocol could be a compile time decision
+	 * if the bpf_prog.o is dedicated to either TCP or
+	 * UDP.
+	 *
+	 * Otherwise, reuse_md->ip_protocol or
+	 * the protocol field in the iphdr can be used.
+	 */
+	if (data_check.ip_protocol == IPPROTO_TCP) {
+		struct tcphdr *th = data;
+
+		if (th + 1 > data_end)
+			GOTO_DONE(DROP_MISC);
+
+		data_check.skb_ports[0] = th->source;
+		data_check.skb_ports[1] = th->dest;
+
+		if ((th->doff << 2) + sizeof(*cmd) > data_check.len)
+			GOTO_DONE(DROP_ERR_SKB_DATA);
+		if (bpf_skb_load_bytes(reuse_md, th->doff << 2, &cmd_copy,
+				       sizeof(cmd_copy)))
+			GOTO_DONE(DROP_MISC);
+		cmd = &cmd_copy;
+	} else if (data_check.ip_protocol == IPPROTO_UDP) {
+		struct udphdr *uh = data;
+
+		if (uh + 1 > data_end)
+			GOTO_DONE(DROP_MISC);
+
+		data_check.skb_ports[0] = uh->source;
+		data_check.skb_ports[1] = uh->dest;
+
+		if (sizeof(struct udphdr) + sizeof(*cmd) > data_check.len)
+			GOTO_DONE(DROP_ERR_SKB_DATA);
+		if (data + sizeof(struct udphdr) + sizeof(*cmd) > data_end) {
+			if (bpf_skb_load_bytes(reuse_md, sizeof(struct udphdr),
+					       &cmd_copy, sizeof(cmd_copy)))
+				GOTO_DONE(DROP_MISC);
+			cmd = &cmd_copy;
+		} else {
+			cmd = data + sizeof(struct udphdr);
+		}
+	} else {
+		GOTO_DONE(DROP_MISC);
+	}
+
+	reuseport_array = bpf_map_lookup_elem(&outer_map, &index_zero);
+	if (!reuseport_array)
+		GOTO_DONE(DROP_ERR_INNER_MAP);
+
+	index = cmd->reuseport_index;
+	index_ovr = bpf_map_lookup_elem(&tmp_index_ovr_map, &index_zero);
+	if (!index_ovr)
+		GOTO_DONE(DROP_MISC);
+
+	if (*index_ovr != -1) {
+		index = *index_ovr;
+		*index_ovr = -1;
+	}
+	err = bpf_sk_select_reuseport(reuse_md, reuseport_array, &index,
+				      flags);
+	if (!err)
+		GOTO_DONE(PASS);
+
+	if (cmd->pass_on_failure)
+		GOTO_DONE(PASS_ERR_SK_SELECT_REUSEPORT);
+	else
+		GOTO_DONE(DROP_ERR_SK_SELECT_REUSEPORT);
+
+done:
+	result_cnt = bpf_map_lookup_elem(&result_map, &result);
+	if (!result_cnt)
+		return SK_DROP;
+
+	bpf_map_update_elem(&linum_map, &index_zero, &linum, BPF_ANY);
+	bpf_map_update_elem(&data_check_map, &index_zero, &data_check, BPF_ANY);
+
+	(*result_cnt)++;
+	return result < PASS ? SK_DROP : SK_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_skb_cgroup_id.sh b/tools/testing/selftests/bpf/test_skb_cgroup_id.sh
new file mode 100755
index 000000000..42544a969
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_skb_cgroup_id.sh
@@ -0,0 +1,62 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (c) 2018 Facebook
+
+set -eu
+
+wait_for_ip()
+{
+	local _i
+	echo -n "Wait for testing link-local IP to become available "
+	for _i in $(seq ${MAX_PING_TRIES}); do
+		echo -n "."
+		if ping -6 -q -c 1 -W 1 ff02::1%${TEST_IF} >/dev/null 2>&1; then
+			echo " OK"
+			return
+		fi
+		sleep 1
+	done
+	echo 1>&2 "ERROR: Timeout waiting for test IP to become available."
+	exit 1
+}
+
+setup()
+{
+	# Create testing interfaces not to interfere with current environment.
+	ip link add dev ${TEST_IF} type veth peer name ${TEST_IF_PEER}
+	ip link set ${TEST_IF} up
+	ip link set ${TEST_IF_PEER} up
+
+	wait_for_ip
+
+	tc qdisc add dev ${TEST_IF} clsact
+	tc filter add dev ${TEST_IF} egress bpf obj ${BPF_PROG_OBJ} \
+		sec ${BPF_PROG_SECTION} da
+
+	BPF_PROG_ID=$(tc filter show dev ${TEST_IF} egress | \
+			awk '/ id / {sub(/.* id /, "", $0); print($1)}')
+}
+
+cleanup()
+{
+	ip link del ${TEST_IF} 2>/dev/null || :
+	ip link del ${TEST_IF_PEER} 2>/dev/null || :
+}
+
+main()
+{
+	trap cleanup EXIT 2 3 6 15
+	setup
+	${PROG} ${TEST_IF} ${BPF_PROG_ID}
+}
+
+DIR=$(dirname $0)
+TEST_IF="test_cgid_1"
+TEST_IF_PEER="test_cgid_2"
+MAX_PING_TRIES=5
+BPF_PROG_OBJ="${DIR}/test_skb_cgroup_id_kern.o"
+BPF_PROG_SECTION="cgroup_id_logger"
+BPF_PROG_ID=0
+PROG="${DIR}/test_skb_cgroup_id_user"
+
+main
diff --git a/tools/testing/selftests/bpf/test_skb_cgroup_id_kern.c b/tools/testing/selftests/bpf/test_skb_cgroup_id_kern.c
new file mode 100644
index 000000000..68cf9829f
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_skb_cgroup_id_kern.c
@@ -0,0 +1,47 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2018 Facebook
+
+#include <linux/bpf.h>
+#include <linux/pkt_cls.h>
+
+#include <string.h>
+
+#include "bpf_helpers.h"
+
+#define NUM_CGROUP_LEVELS	4
+
+struct bpf_map_def SEC("maps") cgroup_ids = {
+	.type = BPF_MAP_TYPE_ARRAY,
+	.key_size = sizeof(__u32),
+	.value_size = sizeof(__u64),
+	.max_entries = NUM_CGROUP_LEVELS,
+};
+
+static __always_inline void log_nth_level(struct __sk_buff *skb, __u32 level)
+{
+	__u64 id;
+
+	/* [1] &level passed to external function that may change it, it's
+	 *     incompatible with loop unroll.
+	 */
+	id = bpf_skb_ancestor_cgroup_id(skb, level);
+	bpf_map_update_elem(&cgroup_ids, &level, &id, 0);
+}
+
+SEC("cgroup_id_logger")
+int log_cgroup_id(struct __sk_buff *skb)
+{
+	/* Loop unroll can't be used here due to [1]. Unrolling manually.
+	 * Number of calls should be in sync with NUM_CGROUP_LEVELS.
+	 */
+	log_nth_level(skb, 0);
+	log_nth_level(skb, 1);
+	log_nth_level(skb, 2);
+	log_nth_level(skb, 3);
+
+	return TC_ACT_OK;
+}
+
+int _version SEC("version") = 1;
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_skb_cgroup_id_user.c b/tools/testing/selftests/bpf/test_skb_cgroup_id_user.c
new file mode 100644
index 000000000..c121cc59f
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_skb_cgroup_id_user.c
@@ -0,0 +1,187 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2018 Facebook
+
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <arpa/inet.h>
+#include <net/if.h>
+#include <netinet/in.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+
+
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+
+#include "bpf_rlimit.h"
+#include "cgroup_helpers.h"
+
+#define CGROUP_PATH		"/skb_cgroup_test"
+#define NUM_CGROUP_LEVELS	4
+
+/* RFC 4291, Section 2.7.1 */
+#define LINKLOCAL_MULTICAST	"ff02::1"
+
+static int mk_dst_addr(const char *ip, const char *iface,
+		       struct sockaddr_in6 *dst)
+{
+	memset(dst, 0, sizeof(*dst));
+
+	dst->sin6_family = AF_INET6;
+	dst->sin6_port = htons(1025);
+
+	if (inet_pton(AF_INET6, ip, &dst->sin6_addr) != 1) {
+		log_err("Invalid IPv6: %s", ip);
+		return -1;
+	}
+
+	dst->sin6_scope_id = if_nametoindex(iface);
+	if (!dst->sin6_scope_id) {
+		log_err("Failed to get index of iface: %s", iface);
+		return -1;
+	}
+
+	return 0;
+}
+
+static int send_packet(const char *iface)
+{
+	struct sockaddr_in6 dst;
+	char msg[] = "msg";
+	int err = 0;
+	int fd = -1;
+
+	if (mk_dst_addr(LINKLOCAL_MULTICAST, iface, &dst))
+		goto err;
+
+	fd = socket(AF_INET6, SOCK_DGRAM, 0);
+	if (fd == -1) {
+		log_err("Failed to create UDP socket");
+		goto err;
+	}
+
+	if (sendto(fd, &msg, sizeof(msg), 0, (const struct sockaddr *)&dst,
+		   sizeof(dst)) == -1) {
+		log_err("Failed to send datagram");
+		goto err;
+	}
+
+	goto out;
+err:
+	err = -1;
+out:
+	if (fd >= 0)
+		close(fd);
+	return err;
+}
+
+int get_map_fd_by_prog_id(int prog_id)
+{
+	struct bpf_prog_info info = {};
+	__u32 info_len = sizeof(info);
+	__u32 map_ids[1];
+	int prog_fd = -1;
+	int map_fd = -1;
+
+	prog_fd = bpf_prog_get_fd_by_id(prog_id);
+	if (prog_fd < 0) {
+		log_err("Failed to get fd by prog id %d", prog_id);
+		goto err;
+	}
+
+	info.nr_map_ids = 1;
+	info.map_ids = (__u64) (unsigned long) map_ids;
+
+	if (bpf_obj_get_info_by_fd(prog_fd, &info, &info_len)) {
+		log_err("Failed to get info by prog fd %d", prog_fd);
+		goto err;
+	}
+
+	if (!info.nr_map_ids) {
+		log_err("No maps found for prog fd %d", prog_fd);
+		goto err;
+	}
+
+	map_fd = bpf_map_get_fd_by_id(map_ids[0]);
+	if (map_fd < 0)
+		log_err("Failed to get fd by map id %d", map_ids[0]);
+err:
+	if (prog_fd >= 0)
+		close(prog_fd);
+	return map_fd;
+}
+
+int check_ancestor_cgroup_ids(int prog_id)
+{
+	__u64 actual_ids[NUM_CGROUP_LEVELS], expected_ids[NUM_CGROUP_LEVELS];
+	__u32 level;
+	int err = 0;
+	int map_fd;
+
+	expected_ids[0] = 0x100000001;	/* root cgroup */
+	expected_ids[1] = get_cgroup_id("");
+	expected_ids[2] = get_cgroup_id(CGROUP_PATH);
+	expected_ids[3] = 0; /* non-existent cgroup */
+
+	map_fd = get_map_fd_by_prog_id(prog_id);
+	if (map_fd < 0)
+		goto err;
+
+	for (level = 0; level < NUM_CGROUP_LEVELS; ++level) {
+		if (bpf_map_lookup_elem(map_fd, &level, &actual_ids[level])) {
+			log_err("Failed to lookup key %d", level);
+			goto err;
+		}
+		if (actual_ids[level] != expected_ids[level]) {
+			log_err("%llx (actual) != %llx (expected), level: %u\n",
+				actual_ids[level], expected_ids[level], level);
+			goto err;
+		}
+	}
+
+	goto out;
+err:
+	err = -1;
+out:
+	if (map_fd >= 0)
+		close(map_fd);
+	return err;
+}
+
+int main(int argc, char **argv)
+{
+	int cgfd = -1;
+	int err = 0;
+
+	if (argc < 3) {
+		fprintf(stderr, "Usage: %s iface prog_id\n", argv[0]);
+		exit(EXIT_FAILURE);
+	}
+
+	if (setup_cgroup_environment())
+		goto err;
+
+	cgfd = create_and_get_cgroup(CGROUP_PATH);
+	if (!cgfd)
+		goto err;
+
+	if (join_cgroup(CGROUP_PATH))
+		goto err;
+
+	if (send_packet(argv[1]))
+		goto err;
+
+	if (check_ancestor_cgroup_ids(atoi(argv[2])))
+		goto err;
+
+	goto out;
+err:
+	err = -1;
+out:
+	close(cgfd);
+	cleanup_cgroup_environment();
+	printf("[%s]\n", err ? "FAIL" : "PASS");
+	return err;
+}
diff --git a/tools/testing/selftests/bpf/test_sock.c b/tools/testing/selftests/bpf/test_sock.c
new file mode 100644
index 000000000..e95671220
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_sock.c
@@ -0,0 +1,480 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2018 Facebook
+
+#include <stdio.h>
+#include <unistd.h>
+
+#include <arpa/inet.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+
+#include <linux/filter.h>
+
+#include <bpf/bpf.h>
+
+#include "cgroup_helpers.h"
+#include "bpf_endian.h"
+#include "bpf_rlimit.h"
+#include "bpf_util.h"
+
+#define CG_PATH		"/foo"
+#define MAX_INSNS	512
+
+char bpf_log_buf[BPF_LOG_BUF_SIZE];
+
+struct sock_test {
+	const char *descr;
+	/* BPF prog properties */
+	struct bpf_insn	insns[MAX_INSNS];
+	enum bpf_attach_type expected_attach_type;
+	enum bpf_attach_type attach_type;
+	/* Socket properties */
+	int domain;
+	int type;
+	/* Endpoint to bind() to */
+	const char *ip;
+	unsigned short port;
+	/* Expected test result */
+	enum {
+		LOAD_REJECT,
+		ATTACH_REJECT,
+		BIND_REJECT,
+		SUCCESS,
+	} result;
+};
+
+static struct sock_test tests[] = {
+	{
+		"bind4 load with invalid access: src_ip6",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
+				    offsetof(struct bpf_sock, src_ip6[0])),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		BPF_CGROUP_INET4_POST_BIND,
+		BPF_CGROUP_INET4_POST_BIND,
+		0,
+		0,
+		NULL,
+		0,
+		LOAD_REJECT,
+	},
+	{
+		"bind4 load with invalid access: mark",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
+				    offsetof(struct bpf_sock, mark)),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		BPF_CGROUP_INET4_POST_BIND,
+		BPF_CGROUP_INET4_POST_BIND,
+		0,
+		0,
+		NULL,
+		0,
+		LOAD_REJECT,
+	},
+	{
+		"bind6 load with invalid access: src_ip4",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
+				    offsetof(struct bpf_sock, src_ip4)),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		BPF_CGROUP_INET6_POST_BIND,
+		BPF_CGROUP_INET6_POST_BIND,
+		0,
+		0,
+		NULL,
+		0,
+		LOAD_REJECT,
+	},
+	{
+		"sock_create load with invalid access: src_port",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
+				    offsetof(struct bpf_sock, src_port)),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		BPF_CGROUP_INET_SOCK_CREATE,
+		BPF_CGROUP_INET_SOCK_CREATE,
+		0,
+		0,
+		NULL,
+		0,
+		LOAD_REJECT,
+	},
+	{
+		"sock_create load w/o expected_attach_type (compat mode)",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		0,
+		BPF_CGROUP_INET_SOCK_CREATE,
+		AF_INET,
+		SOCK_STREAM,
+		"127.0.0.1",
+		8097,
+		SUCCESS,
+	},
+	{
+		"sock_create load w/ expected_attach_type",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		BPF_CGROUP_INET_SOCK_CREATE,
+		BPF_CGROUP_INET_SOCK_CREATE,
+		AF_INET,
+		SOCK_STREAM,
+		"127.0.0.1",
+		8097,
+		SUCCESS,
+	},
+	{
+		"attach type mismatch bind4 vs bind6",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		BPF_CGROUP_INET4_POST_BIND,
+		BPF_CGROUP_INET6_POST_BIND,
+		0,
+		0,
+		NULL,
+		0,
+		ATTACH_REJECT,
+	},
+	{
+		"attach type mismatch bind6 vs bind4",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		BPF_CGROUP_INET6_POST_BIND,
+		BPF_CGROUP_INET4_POST_BIND,
+		0,
+		0,
+		NULL,
+		0,
+		ATTACH_REJECT,
+	},
+	{
+		"attach type mismatch default vs bind4",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		0,
+		BPF_CGROUP_INET4_POST_BIND,
+		0,
+		0,
+		NULL,
+		0,
+		ATTACH_REJECT,
+	},
+	{
+		"attach type mismatch bind6 vs sock_create",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		BPF_CGROUP_INET6_POST_BIND,
+		BPF_CGROUP_INET_SOCK_CREATE,
+		0,
+		0,
+		NULL,
+		0,
+		ATTACH_REJECT,
+	},
+	{
+		"bind4 reject all",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		BPF_CGROUP_INET4_POST_BIND,
+		BPF_CGROUP_INET4_POST_BIND,
+		AF_INET,
+		SOCK_STREAM,
+		"0.0.0.0",
+		0,
+		BIND_REJECT,
+	},
+	{
+		"bind6 reject all",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		BPF_CGROUP_INET6_POST_BIND,
+		BPF_CGROUP_INET6_POST_BIND,
+		AF_INET6,
+		SOCK_STREAM,
+		"::",
+		0,
+		BIND_REJECT,
+	},
+	{
+		"bind6 deny specific IP & port",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+
+			/* if (ip == expected && port == expected) */
+			BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
+				    offsetof(struct bpf_sock, src_ip6[3])),
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_7,
+				    __bpf_constant_ntohl(0x00000001), 4),
+			BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
+				    offsetof(struct bpf_sock, src_port)),
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 0x2001, 2),
+
+			/* return DENY; */
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_JMP_A(1),
+
+			/* else return ALLOW; */
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		BPF_CGROUP_INET6_POST_BIND,
+		BPF_CGROUP_INET6_POST_BIND,
+		AF_INET6,
+		SOCK_STREAM,
+		"::1",
+		8193,
+		BIND_REJECT,
+	},
+	{
+		"bind4 allow specific IP & port",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+
+			/* if (ip == expected && port == expected) */
+			BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
+				    offsetof(struct bpf_sock, src_ip4)),
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_7,
+				    __bpf_constant_ntohl(0x7F000001), 4),
+			BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
+				    offsetof(struct bpf_sock, src_port)),
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 0x1002, 2),
+
+			/* return ALLOW; */
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_JMP_A(1),
+
+			/* else return DENY; */
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		BPF_CGROUP_INET4_POST_BIND,
+		BPF_CGROUP_INET4_POST_BIND,
+		AF_INET,
+		SOCK_STREAM,
+		"127.0.0.1",
+		4098,
+		SUCCESS,
+	},
+	{
+		"bind4 allow all",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		BPF_CGROUP_INET4_POST_BIND,
+		BPF_CGROUP_INET4_POST_BIND,
+		AF_INET,
+		SOCK_STREAM,
+		"0.0.0.0",
+		0,
+		SUCCESS,
+	},
+	{
+		"bind6 allow all",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		BPF_CGROUP_INET6_POST_BIND,
+		BPF_CGROUP_INET6_POST_BIND,
+		AF_INET6,
+		SOCK_STREAM,
+		"::",
+		0,
+		SUCCESS,
+	},
+};
+
+static size_t probe_prog_length(const struct bpf_insn *fp)
+{
+	size_t len;
+
+	for (len = MAX_INSNS - 1; len > 0; --len)
+		if (fp[len].code != 0 || fp[len].imm != 0)
+			break;
+	return len + 1;
+}
+
+static int load_sock_prog(const struct bpf_insn *prog,
+			  enum bpf_attach_type attach_type)
+{
+	struct bpf_load_program_attr attr;
+
+	memset(&attr, 0, sizeof(struct bpf_load_program_attr));
+	attr.prog_type = BPF_PROG_TYPE_CGROUP_SOCK;
+	attr.expected_attach_type = attach_type;
+	attr.insns = prog;
+	attr.insns_cnt = probe_prog_length(attr.insns);
+	attr.license = "GPL";
+
+	return bpf_load_program_xattr(&attr, bpf_log_buf, BPF_LOG_BUF_SIZE);
+}
+
+static int attach_sock_prog(int cgfd, int progfd,
+			    enum bpf_attach_type attach_type)
+{
+	return bpf_prog_attach(progfd, cgfd, attach_type, BPF_F_ALLOW_OVERRIDE);
+}
+
+static int bind_sock(int domain, int type, const char *ip, unsigned short port)
+{
+	struct sockaddr_storage addr;
+	struct sockaddr_in6 *addr6;
+	struct sockaddr_in *addr4;
+	int sockfd = -1;
+	socklen_t len;
+	int err = 0;
+
+	sockfd = socket(domain, type, 0);
+	if (sockfd < 0)
+		goto err;
+
+	memset(&addr, 0, sizeof(addr));
+
+	if (domain == AF_INET) {
+		len = sizeof(struct sockaddr_in);
+		addr4 = (struct sockaddr_in *)&addr;
+		addr4->sin_family = domain;
+		addr4->sin_port = htons(port);
+		if (inet_pton(domain, ip, (void *)&addr4->sin_addr) != 1)
+			goto err;
+	} else if (domain == AF_INET6) {
+		len = sizeof(struct sockaddr_in6);
+		addr6 = (struct sockaddr_in6 *)&addr;
+		addr6->sin6_family = domain;
+		addr6->sin6_port = htons(port);
+		if (inet_pton(domain, ip, (void *)&addr6->sin6_addr) != 1)
+			goto err;
+	} else {
+		goto err;
+	}
+
+	if (bind(sockfd, (const struct sockaddr *)&addr, len) == -1)
+		goto err;
+
+	goto out;
+err:
+	err = -1;
+out:
+	close(sockfd);
+	return err;
+}
+
+static int run_test_case(int cgfd, const struct sock_test *test)
+{
+	int progfd = -1;
+	int err = 0;
+
+	printf("Test case: %s .. ", test->descr);
+	progfd = load_sock_prog(test->insns, test->expected_attach_type);
+	if (progfd < 0) {
+		if (test->result == LOAD_REJECT)
+			goto out;
+		else
+			goto err;
+	}
+
+	if (attach_sock_prog(cgfd, progfd, test->attach_type) == -1) {
+		if (test->result == ATTACH_REJECT)
+			goto out;
+		else
+			goto err;
+	}
+
+	if (bind_sock(test->domain, test->type, test->ip, test->port) == -1) {
+		/* sys_bind() may fail for different reasons, errno has to be
+		 * checked to confirm that BPF program rejected it.
+		 */
+		if (test->result == BIND_REJECT && errno == EPERM)
+			goto out;
+		else
+			goto err;
+	}
+
+
+	if (test->result != SUCCESS)
+		goto err;
+
+	goto out;
+err:
+	err = -1;
+out:
+	/* Detaching w/o checking return code: best effort attempt. */
+	if (progfd != -1)
+		bpf_prog_detach(cgfd, test->attach_type);
+	close(progfd);
+	printf("[%s]\n", err ? "FAIL" : "PASS");
+	return err;
+}
+
+static int run_tests(int cgfd)
+{
+	int passes = 0;
+	int fails = 0;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(tests); ++i) {
+		if (run_test_case(cgfd, &tests[i]))
+			++fails;
+		else
+			++passes;
+	}
+	printf("Summary: %d PASSED, %d FAILED\n", passes, fails);
+	return fails ? -1 : 0;
+}
+
+int main(int argc, char **argv)
+{
+	int cgfd = -1;
+	int err = 0;
+
+	if (setup_cgroup_environment())
+		goto err;
+
+	cgfd = create_and_get_cgroup(CG_PATH);
+	if (!cgfd)
+		goto err;
+
+	if (join_cgroup(CG_PATH))
+		goto err;
+
+	if (run_tests(cgfd))
+		goto err;
+
+	goto out;
+err:
+	err = -1;
+out:
+	close(cgfd);
+	cleanup_cgroup_environment();
+	return err;
+}
diff --git a/tools/testing/selftests/bpf/test_sock_addr.c b/tools/testing/selftests/bpf/test_sock_addr.c
new file mode 100644
index 000000000..e38f1cb70
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_sock_addr.c
@@ -0,0 +1,1441 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2018 Facebook
+
+#define _GNU_SOURCE
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include <arpa/inet.h>
+#include <netinet/in.h>
+#include <sys/types.h>
+#include <sys/select.h>
+#include <sys/socket.h>
+
+#include <linux/filter.h>
+
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+
+#include "cgroup_helpers.h"
+#include "bpf_rlimit.h"
+#include "bpf_util.h"
+
+#ifndef ENOTSUPP
+# define ENOTSUPP 524
+#endif
+
+#define CG_PATH	"/foo"
+#define CONNECT4_PROG_PATH	"./connect4_prog.o"
+#define CONNECT6_PROG_PATH	"./connect6_prog.o"
+#define SENDMSG4_PROG_PATH	"./sendmsg4_prog.o"
+#define SENDMSG6_PROG_PATH	"./sendmsg6_prog.o"
+
+#define SERV4_IP		"192.168.1.254"
+#define SERV4_REWRITE_IP	"127.0.0.1"
+#define SRC4_IP			"172.16.0.1"
+#define SRC4_REWRITE_IP		"127.0.0.4"
+#define SERV4_PORT		4040
+#define SERV4_REWRITE_PORT	4444
+
+#define SERV6_IP		"face:b00c:1234:5678::abcd"
+#define SERV6_REWRITE_IP	"::1"
+#define SERV6_V4MAPPED_IP	"::ffff:192.168.0.4"
+#define SRC6_IP			"::1"
+#define SRC6_REWRITE_IP		"::6"
+#define WILDCARD6_IP		"::"
+#define SERV6_PORT		6060
+#define SERV6_REWRITE_PORT	6666
+
+#define INET_NTOP_BUF	40
+
+struct sock_addr_test;
+
+typedef int (*load_fn)(const struct sock_addr_test *test);
+typedef int (*info_fn)(int, struct sockaddr *, socklen_t *);
+
+char bpf_log_buf[BPF_LOG_BUF_SIZE];
+
+struct sock_addr_test {
+	const char *descr;
+	/* BPF prog properties */
+	load_fn loadfn;
+	enum bpf_attach_type expected_attach_type;
+	enum bpf_attach_type attach_type;
+	/* Socket properties */
+	int domain;
+	int type;
+	/* IP:port pairs for BPF prog to override */
+	const char *requested_ip;
+	unsigned short requested_port;
+	const char *expected_ip;
+	unsigned short expected_port;
+	const char *expected_src_ip;
+	/* Expected test result */
+	enum {
+		LOAD_REJECT,
+		ATTACH_REJECT,
+		SYSCALL_EPERM,
+		SYSCALL_ENOTSUPP,
+		SUCCESS,
+	} expected_result;
+};
+
+static int bind4_prog_load(const struct sock_addr_test *test);
+static int bind6_prog_load(const struct sock_addr_test *test);
+static int connect4_prog_load(const struct sock_addr_test *test);
+static int connect6_prog_load(const struct sock_addr_test *test);
+static int sendmsg_allow_prog_load(const struct sock_addr_test *test);
+static int sendmsg_deny_prog_load(const struct sock_addr_test *test);
+static int sendmsg4_rw_asm_prog_load(const struct sock_addr_test *test);
+static int sendmsg4_rw_c_prog_load(const struct sock_addr_test *test);
+static int sendmsg6_rw_asm_prog_load(const struct sock_addr_test *test);
+static int sendmsg6_rw_c_prog_load(const struct sock_addr_test *test);
+static int sendmsg6_rw_v4mapped_prog_load(const struct sock_addr_test *test);
+static int sendmsg6_rw_wildcard_prog_load(const struct sock_addr_test *test);
+
+static struct sock_addr_test tests[] = {
+	/* bind */
+	{
+		"bind4: load prog with wrong expected attach type",
+		bind4_prog_load,
+		BPF_CGROUP_INET6_BIND,
+		BPF_CGROUP_INET4_BIND,
+		AF_INET,
+		SOCK_STREAM,
+		NULL,
+		0,
+		NULL,
+		0,
+		NULL,
+		LOAD_REJECT,
+	},
+	{
+		"bind4: attach prog with wrong attach type",
+		bind4_prog_load,
+		BPF_CGROUP_INET4_BIND,
+		BPF_CGROUP_INET6_BIND,
+		AF_INET,
+		SOCK_STREAM,
+		NULL,
+		0,
+		NULL,
+		0,
+		NULL,
+		ATTACH_REJECT,
+	},
+	{
+		"bind4: rewrite IP & TCP port in",
+		bind4_prog_load,
+		BPF_CGROUP_INET4_BIND,
+		BPF_CGROUP_INET4_BIND,
+		AF_INET,
+		SOCK_STREAM,
+		SERV4_IP,
+		SERV4_PORT,
+		SERV4_REWRITE_IP,
+		SERV4_REWRITE_PORT,
+		NULL,
+		SUCCESS,
+	},
+	{
+		"bind4: rewrite IP & UDP port in",
+		bind4_prog_load,
+		BPF_CGROUP_INET4_BIND,
+		BPF_CGROUP_INET4_BIND,
+		AF_INET,
+		SOCK_DGRAM,
+		SERV4_IP,
+		SERV4_PORT,
+		SERV4_REWRITE_IP,
+		SERV4_REWRITE_PORT,
+		NULL,
+		SUCCESS,
+	},
+	{
+		"bind6: load prog with wrong expected attach type",
+		bind6_prog_load,
+		BPF_CGROUP_INET4_BIND,
+		BPF_CGROUP_INET6_BIND,
+		AF_INET6,
+		SOCK_STREAM,
+		NULL,
+		0,
+		NULL,
+		0,
+		NULL,
+		LOAD_REJECT,
+	},
+	{
+		"bind6: attach prog with wrong attach type",
+		bind6_prog_load,
+		BPF_CGROUP_INET6_BIND,
+		BPF_CGROUP_INET4_BIND,
+		AF_INET,
+		SOCK_STREAM,
+		NULL,
+		0,
+		NULL,
+		0,
+		NULL,
+		ATTACH_REJECT,
+	},
+	{
+		"bind6: rewrite IP & TCP port in",
+		bind6_prog_load,
+		BPF_CGROUP_INET6_BIND,
+		BPF_CGROUP_INET6_BIND,
+		AF_INET6,
+		SOCK_STREAM,
+		SERV6_IP,
+		SERV6_PORT,
+		SERV6_REWRITE_IP,
+		SERV6_REWRITE_PORT,
+		NULL,
+		SUCCESS,
+	},
+	{
+		"bind6: rewrite IP & UDP port in",
+		bind6_prog_load,
+		BPF_CGROUP_INET6_BIND,
+		BPF_CGROUP_INET6_BIND,
+		AF_INET6,
+		SOCK_DGRAM,
+		SERV6_IP,
+		SERV6_PORT,
+		SERV6_REWRITE_IP,
+		SERV6_REWRITE_PORT,
+		NULL,
+		SUCCESS,
+	},
+
+	/* connect */
+	{
+		"connect4: load prog with wrong expected attach type",
+		connect4_prog_load,
+		BPF_CGROUP_INET6_CONNECT,
+		BPF_CGROUP_INET4_CONNECT,
+		AF_INET,
+		SOCK_STREAM,
+		NULL,
+		0,
+		NULL,
+		0,
+		NULL,
+		LOAD_REJECT,
+	},
+	{
+		"connect4: attach prog with wrong attach type",
+		connect4_prog_load,
+		BPF_CGROUP_INET4_CONNECT,
+		BPF_CGROUP_INET6_CONNECT,
+		AF_INET,
+		SOCK_STREAM,
+		NULL,
+		0,
+		NULL,
+		0,
+		NULL,
+		ATTACH_REJECT,
+	},
+	{
+		"connect4: rewrite IP & TCP port",
+		connect4_prog_load,
+		BPF_CGROUP_INET4_CONNECT,
+		BPF_CGROUP_INET4_CONNECT,
+		AF_INET,
+		SOCK_STREAM,
+		SERV4_IP,
+		SERV4_PORT,
+		SERV4_REWRITE_IP,
+		SERV4_REWRITE_PORT,
+		SRC4_REWRITE_IP,
+		SUCCESS,
+	},
+	{
+		"connect4: rewrite IP & UDP port",
+		connect4_prog_load,
+		BPF_CGROUP_INET4_CONNECT,
+		BPF_CGROUP_INET4_CONNECT,
+		AF_INET,
+		SOCK_DGRAM,
+		SERV4_IP,
+		SERV4_PORT,
+		SERV4_REWRITE_IP,
+		SERV4_REWRITE_PORT,
+		SRC4_REWRITE_IP,
+		SUCCESS,
+	},
+	{
+		"connect6: load prog with wrong expected attach type",
+		connect6_prog_load,
+		BPF_CGROUP_INET4_CONNECT,
+		BPF_CGROUP_INET6_CONNECT,
+		AF_INET6,
+		SOCK_STREAM,
+		NULL,
+		0,
+		NULL,
+		0,
+		NULL,
+		LOAD_REJECT,
+	},
+	{
+		"connect6: attach prog with wrong attach type",
+		connect6_prog_load,
+		BPF_CGROUP_INET6_CONNECT,
+		BPF_CGROUP_INET4_CONNECT,
+		AF_INET,
+		SOCK_STREAM,
+		NULL,
+		0,
+		NULL,
+		0,
+		NULL,
+		ATTACH_REJECT,
+	},
+	{
+		"connect6: rewrite IP & TCP port",
+		connect6_prog_load,
+		BPF_CGROUP_INET6_CONNECT,
+		BPF_CGROUP_INET6_CONNECT,
+		AF_INET6,
+		SOCK_STREAM,
+		SERV6_IP,
+		SERV6_PORT,
+		SERV6_REWRITE_IP,
+		SERV6_REWRITE_PORT,
+		SRC6_REWRITE_IP,
+		SUCCESS,
+	},
+	{
+		"connect6: rewrite IP & UDP port",
+		connect6_prog_load,
+		BPF_CGROUP_INET6_CONNECT,
+		BPF_CGROUP_INET6_CONNECT,
+		AF_INET6,
+		SOCK_DGRAM,
+		SERV6_IP,
+		SERV6_PORT,
+		SERV6_REWRITE_IP,
+		SERV6_REWRITE_PORT,
+		SRC6_REWRITE_IP,
+		SUCCESS,
+	},
+
+	/* sendmsg */
+	{
+		"sendmsg4: load prog with wrong expected attach type",
+		sendmsg4_rw_asm_prog_load,
+		BPF_CGROUP_UDP6_SENDMSG,
+		BPF_CGROUP_UDP4_SENDMSG,
+		AF_INET,
+		SOCK_DGRAM,
+		NULL,
+		0,
+		NULL,
+		0,
+		NULL,
+		LOAD_REJECT,
+	},
+	{
+		"sendmsg4: attach prog with wrong attach type",
+		sendmsg4_rw_asm_prog_load,
+		BPF_CGROUP_UDP4_SENDMSG,
+		BPF_CGROUP_UDP6_SENDMSG,
+		AF_INET,
+		SOCK_DGRAM,
+		NULL,
+		0,
+		NULL,
+		0,
+		NULL,
+		ATTACH_REJECT,
+	},
+	{
+		"sendmsg4: rewrite IP & port (asm)",
+		sendmsg4_rw_asm_prog_load,
+		BPF_CGROUP_UDP4_SENDMSG,
+		BPF_CGROUP_UDP4_SENDMSG,
+		AF_INET,
+		SOCK_DGRAM,
+		SERV4_IP,
+		SERV4_PORT,
+		SERV4_REWRITE_IP,
+		SERV4_REWRITE_PORT,
+		SRC4_REWRITE_IP,
+		SUCCESS,
+	},
+	{
+		"sendmsg4: rewrite IP & port (C)",
+		sendmsg4_rw_c_prog_load,
+		BPF_CGROUP_UDP4_SENDMSG,
+		BPF_CGROUP_UDP4_SENDMSG,
+		AF_INET,
+		SOCK_DGRAM,
+		SERV4_IP,
+		SERV4_PORT,
+		SERV4_REWRITE_IP,
+		SERV4_REWRITE_PORT,
+		SRC4_REWRITE_IP,
+		SUCCESS,
+	},
+	{
+		"sendmsg4: deny call",
+		sendmsg_deny_prog_load,
+		BPF_CGROUP_UDP4_SENDMSG,
+		BPF_CGROUP_UDP4_SENDMSG,
+		AF_INET,
+		SOCK_DGRAM,
+		SERV4_IP,
+		SERV4_PORT,
+		SERV4_REWRITE_IP,
+		SERV4_REWRITE_PORT,
+		SRC4_REWRITE_IP,
+		SYSCALL_EPERM,
+	},
+	{
+		"sendmsg6: load prog with wrong expected attach type",
+		sendmsg6_rw_asm_prog_load,
+		BPF_CGROUP_UDP4_SENDMSG,
+		BPF_CGROUP_UDP6_SENDMSG,
+		AF_INET6,
+		SOCK_DGRAM,
+		NULL,
+		0,
+		NULL,
+		0,
+		NULL,
+		LOAD_REJECT,
+	},
+	{
+		"sendmsg6: attach prog with wrong attach type",
+		sendmsg6_rw_asm_prog_load,
+		BPF_CGROUP_UDP6_SENDMSG,
+		BPF_CGROUP_UDP4_SENDMSG,
+		AF_INET6,
+		SOCK_DGRAM,
+		NULL,
+		0,
+		NULL,
+		0,
+		NULL,
+		ATTACH_REJECT,
+	},
+	{
+		"sendmsg6: rewrite IP & port (asm)",
+		sendmsg6_rw_asm_prog_load,
+		BPF_CGROUP_UDP6_SENDMSG,
+		BPF_CGROUP_UDP6_SENDMSG,
+		AF_INET6,
+		SOCK_DGRAM,
+		SERV6_IP,
+		SERV6_PORT,
+		SERV6_REWRITE_IP,
+		SERV6_REWRITE_PORT,
+		SRC6_REWRITE_IP,
+		SUCCESS,
+	},
+	{
+		"sendmsg6: rewrite IP & port (C)",
+		sendmsg6_rw_c_prog_load,
+		BPF_CGROUP_UDP6_SENDMSG,
+		BPF_CGROUP_UDP6_SENDMSG,
+		AF_INET6,
+		SOCK_DGRAM,
+		SERV6_IP,
+		SERV6_PORT,
+		SERV6_REWRITE_IP,
+		SERV6_REWRITE_PORT,
+		SRC6_REWRITE_IP,
+		SUCCESS,
+	},
+	{
+		"sendmsg6: IPv4-mapped IPv6",
+		sendmsg6_rw_v4mapped_prog_load,
+		BPF_CGROUP_UDP6_SENDMSG,
+		BPF_CGROUP_UDP6_SENDMSG,
+		AF_INET6,
+		SOCK_DGRAM,
+		SERV6_IP,
+		SERV6_PORT,
+		SERV6_REWRITE_IP,
+		SERV6_REWRITE_PORT,
+		SRC6_REWRITE_IP,
+		SYSCALL_ENOTSUPP,
+	},
+	{
+		"sendmsg6: set dst IP = [::] (BSD'ism)",
+		sendmsg6_rw_wildcard_prog_load,
+		BPF_CGROUP_UDP6_SENDMSG,
+		BPF_CGROUP_UDP6_SENDMSG,
+		AF_INET6,
+		SOCK_DGRAM,
+		SERV6_IP,
+		SERV6_PORT,
+		SERV6_REWRITE_IP,
+		SERV6_REWRITE_PORT,
+		SRC6_REWRITE_IP,
+		SUCCESS,
+	},
+	{
+		"sendmsg6: preserve dst IP = [::] (BSD'ism)",
+		sendmsg_allow_prog_load,
+		BPF_CGROUP_UDP6_SENDMSG,
+		BPF_CGROUP_UDP6_SENDMSG,
+		AF_INET6,
+		SOCK_DGRAM,
+		WILDCARD6_IP,
+		SERV6_PORT,
+		SERV6_REWRITE_IP,
+		SERV6_PORT,
+		SRC6_IP,
+		SUCCESS,
+	},
+	{
+		"sendmsg6: deny call",
+		sendmsg_deny_prog_load,
+		BPF_CGROUP_UDP6_SENDMSG,
+		BPF_CGROUP_UDP6_SENDMSG,
+		AF_INET6,
+		SOCK_DGRAM,
+		SERV6_IP,
+		SERV6_PORT,
+		SERV6_REWRITE_IP,
+		SERV6_REWRITE_PORT,
+		SRC6_REWRITE_IP,
+		SYSCALL_EPERM,
+	},
+};
+
+static int mk_sockaddr(int domain, const char *ip, unsigned short port,
+		       struct sockaddr *addr, socklen_t addr_len)
+{
+	struct sockaddr_in6 *addr6;
+	struct sockaddr_in *addr4;
+
+	if (domain != AF_INET && domain != AF_INET6) {
+		log_err("Unsupported address family");
+		return -1;
+	}
+
+	memset(addr, 0, addr_len);
+
+	if (domain == AF_INET) {
+		if (addr_len < sizeof(struct sockaddr_in))
+			return -1;
+		addr4 = (struct sockaddr_in *)addr;
+		addr4->sin_family = domain;
+		addr4->sin_port = htons(port);
+		if (inet_pton(domain, ip, (void *)&addr4->sin_addr) != 1) {
+			log_err("Invalid IPv4: %s", ip);
+			return -1;
+		}
+	} else if (domain == AF_INET6) {
+		if (addr_len < sizeof(struct sockaddr_in6))
+			return -1;
+		addr6 = (struct sockaddr_in6 *)addr;
+		addr6->sin6_family = domain;
+		addr6->sin6_port = htons(port);
+		if (inet_pton(domain, ip, (void *)&addr6->sin6_addr) != 1) {
+			log_err("Invalid IPv6: %s", ip);
+			return -1;
+		}
+	}
+
+	return 0;
+}
+
+static int load_insns(const struct sock_addr_test *test,
+		      const struct bpf_insn *insns, size_t insns_cnt)
+{
+	struct bpf_load_program_attr load_attr;
+	int ret;
+
+	memset(&load_attr, 0, sizeof(struct bpf_load_program_attr));
+	load_attr.prog_type = BPF_PROG_TYPE_CGROUP_SOCK_ADDR;
+	load_attr.expected_attach_type = test->expected_attach_type;
+	load_attr.insns = insns;
+	load_attr.insns_cnt = insns_cnt;
+	load_attr.license = "GPL";
+
+	ret = bpf_load_program_xattr(&load_attr, bpf_log_buf, BPF_LOG_BUF_SIZE);
+	if (ret < 0 && test->expected_result != LOAD_REJECT) {
+		log_err(">>> Loading program error.\n"
+			">>> Verifier output:\n%s\n-------\n", bpf_log_buf);
+	}
+
+	return ret;
+}
+
+/* [1] These testing programs try to read different context fields, including
+ * narrow loads of different sizes from user_ip4 and user_ip6, and write to
+ * those allowed to be overridden.
+ *
+ * [2] BPF_LD_IMM64 & BPF_JMP_REG are used below whenever there is a need to
+ * compare a register with unsigned 32bit integer. BPF_JMP_IMM can't be used
+ * in such cases since it accepts only _signed_ 32bit integer as IMM
+ * argument. Also note that BPF_LD_IMM64 contains 2 instructions what matters
+ * to count jumps properly.
+ */
+
+static int bind4_prog_load(const struct sock_addr_test *test)
+{
+	union {
+		uint8_t u4_addr8[4];
+		uint16_t u4_addr16[2];
+		uint32_t u4_addr32;
+	} ip4;
+	struct sockaddr_in addr4_rw;
+
+	if (inet_pton(AF_INET, SERV4_IP, (void *)&ip4) != 1) {
+		log_err("Invalid IPv4: %s", SERV4_IP);
+		return -1;
+	}
+
+	if (mk_sockaddr(AF_INET, SERV4_REWRITE_IP, SERV4_REWRITE_PORT,
+			(struct sockaddr *)&addr4_rw, sizeof(addr4_rw)) == -1)
+		return -1;
+
+	/* See [1]. */
+	struct bpf_insn insns[] = {
+		BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+
+		/* if (sk.family == AF_INET && */
+		BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
+			    offsetof(struct bpf_sock_addr, family)),
+		BPF_JMP_IMM(BPF_JNE, BPF_REG_7, AF_INET, 16),
+
+		/*     (sk.type == SOCK_DGRAM || sk.type == SOCK_STREAM) && */
+		BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
+			    offsetof(struct bpf_sock_addr, type)),
+		BPF_JMP_IMM(BPF_JNE, BPF_REG_7, SOCK_DGRAM, 1),
+		BPF_JMP_A(1),
+		BPF_JMP_IMM(BPF_JNE, BPF_REG_7, SOCK_STREAM, 12),
+
+		/*     1st_byte_of_user_ip4 == expected && */
+		BPF_LDX_MEM(BPF_B, BPF_REG_7, BPF_REG_6,
+			    offsetof(struct bpf_sock_addr, user_ip4)),
+		BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr8[0], 10),
+
+		/*     1st_half_of_user_ip4 == expected && */
+		BPF_LDX_MEM(BPF_H, BPF_REG_7, BPF_REG_6,
+			    offsetof(struct bpf_sock_addr, user_ip4)),
+		BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr16[0], 8),
+
+		/*     whole_user_ip4 == expected) { */
+		BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
+			    offsetof(struct bpf_sock_addr, user_ip4)),
+		BPF_LD_IMM64(BPF_REG_8, ip4.u4_addr32), /* See [2]. */
+		BPF_JMP_REG(BPF_JNE, BPF_REG_7, BPF_REG_8, 4),
+
+		/*      user_ip4 = addr4_rw.sin_addr */
+		BPF_MOV32_IMM(BPF_REG_7, addr4_rw.sin_addr.s_addr),
+		BPF_STX_MEM(BPF_W, BPF_REG_6, BPF_REG_7,
+			    offsetof(struct bpf_sock_addr, user_ip4)),
+
+		/*      user_port = addr4_rw.sin_port */
+		BPF_MOV32_IMM(BPF_REG_7, addr4_rw.sin_port),
+		BPF_STX_MEM(BPF_W, BPF_REG_6, BPF_REG_7,
+			    offsetof(struct bpf_sock_addr, user_port)),
+		/* } */
+
+		/* return 1 */
+		BPF_MOV64_IMM(BPF_REG_0, 1),
+		BPF_EXIT_INSN(),
+	};
+
+	return load_insns(test, insns, sizeof(insns) / sizeof(struct bpf_insn));
+}
+
+static int bind6_prog_load(const struct sock_addr_test *test)
+{
+	struct sockaddr_in6 addr6_rw;
+	struct in6_addr ip6;
+
+	if (inet_pton(AF_INET6, SERV6_IP, (void *)&ip6) != 1) {
+		log_err("Invalid IPv6: %s", SERV6_IP);
+		return -1;
+	}
+
+	if (mk_sockaddr(AF_INET6, SERV6_REWRITE_IP, SERV6_REWRITE_PORT,
+			(struct sockaddr *)&addr6_rw, sizeof(addr6_rw)) == -1)
+		return -1;
+
+	/* See [1]. */
+	struct bpf_insn insns[] = {
+		BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+
+		/* if (sk.family == AF_INET6 && */
+		BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
+			    offsetof(struct bpf_sock_addr, family)),
+		BPF_JMP_IMM(BPF_JNE, BPF_REG_7, AF_INET6, 18),
+
+		/*            5th_byte_of_user_ip6 == expected && */
+		BPF_LDX_MEM(BPF_B, BPF_REG_7, BPF_REG_6,
+			    offsetof(struct bpf_sock_addr, user_ip6[1])),
+		BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip6.s6_addr[4], 16),
+
+		/*            3rd_half_of_user_ip6 == expected && */
+		BPF_LDX_MEM(BPF_H, BPF_REG_7, BPF_REG_6,
+			    offsetof(struct bpf_sock_addr, user_ip6[1])),
+		BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip6.s6_addr16[2], 14),
+
+		/*            last_word_of_user_ip6 == expected) { */
+		BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
+			    offsetof(struct bpf_sock_addr, user_ip6[3])),
+		BPF_LD_IMM64(BPF_REG_8, ip6.s6_addr32[3]),  /* See [2]. */
+		BPF_JMP_REG(BPF_JNE, BPF_REG_7, BPF_REG_8, 10),
+
+
+#define STORE_IPV6_WORD(N)						       \
+		BPF_MOV32_IMM(BPF_REG_7, addr6_rw.sin6_addr.s6_addr32[N]),     \
+		BPF_STX_MEM(BPF_W, BPF_REG_6, BPF_REG_7,		       \
+			    offsetof(struct bpf_sock_addr, user_ip6[N]))
+
+		/*      user_ip6 = addr6_rw.sin6_addr */
+		STORE_IPV6_WORD(0),
+		STORE_IPV6_WORD(1),
+		STORE_IPV6_WORD(2),
+		STORE_IPV6_WORD(3),
+
+		/*      user_port = addr6_rw.sin6_port */
+		BPF_MOV32_IMM(BPF_REG_7, addr6_rw.sin6_port),
+		BPF_STX_MEM(BPF_W, BPF_REG_6, BPF_REG_7,
+			    offsetof(struct bpf_sock_addr, user_port)),
+
+		/* } */
+
+		/* return 1 */
+		BPF_MOV64_IMM(BPF_REG_0, 1),
+		BPF_EXIT_INSN(),
+	};
+
+	return load_insns(test, insns, sizeof(insns) / sizeof(struct bpf_insn));
+}
+
+static int load_path(const struct sock_addr_test *test, const char *path)
+{
+	struct bpf_prog_load_attr attr;
+	struct bpf_object *obj;
+	int prog_fd;
+
+	memset(&attr, 0, sizeof(struct bpf_prog_load_attr));
+	attr.file = path;
+	attr.prog_type = BPF_PROG_TYPE_CGROUP_SOCK_ADDR;
+	attr.expected_attach_type = test->expected_attach_type;
+
+	if (bpf_prog_load_xattr(&attr, &obj, &prog_fd)) {
+		if (test->expected_result != LOAD_REJECT)
+			log_err(">>> Loading program (%s) error.\n", path);
+		return -1;
+	}
+
+	return prog_fd;
+}
+
+static int connect4_prog_load(const struct sock_addr_test *test)
+{
+	return load_path(test, CONNECT4_PROG_PATH);
+}
+
+static int connect6_prog_load(const struct sock_addr_test *test)
+{
+	return load_path(test, CONNECT6_PROG_PATH);
+}
+
+static int sendmsg_ret_only_prog_load(const struct sock_addr_test *test,
+				      int32_t rc)
+{
+	struct bpf_insn insns[] = {
+		/* return rc */
+		BPF_MOV64_IMM(BPF_REG_0, rc),
+		BPF_EXIT_INSN(),
+	};
+	return load_insns(test, insns, sizeof(insns) / sizeof(struct bpf_insn));
+}
+
+static int sendmsg_allow_prog_load(const struct sock_addr_test *test)
+{
+	return sendmsg_ret_only_prog_load(test, /*rc*/ 1);
+}
+
+static int sendmsg_deny_prog_load(const struct sock_addr_test *test)
+{
+	return sendmsg_ret_only_prog_load(test, /*rc*/ 0);
+}
+
+static int sendmsg4_rw_asm_prog_load(const struct sock_addr_test *test)
+{
+	struct sockaddr_in dst4_rw_addr;
+	struct in_addr src4_rw_ip;
+
+	if (inet_pton(AF_INET, SRC4_REWRITE_IP, (void *)&src4_rw_ip) != 1) {
+		log_err("Invalid IPv4: %s", SRC4_REWRITE_IP);
+		return -1;
+	}
+
+	if (mk_sockaddr(AF_INET, SERV4_REWRITE_IP, SERV4_REWRITE_PORT,
+			(struct sockaddr *)&dst4_rw_addr,
+			sizeof(dst4_rw_addr)) == -1)
+		return -1;
+
+	struct bpf_insn insns[] = {
+		BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+
+		/* if (sk.family == AF_INET && */
+		BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
+			    offsetof(struct bpf_sock_addr, family)),
+		BPF_JMP_IMM(BPF_JNE, BPF_REG_7, AF_INET, 8),
+
+		/*     sk.type == SOCK_DGRAM)  { */
+		BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
+			    offsetof(struct bpf_sock_addr, type)),
+		BPF_JMP_IMM(BPF_JNE, BPF_REG_7, SOCK_DGRAM, 6),
+
+		/*      msg_src_ip4 = src4_rw_ip */
+		BPF_MOV32_IMM(BPF_REG_7, src4_rw_ip.s_addr),
+		BPF_STX_MEM(BPF_W, BPF_REG_6, BPF_REG_7,
+			    offsetof(struct bpf_sock_addr, msg_src_ip4)),
+
+		/*      user_ip4 = dst4_rw_addr.sin_addr */
+		BPF_MOV32_IMM(BPF_REG_7, dst4_rw_addr.sin_addr.s_addr),
+		BPF_STX_MEM(BPF_W, BPF_REG_6, BPF_REG_7,
+			    offsetof(struct bpf_sock_addr, user_ip4)),
+
+		/*      user_port = dst4_rw_addr.sin_port */
+		BPF_MOV32_IMM(BPF_REG_7, dst4_rw_addr.sin_port),
+		BPF_STX_MEM(BPF_W, BPF_REG_6, BPF_REG_7,
+			    offsetof(struct bpf_sock_addr, user_port)),
+		/* } */
+
+		/* return 1 */
+		BPF_MOV64_IMM(BPF_REG_0, 1),
+		BPF_EXIT_INSN(),
+	};
+
+	return load_insns(test, insns, sizeof(insns) / sizeof(struct bpf_insn));
+}
+
+static int sendmsg4_rw_c_prog_load(const struct sock_addr_test *test)
+{
+	return load_path(test, SENDMSG4_PROG_PATH);
+}
+
+static int sendmsg6_rw_dst_asm_prog_load(const struct sock_addr_test *test,
+					 const char *rw_dst_ip)
+{
+	struct sockaddr_in6 dst6_rw_addr;
+	struct in6_addr src6_rw_ip;
+
+	if (inet_pton(AF_INET6, SRC6_REWRITE_IP, (void *)&src6_rw_ip) != 1) {
+		log_err("Invalid IPv6: %s", SRC6_REWRITE_IP);
+		return -1;
+	}
+
+	if (mk_sockaddr(AF_INET6, rw_dst_ip, SERV6_REWRITE_PORT,
+			(struct sockaddr *)&dst6_rw_addr,
+			sizeof(dst6_rw_addr)) == -1)
+		return -1;
+
+	struct bpf_insn insns[] = {
+		BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+
+		/* if (sk.family == AF_INET6) { */
+		BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
+			    offsetof(struct bpf_sock_addr, family)),
+		BPF_JMP_IMM(BPF_JNE, BPF_REG_7, AF_INET6, 18),
+
+#define STORE_IPV6_WORD_N(DST, SRC, N)					       \
+		BPF_MOV32_IMM(BPF_REG_7, SRC[N]),			       \
+		BPF_STX_MEM(BPF_W, BPF_REG_6, BPF_REG_7,		       \
+			    offsetof(struct bpf_sock_addr, DST[N]))
+
+#define STORE_IPV6(DST, SRC)						       \
+		STORE_IPV6_WORD_N(DST, SRC, 0),				       \
+		STORE_IPV6_WORD_N(DST, SRC, 1),				       \
+		STORE_IPV6_WORD_N(DST, SRC, 2),				       \
+		STORE_IPV6_WORD_N(DST, SRC, 3)
+
+		STORE_IPV6(msg_src_ip6, src6_rw_ip.s6_addr32),
+		STORE_IPV6(user_ip6, dst6_rw_addr.sin6_addr.s6_addr32),
+
+		/*      user_port = dst6_rw_addr.sin6_port */
+		BPF_MOV32_IMM(BPF_REG_7, dst6_rw_addr.sin6_port),
+		BPF_STX_MEM(BPF_W, BPF_REG_6, BPF_REG_7,
+			    offsetof(struct bpf_sock_addr, user_port)),
+
+		/* } */
+
+		/* return 1 */
+		BPF_MOV64_IMM(BPF_REG_0, 1),
+		BPF_EXIT_INSN(),
+	};
+
+	return load_insns(test, insns, sizeof(insns) / sizeof(struct bpf_insn));
+}
+
+static int sendmsg6_rw_asm_prog_load(const struct sock_addr_test *test)
+{
+	return sendmsg6_rw_dst_asm_prog_load(test, SERV6_REWRITE_IP);
+}
+
+static int sendmsg6_rw_v4mapped_prog_load(const struct sock_addr_test *test)
+{
+	return sendmsg6_rw_dst_asm_prog_load(test, SERV6_V4MAPPED_IP);
+}
+
+static int sendmsg6_rw_wildcard_prog_load(const struct sock_addr_test *test)
+{
+	return sendmsg6_rw_dst_asm_prog_load(test, WILDCARD6_IP);
+}
+
+static int sendmsg6_rw_c_prog_load(const struct sock_addr_test *test)
+{
+	return load_path(test, SENDMSG6_PROG_PATH);
+}
+
+static int cmp_addr(const struct sockaddr_storage *addr1,
+		    const struct sockaddr_storage *addr2, int cmp_port)
+{
+	const struct sockaddr_in *four1, *four2;
+	const struct sockaddr_in6 *six1, *six2;
+
+	if (addr1->ss_family != addr2->ss_family)
+		return -1;
+
+	if (addr1->ss_family == AF_INET) {
+		four1 = (const struct sockaddr_in *)addr1;
+		four2 = (const struct sockaddr_in *)addr2;
+		return !((four1->sin_port == four2->sin_port || !cmp_port) &&
+			 four1->sin_addr.s_addr == four2->sin_addr.s_addr);
+	} else if (addr1->ss_family == AF_INET6) {
+		six1 = (const struct sockaddr_in6 *)addr1;
+		six2 = (const struct sockaddr_in6 *)addr2;
+		return !((six1->sin6_port == six2->sin6_port || !cmp_port) &&
+			 !memcmp(&six1->sin6_addr, &six2->sin6_addr,
+				 sizeof(struct in6_addr)));
+	}
+
+	return -1;
+}
+
+static int cmp_sock_addr(info_fn fn, int sock1,
+			 const struct sockaddr_storage *addr2, int cmp_port)
+{
+	struct sockaddr_storage addr1;
+	socklen_t len1 = sizeof(addr1);
+
+	memset(&addr1, 0, len1);
+	if (fn(sock1, (struct sockaddr *)&addr1, (socklen_t *)&len1) != 0)
+		return -1;
+
+	return cmp_addr(&addr1, addr2, cmp_port);
+}
+
+static int cmp_local_ip(int sock1, const struct sockaddr_storage *addr2)
+{
+	return cmp_sock_addr(getsockname, sock1, addr2, /*cmp_port*/ 0);
+}
+
+static int cmp_local_addr(int sock1, const struct sockaddr_storage *addr2)
+{
+	return cmp_sock_addr(getsockname, sock1, addr2, /*cmp_port*/ 1);
+}
+
+static int cmp_peer_addr(int sock1, const struct sockaddr_storage *addr2)
+{
+	return cmp_sock_addr(getpeername, sock1, addr2, /*cmp_port*/ 1);
+}
+
+static int start_server(int type, const struct sockaddr_storage *addr,
+			socklen_t addr_len)
+{
+	int fd;
+
+	fd = socket(addr->ss_family, type, 0);
+	if (fd == -1) {
+		log_err("Failed to create server socket");
+		goto out;
+	}
+
+	if (bind(fd, (const struct sockaddr *)addr, addr_len) == -1) {
+		log_err("Failed to bind server socket");
+		goto close_out;
+	}
+
+	if (type == SOCK_STREAM) {
+		if (listen(fd, 128) == -1) {
+			log_err("Failed to listen on server socket");
+			goto close_out;
+		}
+	}
+
+	goto out;
+close_out:
+	close(fd);
+	fd = -1;
+out:
+	return fd;
+}
+
+static int connect_to_server(int type, const struct sockaddr_storage *addr,
+			     socklen_t addr_len)
+{
+	int domain;
+	int fd = -1;
+
+	domain = addr->ss_family;
+
+	if (domain != AF_INET && domain != AF_INET6) {
+		log_err("Unsupported address family");
+		goto err;
+	}
+
+	fd = socket(domain, type, 0);
+	if (fd == -1) {
+		log_err("Failed to create client socket");
+		goto err;
+	}
+
+	if (connect(fd, (const struct sockaddr *)addr, addr_len) == -1) {
+		log_err("Fail to connect to server");
+		goto err;
+	}
+
+	goto out;
+err:
+	close(fd);
+	fd = -1;
+out:
+	return fd;
+}
+
+int init_pktinfo(int domain, struct cmsghdr *cmsg)
+{
+	struct in6_pktinfo *pktinfo6;
+	struct in_pktinfo *pktinfo4;
+
+	if (domain == AF_INET) {
+		cmsg->cmsg_level = SOL_IP;
+		cmsg->cmsg_type = IP_PKTINFO;
+		cmsg->cmsg_len = CMSG_LEN(sizeof(struct in_pktinfo));
+		pktinfo4 = (struct in_pktinfo *)CMSG_DATA(cmsg);
+		memset(pktinfo4, 0, sizeof(struct in_pktinfo));
+		if (inet_pton(domain, SRC4_IP,
+			      (void *)&pktinfo4->ipi_spec_dst) != 1)
+			return -1;
+	} else if (domain == AF_INET6) {
+		cmsg->cmsg_level = SOL_IPV6;
+		cmsg->cmsg_type = IPV6_PKTINFO;
+		cmsg->cmsg_len = CMSG_LEN(sizeof(struct in6_pktinfo));
+		pktinfo6 = (struct in6_pktinfo *)CMSG_DATA(cmsg);
+		memset(pktinfo6, 0, sizeof(struct in6_pktinfo));
+		if (inet_pton(domain, SRC6_IP,
+			      (void *)&pktinfo6->ipi6_addr) != 1)
+			return -1;
+	} else {
+		return -1;
+	}
+
+	return 0;
+}
+
+static int sendmsg_to_server(int type, const struct sockaddr_storage *addr,
+			     socklen_t addr_len, int set_cmsg, int flags,
+			     int *syscall_err)
+{
+	union {
+		char buf[CMSG_SPACE(sizeof(struct in6_pktinfo))];
+		struct cmsghdr align;
+	} control6;
+	union {
+		char buf[CMSG_SPACE(sizeof(struct in_pktinfo))];
+		struct cmsghdr align;
+	} control4;
+	struct msghdr hdr;
+	struct iovec iov;
+	char data = 'a';
+	int domain;
+	int fd = -1;
+
+	domain = addr->ss_family;
+
+	if (domain != AF_INET && domain != AF_INET6) {
+		log_err("Unsupported address family");
+		goto err;
+	}
+
+	fd = socket(domain, type, 0);
+	if (fd == -1) {
+		log_err("Failed to create client socket");
+		goto err;
+	}
+
+	memset(&iov, 0, sizeof(iov));
+	iov.iov_base = &data;
+	iov.iov_len = sizeof(data);
+
+	memset(&hdr, 0, sizeof(hdr));
+	hdr.msg_name = (void *)addr;
+	hdr.msg_namelen = addr_len;
+	hdr.msg_iov = &iov;
+	hdr.msg_iovlen = 1;
+
+	if (set_cmsg) {
+		if (domain == AF_INET) {
+			hdr.msg_control = &control4;
+			hdr.msg_controllen = sizeof(control4.buf);
+		} else if (domain == AF_INET6) {
+			hdr.msg_control = &control6;
+			hdr.msg_controllen = sizeof(control6.buf);
+		}
+		if (init_pktinfo(domain, CMSG_FIRSTHDR(&hdr))) {
+			log_err("Fail to init pktinfo");
+			goto err;
+		}
+	}
+
+	if (sendmsg(fd, &hdr, flags) != sizeof(data)) {
+		log_err("Fail to send message to server");
+		*syscall_err = errno;
+		goto err;
+	}
+
+	goto out;
+err:
+	close(fd);
+	fd = -1;
+out:
+	return fd;
+}
+
+static int fastconnect_to_server(const struct sockaddr_storage *addr,
+				 socklen_t addr_len)
+{
+	int sendmsg_err;
+
+	return sendmsg_to_server(SOCK_STREAM, addr, addr_len, /*set_cmsg*/0,
+				 MSG_FASTOPEN, &sendmsg_err);
+}
+
+static int recvmsg_from_client(int sockfd, struct sockaddr_storage *src_addr)
+{
+	struct timeval tv;
+	struct msghdr hdr;
+	struct iovec iov;
+	char data[64];
+	fd_set rfds;
+
+	FD_ZERO(&rfds);
+	FD_SET(sockfd, &rfds);
+
+	tv.tv_sec = 2;
+	tv.tv_usec = 0;
+
+	if (select(sockfd + 1, &rfds, NULL, NULL, &tv) <= 0 ||
+	    !FD_ISSET(sockfd, &rfds))
+		return -1;
+
+	memset(&iov, 0, sizeof(iov));
+	iov.iov_base = data;
+	iov.iov_len = sizeof(data);
+
+	memset(&hdr, 0, sizeof(hdr));
+	hdr.msg_name = src_addr;
+	hdr.msg_namelen = sizeof(struct sockaddr_storage);
+	hdr.msg_iov = &iov;
+	hdr.msg_iovlen = 1;
+
+	return recvmsg(sockfd, &hdr, 0);
+}
+
+static int init_addrs(const struct sock_addr_test *test,
+		      struct sockaddr_storage *requested_addr,
+		      struct sockaddr_storage *expected_addr,
+		      struct sockaddr_storage *expected_src_addr)
+{
+	socklen_t addr_len = sizeof(struct sockaddr_storage);
+
+	if (mk_sockaddr(test->domain, test->expected_ip, test->expected_port,
+			(struct sockaddr *)expected_addr, addr_len) == -1)
+		goto err;
+
+	if (mk_sockaddr(test->domain, test->requested_ip, test->requested_port,
+			(struct sockaddr *)requested_addr, addr_len) == -1)
+		goto err;
+
+	if (test->expected_src_ip &&
+	    mk_sockaddr(test->domain, test->expected_src_ip, 0,
+			(struct sockaddr *)expected_src_addr, addr_len) == -1)
+		goto err;
+
+	return 0;
+err:
+	return -1;
+}
+
+static int run_bind_test_case(const struct sock_addr_test *test)
+{
+	socklen_t addr_len = sizeof(struct sockaddr_storage);
+	struct sockaddr_storage requested_addr;
+	struct sockaddr_storage expected_addr;
+	int clientfd = -1;
+	int servfd = -1;
+	int err = 0;
+
+	if (init_addrs(test, &requested_addr, &expected_addr, NULL))
+		goto err;
+
+	servfd = start_server(test->type, &requested_addr, addr_len);
+	if (servfd == -1)
+		goto err;
+
+	if (cmp_local_addr(servfd, &expected_addr))
+		goto err;
+
+	/* Try to connect to server just in case */
+	clientfd = connect_to_server(test->type, &expected_addr, addr_len);
+	if (clientfd == -1)
+		goto err;
+
+	goto out;
+err:
+	err = -1;
+out:
+	close(clientfd);
+	close(servfd);
+	return err;
+}
+
+static int run_connect_test_case(const struct sock_addr_test *test)
+{
+	socklen_t addr_len = sizeof(struct sockaddr_storage);
+	struct sockaddr_storage expected_src_addr;
+	struct sockaddr_storage requested_addr;
+	struct sockaddr_storage expected_addr;
+	int clientfd = -1;
+	int servfd = -1;
+	int err = 0;
+
+	if (init_addrs(test, &requested_addr, &expected_addr,
+		       &expected_src_addr))
+		goto err;
+
+	/* Prepare server to connect to */
+	servfd = start_server(test->type, &expected_addr, addr_len);
+	if (servfd == -1)
+		goto err;
+
+	clientfd = connect_to_server(test->type, &requested_addr, addr_len);
+	if (clientfd == -1)
+		goto err;
+
+	/* Make sure src and dst addrs were overridden properly */
+	if (cmp_peer_addr(clientfd, &expected_addr))
+		goto err;
+
+	if (cmp_local_ip(clientfd, &expected_src_addr))
+		goto err;
+
+	if (test->type == SOCK_STREAM) {
+		/* Test TCP Fast Open scenario */
+		clientfd = fastconnect_to_server(&requested_addr, addr_len);
+		if (clientfd == -1)
+			goto err;
+
+		/* Make sure src and dst addrs were overridden properly */
+		if (cmp_peer_addr(clientfd, &expected_addr))
+			goto err;
+
+		if (cmp_local_ip(clientfd, &expected_src_addr))
+			goto err;
+	}
+
+	goto out;
+err:
+	err = -1;
+out:
+	close(clientfd);
+	close(servfd);
+	return err;
+}
+
+static int run_sendmsg_test_case(const struct sock_addr_test *test)
+{
+	socklen_t addr_len = sizeof(struct sockaddr_storage);
+	struct sockaddr_storage expected_src_addr;
+	struct sockaddr_storage requested_addr;
+	struct sockaddr_storage expected_addr;
+	struct sockaddr_storage real_src_addr;
+	int clientfd = -1;
+	int servfd = -1;
+	int set_cmsg;
+	int err = 0;
+
+	if (test->type != SOCK_DGRAM)
+		goto err;
+
+	if (init_addrs(test, &requested_addr, &expected_addr,
+		       &expected_src_addr))
+		goto err;
+
+	/* Prepare server to sendmsg to */
+	servfd = start_server(test->type, &expected_addr, addr_len);
+	if (servfd == -1)
+		goto err;
+
+	for (set_cmsg = 0; set_cmsg <= 1; ++set_cmsg) {
+		if (clientfd >= 0)
+			close(clientfd);
+
+		clientfd = sendmsg_to_server(test->type, &requested_addr,
+					     addr_len, set_cmsg, /*flags*/0,
+					     &err);
+		if (err)
+			goto out;
+		else if (clientfd == -1)
+			goto err;
+
+		/* Try to receive message on server instead of using
+		 * getpeername(2) on client socket, to check that client's
+		 * destination address was rewritten properly, since
+		 * getpeername(2) doesn't work with unconnected datagram
+		 * sockets.
+		 *
+		 * Get source address from recvmsg(2) as well to make sure
+		 * source was rewritten properly: getsockname(2) can't be used
+		 * since socket is unconnected and source defined for one
+		 * specific packet may differ from the one used by default and
+		 * returned by getsockname(2).
+		 */
+		if (recvmsg_from_client(servfd, &real_src_addr) == -1)
+			goto err;
+
+		if (cmp_addr(&real_src_addr, &expected_src_addr, /*cmp_port*/0))
+			goto err;
+	}
+
+	goto out;
+err:
+	err = -1;
+out:
+	close(clientfd);
+	close(servfd);
+	return err;
+}
+
+static int run_test_case(int cgfd, const struct sock_addr_test *test)
+{
+	int progfd = -1;
+	int err = 0;
+
+	printf("Test case: %s .. ", test->descr);
+
+	progfd = test->loadfn(test);
+	if (test->expected_result == LOAD_REJECT && progfd < 0)
+		goto out;
+	else if (test->expected_result == LOAD_REJECT || progfd < 0)
+		goto err;
+
+	err = bpf_prog_attach(progfd, cgfd, test->attach_type,
+			      BPF_F_ALLOW_OVERRIDE);
+	if (test->expected_result == ATTACH_REJECT && err) {
+		err = 0; /* error was expected, reset it */
+		goto out;
+	} else if (test->expected_result == ATTACH_REJECT || err) {
+		goto err;
+	}
+
+	switch (test->attach_type) {
+	case BPF_CGROUP_INET4_BIND:
+	case BPF_CGROUP_INET6_BIND:
+		err = run_bind_test_case(test);
+		break;
+	case BPF_CGROUP_INET4_CONNECT:
+	case BPF_CGROUP_INET6_CONNECT:
+		err = run_connect_test_case(test);
+		break;
+	case BPF_CGROUP_UDP4_SENDMSG:
+	case BPF_CGROUP_UDP6_SENDMSG:
+		err = run_sendmsg_test_case(test);
+		break;
+	default:
+		goto err;
+	}
+
+	if (test->expected_result == SYSCALL_EPERM && err == EPERM) {
+		err = 0; /* error was expected, reset it */
+		goto out;
+	}
+
+	if (test->expected_result == SYSCALL_ENOTSUPP && err == ENOTSUPP) {
+		err = 0; /* error was expected, reset it */
+		goto out;
+	}
+
+	if (err || test->expected_result != SUCCESS)
+		goto err;
+
+	goto out;
+err:
+	err = -1;
+out:
+	/* Detaching w/o checking return code: best effort attempt. */
+	if (progfd != -1)
+		bpf_prog_detach(cgfd, test->attach_type);
+	close(progfd);
+	printf("[%s]\n", err ? "FAIL" : "PASS");
+	return err;
+}
+
+static int run_tests(int cgfd)
+{
+	int passes = 0;
+	int fails = 0;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(tests); ++i) {
+		if (run_test_case(cgfd, &tests[i]))
+			++fails;
+		else
+			++passes;
+	}
+	printf("Summary: %d PASSED, %d FAILED\n", passes, fails);
+	return fails ? -1 : 0;
+}
+
+int main(int argc, char **argv)
+{
+	int cgfd = -1;
+	int err = 0;
+
+	if (argc < 2) {
+		fprintf(stderr,
+			"%s has to be run via %s.sh. Skip direct run.\n",
+			argv[0], argv[0]);
+		exit(err);
+	}
+
+	if (setup_cgroup_environment())
+		goto err;
+
+	cgfd = create_and_get_cgroup(CG_PATH);
+	if (!cgfd)
+		goto err;
+
+	if (join_cgroup(CG_PATH))
+		goto err;
+
+	if (run_tests(cgfd))
+		goto err;
+
+	goto out;
+err:
+	err = -1;
+out:
+	close(cgfd);
+	cleanup_cgroup_environment();
+	return err;
+}
diff --git a/tools/testing/selftests/bpf/test_sock_addr.sh b/tools/testing/selftests/bpf/test_sock_addr.sh
new file mode 100755
index 000000000..9832a875a
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_sock_addr.sh
@@ -0,0 +1,57 @@
+#!/bin/sh
+
+set -eu
+
+ping_once()
+{
+	ping -${1} -q -c 1 -W 1 ${2%%/*} >/dev/null 2>&1
+}
+
+wait_for_ip()
+{
+	local _i
+	echo -n "Wait for testing IPv4/IPv6 to become available "
+	for _i in $(seq ${MAX_PING_TRIES}); do
+		echo -n "."
+		if ping_once 4 ${TEST_IPv4} && ping_once 6 ${TEST_IPv6}; then
+			echo " OK"
+			return
+		fi
+	done
+	echo 1>&2 "ERROR: Timeout waiting for test IP to become available."
+	exit 1
+}
+
+setup()
+{
+	# Create testing interfaces not to interfere with current environment.
+	ip link add dev ${TEST_IF} type veth peer name ${TEST_IF_PEER}
+	ip link set ${TEST_IF} up
+	ip link set ${TEST_IF_PEER} up
+
+	ip -4 addr add ${TEST_IPv4} dev ${TEST_IF}
+	ip -6 addr add ${TEST_IPv6} dev ${TEST_IF}
+	wait_for_ip
+}
+
+cleanup()
+{
+	ip link del ${TEST_IF} 2>/dev/null || :
+	ip link del ${TEST_IF_PEER} 2>/dev/null || :
+}
+
+main()
+{
+	trap cleanup EXIT 2 3 6 15
+	setup
+	./test_sock_addr setup_done
+}
+
+BASENAME=$(basename $0 .sh)
+TEST_IF="${BASENAME}1"
+TEST_IF_PEER="${BASENAME}2"
+TEST_IPv4="127.0.0.4/8"
+TEST_IPv6="::6/128"
+MAX_PING_TRIES=5
+
+main
diff --git a/tools/testing/selftests/bpf/test_socket_cookie.c b/tools/testing/selftests/bpf/test_socket_cookie.c
new file mode 100644
index 000000000..68e108e46
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_socket_cookie.c
@@ -0,0 +1,225 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2018 Facebook
+
+#include <string.h>
+#include <unistd.h>
+
+#include <arpa/inet.h>
+#include <netinet/in.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+
+#include "bpf_rlimit.h"
+#include "cgroup_helpers.h"
+
+#define CG_PATH			"/foo"
+#define SOCKET_COOKIE_PROG	"./socket_cookie_prog.o"
+
+static int start_server(void)
+{
+	struct sockaddr_in6 addr;
+	int fd;
+
+	fd = socket(AF_INET6, SOCK_STREAM, 0);
+	if (fd == -1) {
+		log_err("Failed to create server socket");
+		goto out;
+	}
+
+	memset(&addr, 0, sizeof(addr));
+	addr.sin6_family = AF_INET6;
+	addr.sin6_addr = in6addr_loopback;
+	addr.sin6_port = 0;
+
+	if (bind(fd, (const struct sockaddr *)&addr, sizeof(addr)) == -1) {
+		log_err("Failed to bind server socket");
+		goto close_out;
+	}
+
+	if (listen(fd, 128) == -1) {
+		log_err("Failed to listen on server socket");
+		goto close_out;
+	}
+
+	goto out;
+
+close_out:
+	close(fd);
+	fd = -1;
+out:
+	return fd;
+}
+
+static int connect_to_server(int server_fd)
+{
+	struct sockaddr_storage addr;
+	socklen_t len = sizeof(addr);
+	int fd;
+
+	fd = socket(AF_INET6, SOCK_STREAM, 0);
+	if (fd == -1) {
+		log_err("Failed to create client socket");
+		goto out;
+	}
+
+	if (getsockname(server_fd, (struct sockaddr *)&addr, &len)) {
+		log_err("Failed to get server addr");
+		goto close_out;
+	}
+
+	if (connect(fd, (const struct sockaddr *)&addr, len) == -1) {
+		log_err("Fail to connect to server");
+		goto close_out;
+	}
+
+	goto out;
+
+close_out:
+	close(fd);
+	fd = -1;
+out:
+	return fd;
+}
+
+static int validate_map(struct bpf_map *map, int client_fd)
+{
+	__u32 cookie_expected_value;
+	struct sockaddr_in6 addr;
+	socklen_t len = sizeof(addr);
+	__u32 cookie_value;
+	__u64 cookie_key;
+	int err = 0;
+	int map_fd;
+
+	if (!map) {
+		log_err("Map not found in BPF object");
+		goto err;
+	}
+
+	map_fd = bpf_map__fd(map);
+
+	err = bpf_map_get_next_key(map_fd, NULL, &cookie_key);
+	if (err) {
+		log_err("Can't get cookie key from map");
+		goto out;
+	}
+
+	err = bpf_map_lookup_elem(map_fd, &cookie_key, &cookie_value);
+	if (err) {
+		log_err("Can't get cookie value from map");
+		goto out;
+	}
+
+	err = getsockname(client_fd, (struct sockaddr *)&addr, &len);
+	if (err) {
+		log_err("Can't get client local addr");
+		goto out;
+	}
+
+	cookie_expected_value = (ntohs(addr.sin6_port) << 8) | 0xFF;
+	if (cookie_value != cookie_expected_value) {
+		log_err("Unexpected value in map: %x != %x", cookie_value,
+			cookie_expected_value);
+		goto err;
+	}
+
+	goto out;
+err:
+	err = -1;
+out:
+	return err;
+}
+
+static int run_test(int cgfd)
+{
+	enum bpf_attach_type attach_type;
+	struct bpf_prog_load_attr attr;
+	struct bpf_program *prog;
+	struct bpf_object *pobj;
+	const char *prog_name;
+	int server_fd = -1;
+	int client_fd = -1;
+	int prog_fd = -1;
+	int err = 0;
+
+	memset(&attr, 0, sizeof(attr));
+	attr.file = SOCKET_COOKIE_PROG;
+	attr.prog_type = BPF_PROG_TYPE_UNSPEC;
+
+	err = bpf_prog_load_xattr(&attr, &pobj, &prog_fd);
+	if (err) {
+		log_err("Failed to load %s", attr.file);
+		goto out;
+	}
+
+	bpf_object__for_each_program(prog, pobj) {
+		prog_name = bpf_program__title(prog, /*needs_copy*/ false);
+
+		if (strcmp(prog_name, "cgroup/connect6") == 0) {
+			attach_type = BPF_CGROUP_INET6_CONNECT;
+		} else if (strcmp(prog_name, "sockops") == 0) {
+			attach_type = BPF_CGROUP_SOCK_OPS;
+		} else {
+			log_err("Unexpected prog: %s", prog_name);
+			goto err;
+		}
+
+		err = bpf_prog_attach(bpf_program__fd(prog), cgfd, attach_type,
+				      BPF_F_ALLOW_OVERRIDE);
+		if (err) {
+			log_err("Failed to attach prog %s", prog_name);
+			goto out;
+		}
+	}
+
+	server_fd = start_server();
+	if (server_fd == -1)
+		goto err;
+
+	client_fd = connect_to_server(server_fd);
+	if (client_fd == -1)
+		goto err;
+
+	if (validate_map(bpf_map__next(NULL, pobj), client_fd))
+		goto err;
+
+	goto out;
+err:
+	err = -1;
+out:
+	close(client_fd);
+	close(server_fd);
+	bpf_object__close(pobj);
+	printf("%s\n", err ? "FAILED" : "PASSED");
+	return err;
+}
+
+int main(int argc, char **argv)
+{
+	int cgfd = -1;
+	int err = 0;
+
+	if (setup_cgroup_environment())
+		goto err;
+
+	cgfd = create_and_get_cgroup(CG_PATH);
+	if (!cgfd)
+		goto err;
+
+	if (join_cgroup(CG_PATH))
+		goto err;
+
+	if (run_test(cgfd))
+		goto err;
+
+	goto out;
+err:
+	err = -1;
+out:
+	close(cgfd);
+	cleanup_cgroup_environment();
+	return err;
+}
diff --git a/tools/testing/selftests/bpf/test_sockhash_kern.c b/tools/testing/selftests/bpf/test_sockhash_kern.c
new file mode 100644
index 000000000..e67559164
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_sockhash_kern.c
@@ -0,0 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2018 Covalent IO, Inc. http://covalent.io
+#undef SOCKMAP
+#define TEST_MAP_TYPE BPF_MAP_TYPE_SOCKHASH
+#include "./test_sockmap_kern.h"
diff --git a/tools/testing/selftests/bpf/test_sockmap.c b/tools/testing/selftests/bpf/test_sockmap.c
new file mode 100644
index 000000000..a7fc91bb9
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_sockmap.c
@@ -0,0 +1,1527 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2017-2018 Covalent IO, Inc. http://covalent.io
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/socket.h>
+#include <sys/ioctl.h>
+#include <sys/select.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <unistd.h>
+#include <string.h>
+#include <errno.h>
+#include <sys/ioctl.h>
+#include <stdbool.h>
+#include <signal.h>
+#include <fcntl.h>
+#include <sys/wait.h>
+#include <time.h>
+#include <sched.h>
+
+#include <sys/time.h>
+#include <sys/resource.h>
+#include <sys/types.h>
+#include <sys/sendfile.h>
+
+#include <linux/netlink.h>
+#include <linux/socket.h>
+#include <linux/sock_diag.h>
+#include <linux/bpf.h>
+#include <linux/if_link.h>
+#include <assert.h>
+#include <libgen.h>
+
+#include <getopt.h>
+
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+
+#include "bpf_util.h"
+#include "bpf_rlimit.h"
+#include "cgroup_helpers.h"
+
+int running;
+static void running_handler(int a);
+
+/* randomly selected ports for testing on lo */
+#define S1_PORT 10000
+#define S2_PORT 10001
+
+#define BPF_SOCKMAP_FILENAME "test_sockmap_kern.o"
+#define BPF_SOCKHASH_FILENAME "test_sockhash_kern.o"
+#define CG_PATH "/sockmap"
+
+/* global sockets */
+int s1, s2, c1, c2, p1, p2;
+int test_cnt;
+int passed;
+int failed;
+int map_fd[8];
+struct bpf_map *maps[8];
+int prog_fd[11];
+
+int txmsg_pass;
+int txmsg_noisy;
+int txmsg_redir;
+int txmsg_redir_noisy;
+int txmsg_drop;
+int txmsg_apply;
+int txmsg_cork;
+int txmsg_start;
+int txmsg_end;
+int txmsg_ingress;
+int txmsg_skb;
+
+static const struct option long_options[] = {
+	{"help",	no_argument,		NULL, 'h' },
+	{"cgroup",	required_argument,	NULL, 'c' },
+	{"rate",	required_argument,	NULL, 'r' },
+	{"verbose",	no_argument,		NULL, 'v' },
+	{"iov_count",	required_argument,	NULL, 'i' },
+	{"length",	required_argument,	NULL, 'l' },
+	{"test",	required_argument,	NULL, 't' },
+	{"data_test",   no_argument,		NULL, 'd' },
+	{"txmsg",		no_argument,	&txmsg_pass,  1  },
+	{"txmsg_noisy",		no_argument,	&txmsg_noisy, 1  },
+	{"txmsg_redir",		no_argument,	&txmsg_redir, 1  },
+	{"txmsg_redir_noisy",	no_argument,	&txmsg_redir_noisy, 1},
+	{"txmsg_drop",		no_argument,	&txmsg_drop, 1 },
+	{"txmsg_apply",	required_argument,	NULL, 'a'},
+	{"txmsg_cork",	required_argument,	NULL, 'k'},
+	{"txmsg_start", required_argument,	NULL, 's'},
+	{"txmsg_end",	required_argument,	NULL, 'e'},
+	{"txmsg_ingress", no_argument,		&txmsg_ingress, 1 },
+	{"txmsg_skb", no_argument,		&txmsg_skb, 1 },
+	{0, 0, NULL, 0 }
+};
+
+static void usage(char *argv[])
+{
+	int i;
+
+	printf(" Usage: %s --cgroup <cgroup_path>\n", argv[0]);
+	printf(" options:\n");
+	for (i = 0; long_options[i].name != 0; i++) {
+		printf(" --%-12s", long_options[i].name);
+		if (long_options[i].flag != NULL)
+			printf(" flag (internal value:%d)\n",
+				*long_options[i].flag);
+		else
+			printf(" -%c\n", long_options[i].val);
+	}
+	printf("\n");
+}
+
+static int sockmap_init_sockets(int verbose)
+{
+	int i, err, one = 1;
+	struct sockaddr_in addr;
+	int *fds[4] = {&s1, &s2, &c1, &c2};
+
+	s1 = s2 = p1 = p2 = c1 = c2 = 0;
+
+	/* Init sockets */
+	for (i = 0; i < 4; i++) {
+		*fds[i] = socket(AF_INET, SOCK_STREAM, 0);
+		if (*fds[i] < 0) {
+			perror("socket s1 failed()");
+			return errno;
+		}
+	}
+
+	/* Allow reuse */
+	for (i = 0; i < 2; i++) {
+		err = setsockopt(*fds[i], SOL_SOCKET, SO_REUSEADDR,
+				 (char *)&one, sizeof(one));
+		if (err) {
+			perror("setsockopt failed()");
+			return errno;
+		}
+	}
+
+	/* Non-blocking sockets */
+	for (i = 0; i < 2; i++) {
+		err = ioctl(*fds[i], FIONBIO, (char *)&one);
+		if (err < 0) {
+			perror("ioctl s1 failed()");
+			return errno;
+		}
+	}
+
+	/* Bind server sockets */
+	memset(&addr, 0, sizeof(struct sockaddr_in));
+	addr.sin_family = AF_INET;
+	addr.sin_addr.s_addr = inet_addr("127.0.0.1");
+
+	addr.sin_port = htons(S1_PORT);
+	err = bind(s1, (struct sockaddr *)&addr, sizeof(addr));
+	if (err < 0) {
+		perror("bind s1 failed()\n");
+		return errno;
+	}
+
+	addr.sin_port = htons(S2_PORT);
+	err = bind(s2, (struct sockaddr *)&addr, sizeof(addr));
+	if (err < 0) {
+		perror("bind s2 failed()\n");
+		return errno;
+	}
+
+	/* Listen server sockets */
+	addr.sin_port = htons(S1_PORT);
+	err = listen(s1, 32);
+	if (err < 0) {
+		perror("listen s1 failed()\n");
+		return errno;
+	}
+
+	addr.sin_port = htons(S2_PORT);
+	err = listen(s2, 32);
+	if (err < 0) {
+		perror("listen s1 failed()\n");
+		return errno;
+	}
+
+	/* Initiate Connect */
+	addr.sin_port = htons(S1_PORT);
+	err = connect(c1, (struct sockaddr *)&addr, sizeof(addr));
+	if (err < 0 && errno != EINPROGRESS) {
+		perror("connect c1 failed()\n");
+		return errno;
+	}
+
+	addr.sin_port = htons(S2_PORT);
+	err = connect(c2, (struct sockaddr *)&addr, sizeof(addr));
+	if (err < 0 && errno != EINPROGRESS) {
+		perror("connect c2 failed()\n");
+		return errno;
+	} else if (err < 0) {
+		err = 0;
+	}
+
+	/* Accept Connecrtions */
+	p1 = accept(s1, NULL, NULL);
+	if (p1 < 0) {
+		perror("accept s1 failed()\n");
+		return errno;
+	}
+
+	p2 = accept(s2, NULL, NULL);
+	if (p2 < 0) {
+		perror("accept s1 failed()\n");
+		return errno;
+	}
+
+	if (verbose) {
+		printf("connected sockets: c1 <-> p1, c2 <-> p2\n");
+		printf("cgroups binding: c1(%i) <-> s1(%i) - - - c2(%i) <-> s2(%i)\n",
+			c1, s1, c2, s2);
+	}
+	return 0;
+}
+
+struct msg_stats {
+	size_t bytes_sent;
+	size_t bytes_recvd;
+	struct timespec start;
+	struct timespec end;
+};
+
+struct sockmap_options {
+	int verbose;
+	bool base;
+	bool sendpage;
+	bool data_test;
+	bool drop_expected;
+	int iov_count;
+	int iov_length;
+	int rate;
+};
+
+static int msg_loop_sendpage(int fd, int iov_length, int cnt,
+			     struct msg_stats *s,
+			     struct sockmap_options *opt)
+{
+	bool drop = opt->drop_expected;
+	unsigned char k = 0;
+	FILE *file;
+	int i, fp;
+
+	file = fopen(".sendpage_tst.tmp", "w+");
+	if (!file) {
+		perror("create file for sendpage");
+		return 1;
+	}
+	for (i = 0; i < iov_length * cnt; i++, k++)
+		fwrite(&k, sizeof(char), 1, file);
+	fflush(file);
+	fseek(file, 0, SEEK_SET);
+	fclose(file);
+
+	fp = open(".sendpage_tst.tmp", O_RDONLY);
+	if (fp < 0) {
+		perror("reopen file for sendpage");
+		return 1;
+	}
+
+	clock_gettime(CLOCK_MONOTONIC, &s->start);
+	for (i = 0; i < cnt; i++) {
+		int sent = sendfile(fd, fp, NULL, iov_length);
+
+		if (!drop && sent < 0) {
+			perror("send loop error:");
+			close(fp);
+			return sent;
+		} else if (drop && sent >= 0) {
+			printf("sendpage loop error expected: %i\n", sent);
+			close(fp);
+			return -EIO;
+		}
+
+		if (sent > 0)
+			s->bytes_sent += sent;
+	}
+	clock_gettime(CLOCK_MONOTONIC, &s->end);
+	close(fp);
+	return 0;
+}
+
+static int msg_loop(int fd, int iov_count, int iov_length, int cnt,
+		    struct msg_stats *s, bool tx,
+		    struct sockmap_options *opt)
+{
+	struct msghdr msg = {0};
+	int err, i, flags = MSG_NOSIGNAL;
+	struct iovec *iov;
+	unsigned char k;
+	bool data_test = opt->data_test;
+	bool drop = opt->drop_expected;
+
+	iov = calloc(iov_count, sizeof(struct iovec));
+	if (!iov)
+		return errno;
+
+	k = 0;
+	for (i = 0; i < iov_count; i++) {
+		unsigned char *d = calloc(iov_length, sizeof(char));
+
+		if (!d) {
+			fprintf(stderr, "iov_count %i/%i OOM\n", i, iov_count);
+			goto out_errno;
+		}
+		iov[i].iov_base = d;
+		iov[i].iov_len = iov_length;
+
+		if (data_test && tx) {
+			int j;
+
+			for (j = 0; j < iov_length; j++)
+				d[j] = k++;
+		}
+	}
+
+	msg.msg_iov = iov;
+	msg.msg_iovlen = iov_count;
+	k = 0;
+
+	if (tx) {
+		clock_gettime(CLOCK_MONOTONIC, &s->start);
+		for (i = 0; i < cnt; i++) {
+			int sent = sendmsg(fd, &msg, flags);
+
+			if (!drop && sent < 0) {
+				perror("send loop error:");
+				goto out_errno;
+			} else if (drop && sent >= 0) {
+				printf("send loop error expected: %i\n", sent);
+				errno = -EIO;
+				goto out_errno;
+			}
+			if (sent > 0)
+				s->bytes_sent += sent;
+		}
+		clock_gettime(CLOCK_MONOTONIC, &s->end);
+	} else {
+		int slct, recv, max_fd = fd;
+		int fd_flags = O_NONBLOCK;
+		struct timeval timeout;
+		float total_bytes;
+		int bytes_cnt = 0;
+		int chunk_sz;
+		fd_set w;
+
+		if (opt->sendpage)
+			chunk_sz = iov_length * cnt;
+		else
+			chunk_sz = iov_length * iov_count;
+
+		fcntl(fd, fd_flags);
+		total_bytes = (float)iov_count * (float)iov_length * (float)cnt;
+		err = clock_gettime(CLOCK_MONOTONIC, &s->start);
+		if (err < 0)
+			perror("recv start time: ");
+		while (s->bytes_recvd < total_bytes) {
+			if (txmsg_cork) {
+				timeout.tv_sec = 0;
+				timeout.tv_usec = 300000;
+			} else {
+				timeout.tv_sec = 1;
+				timeout.tv_usec = 0;
+			}
+
+			/* FD sets */
+			FD_ZERO(&w);
+			FD_SET(fd, &w);
+
+			slct = select(max_fd + 1, &w, NULL, NULL, &timeout);
+			if (slct == -1) {
+				perror("select()");
+				clock_gettime(CLOCK_MONOTONIC, &s->end);
+				goto out_errno;
+			} else if (!slct) {
+				if (opt->verbose)
+					fprintf(stderr, "unexpected timeout\n");
+				errno = -EIO;
+				clock_gettime(CLOCK_MONOTONIC, &s->end);
+				goto out_errno;
+			}
+
+			recv = recvmsg(fd, &msg, flags);
+			if (recv < 0) {
+				if (errno != EWOULDBLOCK) {
+					clock_gettime(CLOCK_MONOTONIC, &s->end);
+					perror("recv failed()\n");
+					goto out_errno;
+				}
+			}
+
+			s->bytes_recvd += recv;
+
+			if (data_test) {
+				int j;
+
+				for (i = 0; i < msg.msg_iovlen; i++) {
+					unsigned char *d = iov[i].iov_base;
+
+					for (j = 0;
+					     j < iov[i].iov_len && recv; j++) {
+						if (d[j] != k++) {
+							errno = -EIO;
+							fprintf(stderr,
+								"detected data corruption @iov[%i]:%i %02x != %02x, %02x ?= %02x\n",
+								i, j, d[j], k - 1, d[j+1], k);
+							goto out_errno;
+						}
+						bytes_cnt++;
+						if (bytes_cnt == chunk_sz) {
+							k = 0;
+							bytes_cnt = 0;
+						}
+						recv--;
+					}
+				}
+			}
+		}
+		clock_gettime(CLOCK_MONOTONIC, &s->end);
+	}
+
+	for (i = 0; i < iov_count; i++)
+		free(iov[i].iov_base);
+	free(iov);
+	return 0;
+out_errno:
+	for (i = 0; i < iov_count; i++)
+		free(iov[i].iov_base);
+	free(iov);
+	return errno;
+}
+
+static float giga = 1000000000;
+
+static inline float sentBps(struct msg_stats s)
+{
+	return s.bytes_sent / (s.end.tv_sec - s.start.tv_sec);
+}
+
+static inline float recvdBps(struct msg_stats s)
+{
+	return s.bytes_recvd / (s.end.tv_sec - s.start.tv_sec);
+}
+
+static int sendmsg_test(struct sockmap_options *opt)
+{
+	float sent_Bps = 0, recvd_Bps = 0;
+	int rx_fd, txpid, rxpid, err = 0;
+	struct msg_stats s = {0};
+	int iov_count = opt->iov_count;
+	int iov_buf = opt->iov_length;
+	int rx_status, tx_status;
+	int cnt = opt->rate;
+
+	errno = 0;
+
+	if (opt->base)
+		rx_fd = p1;
+	else
+		rx_fd = p2;
+
+	rxpid = fork();
+	if (rxpid == 0) {
+		if (opt->drop_expected)
+			exit(0);
+
+		if (opt->sendpage)
+			iov_count = 1;
+		err = msg_loop(rx_fd, iov_count, iov_buf,
+			       cnt, &s, false, opt);
+		if (err && opt->verbose)
+			fprintf(stderr,
+				"msg_loop_rx: iov_count %i iov_buf %i cnt %i err %i\n",
+				iov_count, iov_buf, cnt, err);
+		shutdown(p2, SHUT_RDWR);
+		shutdown(p1, SHUT_RDWR);
+		if (s.end.tv_sec - s.start.tv_sec) {
+			sent_Bps = sentBps(s);
+			recvd_Bps = recvdBps(s);
+		}
+		if (opt->verbose)
+			fprintf(stdout,
+				"rx_sendmsg: TX: %zuB %fB/s %fGB/s RX: %zuB %fB/s %fGB/s\n",
+				s.bytes_sent, sent_Bps, sent_Bps/giga,
+				s.bytes_recvd, recvd_Bps, recvd_Bps/giga);
+		if (err && txmsg_cork)
+			err = 0;
+		exit(err ? 1 : 0);
+	} else if (rxpid == -1) {
+		perror("msg_loop_rx: ");
+		return errno;
+	}
+
+	txpid = fork();
+	if (txpid == 0) {
+		if (opt->sendpage)
+			err = msg_loop_sendpage(c1, iov_buf, cnt, &s, opt);
+		else
+			err = msg_loop(c1, iov_count, iov_buf,
+				       cnt, &s, true, opt);
+
+		if (err)
+			fprintf(stderr,
+				"msg_loop_tx: iov_count %i iov_buf %i cnt %i err %i\n",
+				iov_count, iov_buf, cnt, err);
+		shutdown(c1, SHUT_RDWR);
+		if (s.end.tv_sec - s.start.tv_sec) {
+			sent_Bps = sentBps(s);
+			recvd_Bps = recvdBps(s);
+		}
+		if (opt->verbose)
+			fprintf(stdout,
+				"tx_sendmsg: TX: %zuB %fB/s %f GB/s RX: %zuB %fB/s %fGB/s\n",
+				s.bytes_sent, sent_Bps, sent_Bps/giga,
+				s.bytes_recvd, recvd_Bps, recvd_Bps/giga);
+		exit(err ? 1 : 0);
+	} else if (txpid == -1) {
+		perror("msg_loop_tx: ");
+		return errno;
+	}
+
+	assert(waitpid(rxpid, &rx_status, 0) == rxpid);
+	assert(waitpid(txpid, &tx_status, 0) == txpid);
+	if (WIFEXITED(rx_status)) {
+		err = WEXITSTATUS(rx_status);
+		if (err) {
+			fprintf(stderr, "rx thread exited with err %d. ", err);
+			goto out;
+		}
+	}
+	if (WIFEXITED(tx_status)) {
+		err = WEXITSTATUS(tx_status);
+		if (err)
+			fprintf(stderr, "tx thread exited with err %d. ", err);
+	}
+out:
+	return err;
+}
+
+static int forever_ping_pong(int rate, struct sockmap_options *opt)
+{
+	struct timeval timeout;
+	char buf[1024] = {0};
+	int sc;
+
+	timeout.tv_sec = 10;
+	timeout.tv_usec = 0;
+
+	/* Ping/Pong data from client to server */
+	sc = send(c1, buf, sizeof(buf), 0);
+	if (sc < 0) {
+		perror("send failed()\n");
+		return sc;
+	}
+
+	do {
+		int s, rc, i, max_fd = p2;
+		fd_set w;
+
+		/* FD sets */
+		FD_ZERO(&w);
+		FD_SET(c1, &w);
+		FD_SET(c2, &w);
+		FD_SET(p1, &w);
+		FD_SET(p2, &w);
+
+		s = select(max_fd + 1, &w, NULL, NULL, &timeout);
+		if (s == -1) {
+			perror("select()");
+			break;
+		} else if (!s) {
+			fprintf(stderr, "unexpected timeout\n");
+			break;
+		}
+
+		for (i = 0; i <= max_fd && s > 0; ++i) {
+			if (!FD_ISSET(i, &w))
+				continue;
+
+			s--;
+
+			rc = recv(i, buf, sizeof(buf), 0);
+			if (rc < 0) {
+				if (errno != EWOULDBLOCK) {
+					perror("recv failed()\n");
+					return rc;
+				}
+			}
+
+			if (rc == 0) {
+				close(i);
+				break;
+			}
+
+			sc = send(i, buf, rc, 0);
+			if (sc < 0) {
+				perror("send failed()\n");
+				return sc;
+			}
+		}
+
+		if (rate)
+			sleep(rate);
+
+		if (opt->verbose) {
+			printf(".");
+			fflush(stdout);
+
+		}
+	} while (running);
+
+	return 0;
+}
+
+enum {
+	PING_PONG,
+	SENDMSG,
+	BASE,
+	BASE_SENDPAGE,
+	SENDPAGE,
+};
+
+static int run_options(struct sockmap_options *options, int cg_fd,  int test)
+{
+	int i, key, next_key, err, tx_prog_fd = -1, zero = 0;
+
+	/* If base test skip BPF setup */
+	if (test == BASE || test == BASE_SENDPAGE)
+		goto run;
+
+	/* Attach programs to sockmap */
+	err = bpf_prog_attach(prog_fd[0], map_fd[0],
+				BPF_SK_SKB_STREAM_PARSER, 0);
+	if (err) {
+		fprintf(stderr,
+			"ERROR: bpf_prog_attach (sockmap %i->%i): %d (%s)\n",
+			prog_fd[0], map_fd[0], err, strerror(errno));
+		return err;
+	}
+
+	err = bpf_prog_attach(prog_fd[1], map_fd[0],
+				BPF_SK_SKB_STREAM_VERDICT, 0);
+	if (err) {
+		fprintf(stderr, "ERROR: bpf_prog_attach (sockmap): %d (%s)\n",
+			err, strerror(errno));
+		return err;
+	}
+
+	/* Attach to cgroups */
+	err = bpf_prog_attach(prog_fd[2], cg_fd, BPF_CGROUP_SOCK_OPS, 0);
+	if (err) {
+		fprintf(stderr, "ERROR: bpf_prog_attach (groups): %d (%s)\n",
+			err, strerror(errno));
+		return err;
+	}
+
+run:
+	err = sockmap_init_sockets(options->verbose);
+	if (err) {
+		fprintf(stderr, "ERROR: test socket failed: %d\n", err);
+		goto out;
+	}
+
+	/* Attach txmsg program to sockmap */
+	if (txmsg_pass)
+		tx_prog_fd = prog_fd[3];
+	else if (txmsg_noisy)
+		tx_prog_fd = prog_fd[4];
+	else if (txmsg_redir)
+		tx_prog_fd = prog_fd[5];
+	else if (txmsg_redir_noisy)
+		tx_prog_fd = prog_fd[6];
+	else if (txmsg_drop)
+		tx_prog_fd = prog_fd[9];
+	/* apply and cork must be last */
+	else if (txmsg_apply)
+		tx_prog_fd = prog_fd[7];
+	else if (txmsg_cork)
+		tx_prog_fd = prog_fd[8];
+	else
+		tx_prog_fd = 0;
+
+	if (tx_prog_fd) {
+		int redir_fd, i = 0;
+
+		err = bpf_prog_attach(tx_prog_fd,
+				      map_fd[1], BPF_SK_MSG_VERDICT, 0);
+		if (err) {
+			fprintf(stderr,
+				"ERROR: bpf_prog_attach (txmsg): %d (%s)\n",
+				err, strerror(errno));
+			goto out;
+		}
+
+		err = bpf_map_update_elem(map_fd[1], &i, &c1, BPF_ANY);
+		if (err) {
+			fprintf(stderr,
+				"ERROR: bpf_map_update_elem (txmsg):  %d (%s\n",
+				err, strerror(errno));
+			goto out;
+		}
+
+		if (txmsg_redir || txmsg_redir_noisy)
+			redir_fd = c2;
+		else
+			redir_fd = c1;
+
+		err = bpf_map_update_elem(map_fd[2], &i, &redir_fd, BPF_ANY);
+		if (err) {
+			fprintf(stderr,
+				"ERROR: bpf_map_update_elem (txmsg):  %d (%s\n",
+				err, strerror(errno));
+			goto out;
+		}
+
+		if (txmsg_apply) {
+			err = bpf_map_update_elem(map_fd[3],
+						  &i, &txmsg_apply, BPF_ANY);
+			if (err) {
+				fprintf(stderr,
+					"ERROR: bpf_map_update_elem (apply_bytes):  %d (%s\n",
+					err, strerror(errno));
+				goto out;
+			}
+		}
+
+		if (txmsg_cork) {
+			err = bpf_map_update_elem(map_fd[4],
+						  &i, &txmsg_cork, BPF_ANY);
+			if (err) {
+				fprintf(stderr,
+					"ERROR: bpf_map_update_elem (cork_bytes):  %d (%s\n",
+					err, strerror(errno));
+				goto out;
+			}
+		}
+
+		if (txmsg_start) {
+			err = bpf_map_update_elem(map_fd[5],
+						  &i, &txmsg_start, BPF_ANY);
+			if (err) {
+				fprintf(stderr,
+					"ERROR: bpf_map_update_elem (txmsg_start):  %d (%s)\n",
+					err, strerror(errno));
+				goto out;
+			}
+		}
+
+		if (txmsg_end) {
+			i = 1;
+			err = bpf_map_update_elem(map_fd[5],
+						  &i, &txmsg_end, BPF_ANY);
+			if (err) {
+				fprintf(stderr,
+					"ERROR: bpf_map_update_elem (txmsg_end):  %d (%s)\n",
+					err, strerror(errno));
+				goto out;
+			}
+		}
+
+		if (txmsg_ingress) {
+			int in = BPF_F_INGRESS;
+
+			i = 0;
+			err = bpf_map_update_elem(map_fd[6], &i, &in, BPF_ANY);
+			if (err) {
+				fprintf(stderr,
+					"ERROR: bpf_map_update_elem (txmsg_ingress): %d (%s)\n",
+					err, strerror(errno));
+			}
+			i = 1;
+			err = bpf_map_update_elem(map_fd[1], &i, &p1, BPF_ANY);
+			if (err) {
+				fprintf(stderr,
+					"ERROR: bpf_map_update_elem (p1 txmsg): %d (%s)\n",
+					err, strerror(errno));
+			}
+			err = bpf_map_update_elem(map_fd[2], &i, &p1, BPF_ANY);
+			if (err) {
+				fprintf(stderr,
+					"ERROR: bpf_map_update_elem (p1 redir): %d (%s)\n",
+					err, strerror(errno));
+			}
+
+			i = 2;
+			err = bpf_map_update_elem(map_fd[2], &i, &p2, BPF_ANY);
+			if (err) {
+				fprintf(stderr,
+					"ERROR: bpf_map_update_elem (p2 txmsg): %d (%s)\n",
+					err, strerror(errno));
+			}
+		}
+
+		if (txmsg_skb) {
+			int skb_fd = (test == SENDMSG || test == SENDPAGE) ?
+					p2 : p1;
+			int ingress = BPF_F_INGRESS;
+
+			i = 0;
+			err = bpf_map_update_elem(map_fd[7],
+						  &i, &ingress, BPF_ANY);
+			if (err) {
+				fprintf(stderr,
+					"ERROR: bpf_map_update_elem (txmsg_ingress): %d (%s)\n",
+					err, strerror(errno));
+			}
+
+			i = 3;
+			err = bpf_map_update_elem(map_fd[0],
+						  &i, &skb_fd, BPF_ANY);
+			if (err) {
+				fprintf(stderr,
+					"ERROR: bpf_map_update_elem (c1 sockmap): %d (%s)\n",
+					err, strerror(errno));
+			}
+		}
+	}
+
+	if (txmsg_drop)
+		options->drop_expected = true;
+
+	if (test == PING_PONG)
+		err = forever_ping_pong(options->rate, options);
+	else if (test == SENDMSG) {
+		options->base = false;
+		options->sendpage = false;
+		err = sendmsg_test(options);
+	} else if (test == SENDPAGE) {
+		options->base = false;
+		options->sendpage = true;
+		err = sendmsg_test(options);
+	} else if (test == BASE) {
+		options->base = true;
+		options->sendpage = false;
+		err = sendmsg_test(options);
+	} else if (test == BASE_SENDPAGE) {
+		options->base = true;
+		options->sendpage = true;
+		err = sendmsg_test(options);
+	} else
+		fprintf(stderr, "unknown test\n");
+out:
+	/* Detatch and zero all the maps */
+	bpf_prog_detach2(prog_fd[2], cg_fd, BPF_CGROUP_SOCK_OPS);
+	bpf_prog_detach2(prog_fd[0], map_fd[0], BPF_SK_SKB_STREAM_PARSER);
+	bpf_prog_detach2(prog_fd[1], map_fd[0], BPF_SK_SKB_STREAM_VERDICT);
+	if (tx_prog_fd >= 0)
+		bpf_prog_detach2(tx_prog_fd, map_fd[1], BPF_SK_MSG_VERDICT);
+
+	for (i = 0; i < 8; i++) {
+		key = next_key = 0;
+		bpf_map_update_elem(map_fd[i], &key, &zero, BPF_ANY);
+		while (bpf_map_get_next_key(map_fd[i], &key, &next_key) == 0) {
+			bpf_map_update_elem(map_fd[i], &key, &zero, BPF_ANY);
+			key = next_key;
+		}
+	}
+
+	close(s1);
+	close(s2);
+	close(p1);
+	close(p2);
+	close(c1);
+	close(c2);
+	return err;
+}
+
+static char *test_to_str(int test)
+{
+	switch (test) {
+	case SENDMSG:
+		return "sendmsg";
+	case SENDPAGE:
+		return "sendpage";
+	}
+	return "unknown";
+}
+
+#define OPTSTRING 60
+static void test_options(char *options)
+{
+	char tstr[OPTSTRING];
+
+	memset(options, 0, OPTSTRING);
+
+	if (txmsg_pass)
+		strncat(options, "pass,", OPTSTRING);
+	if (txmsg_noisy)
+		strncat(options, "pass_noisy,", OPTSTRING);
+	if (txmsg_redir)
+		strncat(options, "redir,", OPTSTRING);
+	if (txmsg_redir_noisy)
+		strncat(options, "redir_noisy,", OPTSTRING);
+	if (txmsg_drop)
+		strncat(options, "drop,", OPTSTRING);
+	if (txmsg_apply) {
+		snprintf(tstr, OPTSTRING, "apply %d,", txmsg_apply);
+		strncat(options, tstr, OPTSTRING);
+	}
+	if (txmsg_cork) {
+		snprintf(tstr, OPTSTRING, "cork %d,", txmsg_cork);
+		strncat(options, tstr, OPTSTRING);
+	}
+	if (txmsg_start) {
+		snprintf(tstr, OPTSTRING, "start %d,", txmsg_start);
+		strncat(options, tstr, OPTSTRING);
+	}
+	if (txmsg_end) {
+		snprintf(tstr, OPTSTRING, "end %d,", txmsg_end);
+		strncat(options, tstr, OPTSTRING);
+	}
+	if (txmsg_ingress)
+		strncat(options, "ingress,", OPTSTRING);
+	if (txmsg_skb)
+		strncat(options, "skb,", OPTSTRING);
+}
+
+static int __test_exec(int cgrp, int test, struct sockmap_options *opt)
+{
+	char *options = calloc(OPTSTRING, sizeof(char));
+	int err;
+
+	if (test == SENDPAGE)
+		opt->sendpage = true;
+	else
+		opt->sendpage = false;
+
+	if (txmsg_drop)
+		opt->drop_expected = true;
+	else
+		opt->drop_expected = false;
+
+	test_options(options);
+
+	fprintf(stdout,
+		"[TEST %i]: (%i, %i, %i, %s, %s): ",
+		test_cnt, opt->rate, opt->iov_count, opt->iov_length,
+		test_to_str(test), options);
+	fflush(stdout);
+	err = run_options(opt, cgrp, test);
+	fprintf(stdout, "%s\n", !err ? "PASS" : "FAILED");
+	test_cnt++;
+	!err ? passed++ : failed++;
+	free(options);
+	return err;
+}
+
+static int test_exec(int cgrp, struct sockmap_options *opt)
+{
+	int err = __test_exec(cgrp, SENDMSG, opt);
+
+	if (err)
+		goto out;
+
+	err = __test_exec(cgrp, SENDPAGE, opt);
+out:
+	return err;
+}
+
+static int test_loop(int cgrp)
+{
+	struct sockmap_options opt;
+
+	int err, i, l, r;
+
+	opt.verbose = 0;
+	opt.base = false;
+	opt.sendpage = false;
+	opt.data_test = false;
+	opt.drop_expected = false;
+	opt.iov_count = 0;
+	opt.iov_length = 0;
+	opt.rate = 0;
+
+	r = 1;
+	for (i = 1; i < 100; i += 33) {
+		for (l = 1; l < 100; l += 33) {
+			opt.rate = r;
+			opt.iov_count = i;
+			opt.iov_length = l;
+			err = test_exec(cgrp, &opt);
+			if (err)
+				goto out;
+		}
+	}
+	sched_yield();
+out:
+	return err;
+}
+
+static int test_txmsg(int cgrp)
+{
+	int err;
+
+	txmsg_pass = txmsg_noisy = txmsg_redir_noisy = txmsg_drop = 0;
+	txmsg_apply = txmsg_cork = 0;
+	txmsg_ingress = txmsg_skb = 0;
+
+	txmsg_pass = 1;
+	err = test_loop(cgrp);
+	txmsg_pass = 0;
+	if (err)
+		goto out;
+
+	txmsg_redir = 1;
+	err = test_loop(cgrp);
+	txmsg_redir = 0;
+	if (err)
+		goto out;
+
+	txmsg_drop = 1;
+	err = test_loop(cgrp);
+	txmsg_drop = 0;
+	if (err)
+		goto out;
+
+	txmsg_redir = 1;
+	txmsg_ingress = 1;
+	err = test_loop(cgrp);
+	txmsg_redir = 0;
+	txmsg_ingress = 0;
+	if (err)
+		goto out;
+out:
+	txmsg_pass = 0;
+	txmsg_redir = 0;
+	txmsg_drop = 0;
+	return err;
+}
+
+static int test_send(struct sockmap_options *opt, int cgrp)
+{
+	int err;
+
+	opt->iov_length = 1;
+	opt->iov_count = 1;
+	opt->rate = 1;
+	err = test_exec(cgrp, opt);
+	if (err)
+		goto out;
+
+	opt->iov_length = 1;
+	opt->iov_count = 1024;
+	opt->rate = 1;
+	err = test_exec(cgrp, opt);
+	if (err)
+		goto out;
+
+	opt->iov_length = 1024;
+	opt->iov_count = 1;
+	opt->rate = 1;
+	err = test_exec(cgrp, opt);
+	if (err)
+		goto out;
+
+	opt->iov_length = 1;
+	opt->iov_count = 1;
+	opt->rate = 512;
+	err = test_exec(cgrp, opt);
+	if (err)
+		goto out;
+
+	opt->iov_length = 256;
+	opt->iov_count = 1024;
+	opt->rate = 2;
+	err = test_exec(cgrp, opt);
+	if (err)
+		goto out;
+
+	opt->rate = 100;
+	opt->iov_count = 1;
+	opt->iov_length = 5;
+	err = test_exec(cgrp, opt);
+	if (err)
+		goto out;
+out:
+	sched_yield();
+	return err;
+}
+
+static int test_mixed(int cgrp)
+{
+	struct sockmap_options opt = {0};
+	int err;
+
+	txmsg_pass = txmsg_noisy = txmsg_redir_noisy = txmsg_drop = 0;
+	txmsg_apply = txmsg_cork = 0;
+	txmsg_start = txmsg_end = 0;
+	/* Test small and large iov_count values with pass/redir/apply/cork */
+	txmsg_pass = 1;
+	txmsg_redir = 0;
+	txmsg_apply = 1;
+	txmsg_cork = 0;
+	err = test_send(&opt, cgrp);
+	if (err)
+		goto out;
+
+	txmsg_pass = 1;
+	txmsg_redir = 0;
+	txmsg_apply = 0;
+	txmsg_cork = 1;
+	err = test_send(&opt, cgrp);
+	if (err)
+		goto out;
+
+	txmsg_pass = 1;
+	txmsg_redir = 0;
+	txmsg_apply = 1;
+	txmsg_cork = 1;
+	err = test_send(&opt, cgrp);
+	if (err)
+		goto out;
+
+	txmsg_pass = 1;
+	txmsg_redir = 0;
+	txmsg_apply = 1024;
+	txmsg_cork = 0;
+	err = test_send(&opt, cgrp);
+	if (err)
+		goto out;
+
+	txmsg_pass = 1;
+	txmsg_redir = 0;
+	txmsg_apply = 0;
+	txmsg_cork = 1024;
+	err = test_send(&opt, cgrp);
+	if (err)
+		goto out;
+
+	txmsg_pass = 1;
+	txmsg_redir = 0;
+	txmsg_apply = 1024;
+	txmsg_cork = 1024;
+	err = test_send(&opt, cgrp);
+	if (err)
+		goto out;
+
+	txmsg_pass = 1;
+	txmsg_redir = 0;
+	txmsg_cork = 4096;
+	txmsg_apply = 4096;
+	err = test_send(&opt, cgrp);
+	if (err)
+		goto out;
+
+	txmsg_pass = 0;
+	txmsg_redir = 1;
+	txmsg_apply = 1;
+	txmsg_cork = 0;
+	err = test_send(&opt, cgrp);
+	if (err)
+		goto out;
+
+	txmsg_pass = 0;
+	txmsg_redir = 1;
+	txmsg_apply = 0;
+	txmsg_cork = 1;
+	err = test_send(&opt, cgrp);
+	if (err)
+		goto out;
+
+	txmsg_pass = 0;
+	txmsg_redir = 1;
+	txmsg_apply = 1024;
+	txmsg_cork = 0;
+	err = test_send(&opt, cgrp);
+	if (err)
+		goto out;
+
+	txmsg_pass = 0;
+	txmsg_redir = 1;
+	txmsg_apply = 0;
+	txmsg_cork = 1024;
+	err = test_send(&opt, cgrp);
+	if (err)
+		goto out;
+
+	txmsg_pass = 0;
+	txmsg_redir = 1;
+	txmsg_apply = 1024;
+	txmsg_cork = 1024;
+	err = test_send(&opt, cgrp);
+	if (err)
+		goto out;
+
+	txmsg_pass = 0;
+	txmsg_redir = 1;
+	txmsg_cork = 4096;
+	txmsg_apply = 4096;
+	err = test_send(&opt, cgrp);
+	if (err)
+		goto out;
+out:
+	return err;
+}
+
+static int test_start_end(int cgrp)
+{
+	struct sockmap_options opt = {0};
+	int err, i;
+
+	/* Test basic start/end with lots of iov_count and iov_lengths */
+	txmsg_start = 1;
+	txmsg_end = 2;
+	err = test_txmsg(cgrp);
+	if (err)
+		goto out;
+
+	/* Test start/end with cork */
+	opt.rate = 16;
+	opt.iov_count = 1;
+	opt.iov_length = 100;
+	txmsg_cork = 1600;
+
+	for (i = 99; i <= 1600; i += 500) {
+		txmsg_start = 0;
+		txmsg_end = i;
+		err = test_exec(cgrp, &opt);
+		if (err)
+			goto out;
+	}
+
+	/* Test start/end with cork but pull data in middle */
+	for (i = 199; i <= 1600; i += 500) {
+		txmsg_start = 100;
+		txmsg_end = i;
+		err = test_exec(cgrp, &opt);
+		if (err)
+			goto out;
+	}
+
+	/* Test start/end with cork pulling last sg entry */
+	txmsg_start = 1500;
+	txmsg_end = 1600;
+	err = test_exec(cgrp, &opt);
+	if (err)
+		goto out;
+
+	/* Test start/end pull of single byte in last page */
+	txmsg_start = 1111;
+	txmsg_end = 1112;
+	err = test_exec(cgrp, &opt);
+	if (err)
+		goto out;
+
+	/* Test start/end with end < start */
+	txmsg_start = 1111;
+	txmsg_end = 0;
+	err = test_exec(cgrp, &opt);
+	if (err)
+		goto out;
+
+	/* Test start/end with end > data */
+	txmsg_start = 0;
+	txmsg_end = 1601;
+	err = test_exec(cgrp, &opt);
+	if (err)
+		goto out;
+
+	/* Test start/end with start > data */
+	txmsg_start = 1601;
+	txmsg_end = 1600;
+	err = test_exec(cgrp, &opt);
+
+out:
+	txmsg_start = 0;
+	txmsg_end = 0;
+	sched_yield();
+	return err;
+}
+
+char *map_names[] = {
+	"sock_map",
+	"sock_map_txmsg",
+	"sock_map_redir",
+	"sock_apply_bytes",
+	"sock_cork_bytes",
+	"sock_pull_bytes",
+	"sock_redir_flags",
+	"sock_skb_opts",
+};
+
+int prog_attach_type[] = {
+	BPF_SK_SKB_STREAM_PARSER,
+	BPF_SK_SKB_STREAM_VERDICT,
+	BPF_CGROUP_SOCK_OPS,
+	BPF_SK_MSG_VERDICT,
+	BPF_SK_MSG_VERDICT,
+	BPF_SK_MSG_VERDICT,
+	BPF_SK_MSG_VERDICT,
+	BPF_SK_MSG_VERDICT,
+	BPF_SK_MSG_VERDICT,
+	BPF_SK_MSG_VERDICT,
+};
+
+int prog_type[] = {
+	BPF_PROG_TYPE_SK_SKB,
+	BPF_PROG_TYPE_SK_SKB,
+	BPF_PROG_TYPE_SOCK_OPS,
+	BPF_PROG_TYPE_SK_MSG,
+	BPF_PROG_TYPE_SK_MSG,
+	BPF_PROG_TYPE_SK_MSG,
+	BPF_PROG_TYPE_SK_MSG,
+	BPF_PROG_TYPE_SK_MSG,
+	BPF_PROG_TYPE_SK_MSG,
+	BPF_PROG_TYPE_SK_MSG,
+};
+
+static int populate_progs(char *bpf_file)
+{
+	struct bpf_program *prog;
+	struct bpf_object *obj;
+	int i = 0;
+	long err;
+
+	obj = bpf_object__open(bpf_file);
+	err = libbpf_get_error(obj);
+	if (err) {
+		char err_buf[256];
+
+		libbpf_strerror(err, err_buf, sizeof(err_buf));
+		printf("Unable to load eBPF objects in file '%s' : %s\n",
+		       bpf_file, err_buf);
+		return -1;
+	}
+
+	bpf_object__for_each_program(prog, obj) {
+		bpf_program__set_type(prog, prog_type[i]);
+		bpf_program__set_expected_attach_type(prog,
+						      prog_attach_type[i]);
+		i++;
+	}
+
+	i = bpf_object__load(obj);
+	i = 0;
+	bpf_object__for_each_program(prog, obj) {
+		prog_fd[i] = bpf_program__fd(prog);
+		i++;
+	}
+
+	for (i = 0; i < sizeof(map_fd)/sizeof(int); i++) {
+		maps[i] = bpf_object__find_map_by_name(obj, map_names[i]);
+		map_fd[i] = bpf_map__fd(maps[i]);
+		if (map_fd[i] < 0) {
+			fprintf(stderr, "load_bpf_file: (%i) %s\n",
+				map_fd[i], strerror(errno));
+			return -1;
+		}
+	}
+
+	return 0;
+}
+
+static int __test_suite(char *bpf_file)
+{
+	int cg_fd, err;
+
+	err = populate_progs(bpf_file);
+	if (err < 0) {
+		fprintf(stderr, "ERROR: (%i) load bpf failed\n", err);
+		return err;
+	}
+
+	if (setup_cgroup_environment()) {
+		fprintf(stderr, "ERROR: cgroup env failed\n");
+		return -EINVAL;
+	}
+
+	cg_fd = create_and_get_cgroup(CG_PATH);
+	if (cg_fd < 0) {
+		fprintf(stderr,
+			"ERROR: (%i) open cg path failed: %s\n",
+			cg_fd, optarg);
+		return cg_fd;
+	}
+
+	if (join_cgroup(CG_PATH)) {
+		fprintf(stderr, "ERROR: failed to join cgroup\n");
+		return -EINVAL;
+	}
+
+	/* Tests basic commands and APIs with range of iov values */
+	txmsg_start = txmsg_end = 0;
+	err = test_txmsg(cg_fd);
+	if (err)
+		goto out;
+
+	/* Tests interesting combinations of APIs used together */
+	err = test_mixed(cg_fd);
+	if (err)
+		goto out;
+
+	/* Tests pull_data API using start/end API */
+	err = test_start_end(cg_fd);
+	if (err)
+		goto out;
+
+out:
+	printf("Summary: %i PASSED %i FAILED\n", passed, failed);
+	cleanup_cgroup_environment();
+	close(cg_fd);
+	return err;
+}
+
+static int test_suite(void)
+{
+	int err;
+
+	err = __test_suite(BPF_SOCKMAP_FILENAME);
+	if (err)
+		goto out;
+	err = __test_suite(BPF_SOCKHASH_FILENAME);
+out:
+	return err;
+}
+
+int main(int argc, char **argv)
+{
+	int iov_count = 1, length = 1024, rate = 1;
+	struct sockmap_options options = {0};
+	int opt, longindex, err, cg_fd = 0;
+	char *bpf_file = BPF_SOCKMAP_FILENAME;
+	int test = PING_PONG;
+
+	if (argc < 2)
+		return test_suite();
+
+	while ((opt = getopt_long(argc, argv, ":dhvc:r:i:l:t:",
+				  long_options, &longindex)) != -1) {
+		switch (opt) {
+		case 's':
+			txmsg_start = atoi(optarg);
+			break;
+		case 'e':
+			txmsg_end = atoi(optarg);
+			break;
+		case 'a':
+			txmsg_apply = atoi(optarg);
+			break;
+		case 'k':
+			txmsg_cork = atoi(optarg);
+			break;
+		case 'c':
+			cg_fd = open(optarg, O_DIRECTORY, O_RDONLY);
+			if (cg_fd < 0) {
+				fprintf(stderr,
+					"ERROR: (%i) open cg path failed: %s\n",
+					cg_fd, optarg);
+				return cg_fd;
+			}
+			break;
+		case 'r':
+			rate = atoi(optarg);
+			break;
+		case 'v':
+			options.verbose = 1;
+			break;
+		case 'i':
+			iov_count = atoi(optarg);
+			break;
+		case 'l':
+			length = atoi(optarg);
+			break;
+		case 'd':
+			options.data_test = true;
+			break;
+		case 't':
+			if (strcmp(optarg, "ping") == 0) {
+				test = PING_PONG;
+			} else if (strcmp(optarg, "sendmsg") == 0) {
+				test = SENDMSG;
+			} else if (strcmp(optarg, "base") == 0) {
+				test = BASE;
+			} else if (strcmp(optarg, "base_sendpage") == 0) {
+				test = BASE_SENDPAGE;
+			} else if (strcmp(optarg, "sendpage") == 0) {
+				test = SENDPAGE;
+			} else {
+				usage(argv);
+				return -1;
+			}
+			break;
+		case 0:
+			break;
+		case 'h':
+		default:
+			usage(argv);
+			return -1;
+		}
+	}
+
+	if (!cg_fd) {
+		fprintf(stderr, "%s requires cgroup option: --cgroup <path>\n",
+			argv[0]);
+		return -1;
+	}
+
+	err = populate_progs(bpf_file);
+	if (err) {
+		fprintf(stderr, "populate program: (%s) %s\n",
+			bpf_file, strerror(errno));
+		return 1;
+	}
+	running = 1;
+
+	/* catch SIGINT */
+	signal(SIGINT, running_handler);
+
+	options.iov_count = iov_count;
+	options.iov_length = length;
+	options.rate = rate;
+
+	err = run_options(&options, cg_fd, test);
+	close(cg_fd);
+	return err;
+}
+
+void running_handler(int a)
+{
+	running = 0;
+}
diff --git a/tools/testing/selftests/bpf/test_sockmap_kern.c b/tools/testing/selftests/bpf/test_sockmap_kern.c
new file mode 100644
index 000000000..677b2ed1c
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_sockmap_kern.c
@@ -0,0 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2018 Covalent IO, Inc. http://covalent.io
+#define SOCKMAP
+#define TEST_MAP_TYPE BPF_MAP_TYPE_SOCKMAP
+#include "./test_sockmap_kern.h"
diff --git a/tools/testing/selftests/bpf/test_sockmap_kern.h b/tools/testing/selftests/bpf/test_sockmap_kern.h
new file mode 100644
index 000000000..8e8e41780
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_sockmap_kern.h
@@ -0,0 +1,363 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2017-2018 Covalent IO, Inc. http://covalent.io */
+#include <stddef.h>
+#include <string.h>
+#include <linux/bpf.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/in.h>
+#include <linux/udp.h>
+#include <linux/tcp.h>
+#include <linux/pkt_cls.h>
+#include <sys/socket.h>
+#include "bpf_helpers.h"
+#include "bpf_endian.h"
+
+/* Sockmap sample program connects a client and a backend together
+ * using cgroups.
+ *
+ *    client:X <---> frontend:80 client:X <---> backend:80
+ *
+ * For simplicity we hard code values here and bind 1:1. The hard
+ * coded values are part of the setup in sockmap.sh script that
+ * is associated with this BPF program.
+ *
+ * The bpf_printk is verbose and prints information as connections
+ * are established and verdicts are decided.
+ */
+
+#define bpf_printk(fmt, ...)					\
+({								\
+	       char ____fmt[] = fmt;				\
+	       bpf_trace_printk(____fmt, sizeof(____fmt),	\
+				##__VA_ARGS__);			\
+})
+
+struct bpf_map_def SEC("maps") sock_map = {
+	.type = TEST_MAP_TYPE,
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.max_entries = 20,
+};
+
+struct bpf_map_def SEC("maps") sock_map_txmsg = {
+	.type = TEST_MAP_TYPE,
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.max_entries = 20,
+};
+
+struct bpf_map_def SEC("maps") sock_map_redir = {
+	.type = TEST_MAP_TYPE,
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.max_entries = 20,
+};
+
+struct bpf_map_def SEC("maps") sock_apply_bytes = {
+	.type = BPF_MAP_TYPE_ARRAY,
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.max_entries = 1
+};
+
+struct bpf_map_def SEC("maps") sock_cork_bytes = {
+	.type = BPF_MAP_TYPE_ARRAY,
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.max_entries = 1
+};
+
+struct bpf_map_def SEC("maps") sock_pull_bytes = {
+	.type = BPF_MAP_TYPE_ARRAY,
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.max_entries = 2
+};
+
+struct bpf_map_def SEC("maps") sock_redir_flags = {
+	.type = BPF_MAP_TYPE_ARRAY,
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.max_entries = 1
+};
+
+struct bpf_map_def SEC("maps") sock_skb_opts = {
+	.type = BPF_MAP_TYPE_ARRAY,
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.max_entries = 1
+};
+
+SEC("sk_skb1")
+int bpf_prog1(struct __sk_buff *skb)
+{
+	return skb->len;
+}
+
+SEC("sk_skb2")
+int bpf_prog2(struct __sk_buff *skb)
+{
+	__u32 lport = skb->local_port;
+	__u32 rport = skb->remote_port;
+	int len, *f, ret, zero = 0;
+	__u64 flags = 0;
+
+	if (lport == 10000)
+		ret = 10;
+	else
+		ret = 1;
+
+	len = (__u32)skb->data_end - (__u32)skb->data;
+	f = bpf_map_lookup_elem(&sock_skb_opts, &zero);
+	if (f && *f) {
+		ret = 3;
+		flags = *f;
+	}
+
+	bpf_printk("sk_skb2: redirect(%iB) flags=%i\n",
+		   len, flags);
+#ifdef SOCKMAP
+	return bpf_sk_redirect_map(skb, &sock_map, ret, flags);
+#else
+	return bpf_sk_redirect_hash(skb, &sock_map, &ret, flags);
+#endif
+
+}
+
+SEC("sockops")
+int bpf_sockmap(struct bpf_sock_ops *skops)
+{
+	__u32 lport, rport;
+	int op, err = 0, index, key, ret;
+
+
+	op = (int) skops->op;
+
+	switch (op) {
+	case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB:
+		lport = skops->local_port;
+		rport = skops->remote_port;
+
+		if (lport == 10000) {
+			ret = 1;
+#ifdef SOCKMAP
+			err = bpf_sock_map_update(skops, &sock_map, &ret,
+						  BPF_NOEXIST);
+#else
+			err = bpf_sock_hash_update(skops, &sock_map, &ret,
+						   BPF_NOEXIST);
+#endif
+			bpf_printk("passive(%i -> %i) map ctx update err: %d\n",
+				   lport, bpf_ntohl(rport), err);
+		}
+		break;
+	case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB:
+		lport = skops->local_port;
+		rport = skops->remote_port;
+
+		if (bpf_ntohl(rport) == 10001) {
+			ret = 10;
+#ifdef SOCKMAP
+			err = bpf_sock_map_update(skops, &sock_map, &ret,
+						  BPF_NOEXIST);
+#else
+			err = bpf_sock_hash_update(skops, &sock_map, &ret,
+						   BPF_NOEXIST);
+#endif
+			bpf_printk("active(%i -> %i) map ctx update err: %d\n",
+				   lport, bpf_ntohl(rport), err);
+		}
+		break;
+	default:
+		break;
+	}
+
+	return 0;
+}
+
+SEC("sk_msg1")
+int bpf_prog4(struct sk_msg_md *msg)
+{
+	int *bytes, zero = 0, one = 1;
+	int *start, *end;
+
+	bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
+	if (bytes)
+		bpf_msg_apply_bytes(msg, *bytes);
+	bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero);
+	if (bytes)
+		bpf_msg_cork_bytes(msg, *bytes);
+	start = bpf_map_lookup_elem(&sock_pull_bytes, &zero);
+	end = bpf_map_lookup_elem(&sock_pull_bytes, &one);
+	if (start && end)
+		bpf_msg_pull_data(msg, *start, *end, 0);
+	return SK_PASS;
+}
+
+SEC("sk_msg2")
+int bpf_prog5(struct sk_msg_md *msg)
+{
+	int err1 = -1, err2 = -1, zero = 0, one = 1;
+	int *bytes, *start, *end, len1, len2;
+
+	bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
+	if (bytes)
+		err1 = bpf_msg_apply_bytes(msg, *bytes);
+	bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero);
+	if (bytes)
+		err2 = bpf_msg_cork_bytes(msg, *bytes);
+	len1 = (__u64)msg->data_end - (__u64)msg->data;
+	start = bpf_map_lookup_elem(&sock_pull_bytes, &zero);
+	end = bpf_map_lookup_elem(&sock_pull_bytes, &one);
+	if (start && end) {
+		int err;
+
+		bpf_printk("sk_msg2: pull(%i:%i)\n",
+			   start ? *start : 0, end ? *end : 0);
+		err = bpf_msg_pull_data(msg, *start, *end, 0);
+		if (err)
+			bpf_printk("sk_msg2: pull_data err %i\n",
+				   err);
+		len2 = (__u64)msg->data_end - (__u64)msg->data;
+		bpf_printk("sk_msg2: length update %i->%i\n",
+			   len1, len2);
+	}
+	bpf_printk("sk_msg2: data length %i err1 %i err2 %i\n",
+		   len1, err1, err2);
+	return SK_PASS;
+}
+
+SEC("sk_msg3")
+int bpf_prog6(struct sk_msg_md *msg)
+{
+	int *bytes, zero = 0, one = 1, key = 0;
+	int *start, *end, *f;
+	__u64 flags = 0;
+
+	bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
+	if (bytes)
+		bpf_msg_apply_bytes(msg, *bytes);
+	bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero);
+	if (bytes)
+		bpf_msg_cork_bytes(msg, *bytes);
+	start = bpf_map_lookup_elem(&sock_pull_bytes, &zero);
+	end = bpf_map_lookup_elem(&sock_pull_bytes, &one);
+	if (start && end)
+		bpf_msg_pull_data(msg, *start, *end, 0);
+	f = bpf_map_lookup_elem(&sock_redir_flags, &zero);
+	if (f && *f) {
+		key = 2;
+		flags = *f;
+	}
+#ifdef SOCKMAP
+	return bpf_msg_redirect_map(msg, &sock_map_redir, key, flags);
+#else
+	return bpf_msg_redirect_hash(msg, &sock_map_redir, &key, flags);
+#endif
+}
+
+SEC("sk_msg4")
+int bpf_prog7(struct sk_msg_md *msg)
+{
+	int err1 = 0, err2 = 0, zero = 0, one = 1, key = 0;
+	int *f, *bytes, *start, *end, len1, len2;
+	__u64 flags = 0;
+
+		int err;
+	bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
+	if (bytes)
+		err1 = bpf_msg_apply_bytes(msg, *bytes);
+	bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero);
+	if (bytes)
+		err2 = bpf_msg_cork_bytes(msg, *bytes);
+	len1 = (__u64)msg->data_end - (__u64)msg->data;
+	start = bpf_map_lookup_elem(&sock_pull_bytes, &zero);
+	end = bpf_map_lookup_elem(&sock_pull_bytes, &one);
+	if (start && end) {
+
+		bpf_printk("sk_msg2: pull(%i:%i)\n",
+			   start ? *start : 0, end ? *end : 0);
+		err = bpf_msg_pull_data(msg, *start, *end, 0);
+		if (err)
+			bpf_printk("sk_msg2: pull_data err %i\n",
+				   err);
+		len2 = (__u64)msg->data_end - (__u64)msg->data;
+		bpf_printk("sk_msg2: length update %i->%i\n",
+			   len1, len2);
+	}
+	f = bpf_map_lookup_elem(&sock_redir_flags, &zero);
+	if (f && *f) {
+		key = 2;
+		flags = *f;
+	}
+	bpf_printk("sk_msg3: redirect(%iB) flags=%i err=%i\n",
+		   len1, flags, err1 ? err1 : err2);
+#ifdef SOCKMAP
+	err = bpf_msg_redirect_map(msg, &sock_map_redir, key, flags);
+#else
+	err = bpf_msg_redirect_hash(msg, &sock_map_redir, &key, flags);
+#endif
+	bpf_printk("sk_msg3: err %i\n", err);
+	return err;
+}
+
+SEC("sk_msg5")
+int bpf_prog8(struct sk_msg_md *msg)
+{
+	void *data_end = (void *)(long) msg->data_end;
+	void *data = (void *)(long) msg->data;
+	int ret = 0, *bytes, zero = 0;
+
+	bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
+	if (bytes) {
+		ret = bpf_msg_apply_bytes(msg, *bytes);
+		if (ret)
+			return SK_DROP;
+	} else {
+		return SK_DROP;
+	}
+	return SK_PASS;
+}
+SEC("sk_msg6")
+int bpf_prog9(struct sk_msg_md *msg)
+{
+	void *data_end = (void *)(long) msg->data_end;
+	void *data = (void *)(long) msg->data;
+	int ret = 0, *bytes, zero = 0;
+
+	bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero);
+	if (bytes) {
+		if (((__u64)data_end - (__u64)data) >= *bytes)
+			return SK_PASS;
+		ret = bpf_msg_cork_bytes(msg, *bytes);
+		if (ret)
+			return SK_DROP;
+	}
+	return SK_PASS;
+}
+
+SEC("sk_msg7")
+int bpf_prog10(struct sk_msg_md *msg)
+{
+	int *bytes, zero = 0, one = 1;
+	int *start, *end;
+
+	bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
+	if (bytes)
+		bpf_msg_apply_bytes(msg, *bytes);
+	bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero);
+	if (bytes)
+		bpf_msg_cork_bytes(msg, *bytes);
+	start = bpf_map_lookup_elem(&sock_pull_bytes, &zero);
+	end = bpf_map_lookup_elem(&sock_pull_bytes, &one);
+	if (start && end)
+		bpf_msg_pull_data(msg, *start, *end, 0);
+
+	return SK_DROP;
+}
+
+int _version SEC("version") = 1;
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_stacktrace_build_id.c b/tools/testing/selftests/bpf/test_stacktrace_build_id.c
new file mode 100644
index 000000000..d86c281e9
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_stacktrace_build_id.c
@@ -0,0 +1,76 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2018 Facebook
+
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+
+#ifndef PERF_MAX_STACK_DEPTH
+#define PERF_MAX_STACK_DEPTH         127
+#endif
+
+struct bpf_map_def SEC("maps") control_map = {
+	.type = BPF_MAP_TYPE_ARRAY,
+	.key_size = sizeof(__u32),
+	.value_size = sizeof(__u32),
+	.max_entries = 1,
+};
+
+struct bpf_map_def SEC("maps") stackid_hmap = {
+	.type = BPF_MAP_TYPE_HASH,
+	.key_size = sizeof(__u32),
+	.value_size = sizeof(__u32),
+	.max_entries = 16384,
+};
+
+struct bpf_map_def SEC("maps") stackmap = {
+	.type = BPF_MAP_TYPE_STACK_TRACE,
+	.key_size = sizeof(__u32),
+	.value_size = sizeof(struct bpf_stack_build_id)
+		* PERF_MAX_STACK_DEPTH,
+	.max_entries = 128,
+	.map_flags = BPF_F_STACK_BUILD_ID,
+};
+
+struct bpf_map_def SEC("maps") stack_amap = {
+	.type = BPF_MAP_TYPE_ARRAY,
+	.key_size = sizeof(__u32),
+	.value_size = sizeof(struct bpf_stack_build_id)
+		* PERF_MAX_STACK_DEPTH,
+	.max_entries = 128,
+};
+
+/* taken from /sys/kernel/debug/tracing/events/random/urandom_read/format */
+struct random_urandom_args {
+	unsigned long long pad;
+	int got_bits;
+	int pool_left;
+	int input_left;
+};
+
+SEC("tracepoint/random/urandom_read")
+int oncpu(struct random_urandom_args *args)
+{
+	__u32 max_len = sizeof(struct bpf_stack_build_id)
+			* PERF_MAX_STACK_DEPTH;
+	__u32 key = 0, val = 0, *value_p;
+	void *stack_p;
+
+	value_p = bpf_map_lookup_elem(&control_map, &key);
+	if (value_p && *value_p)
+		return 0; /* skip if non-zero *value_p */
+
+	/* The size of stackmap and stackid_hmap should be the same */
+	key = bpf_get_stackid(args, &stackmap, BPF_F_USER_STACK);
+	if ((int)key >= 0) {
+		bpf_map_update_elem(&stackid_hmap, &key, &val, 0);
+		stack_p = bpf_map_lookup_elem(&stack_amap, &key);
+		if (stack_p)
+			bpf_get_stack(args, stack_p, max_len,
+				      BPF_F_USER_STACK | BPF_F_USER_BUILD_ID);
+	}
+
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
+__u32 _version SEC("version") = 1; /* ignored by tracepoints, required by libbpf.a */
diff --git a/tools/testing/selftests/bpf/test_stacktrace_map.c b/tools/testing/selftests/bpf/test_stacktrace_map.c
new file mode 100644
index 000000000..af111af7c
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_stacktrace_map.c
@@ -0,0 +1,75 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2018 Facebook
+
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+
+#ifndef PERF_MAX_STACK_DEPTH
+#define PERF_MAX_STACK_DEPTH         127
+#endif
+
+struct bpf_map_def SEC("maps") control_map = {
+	.type = BPF_MAP_TYPE_ARRAY,
+	.key_size = sizeof(__u32),
+	.value_size = sizeof(__u32),
+	.max_entries = 1,
+};
+
+struct bpf_map_def SEC("maps") stackid_hmap = {
+	.type = BPF_MAP_TYPE_HASH,
+	.key_size = sizeof(__u32),
+	.value_size = sizeof(__u32),
+	.max_entries = 16384,
+};
+
+struct bpf_map_def SEC("maps") stackmap = {
+	.type = BPF_MAP_TYPE_STACK_TRACE,
+	.key_size = sizeof(__u32),
+	.value_size = sizeof(__u64) * PERF_MAX_STACK_DEPTH,
+	.max_entries = 16384,
+};
+
+struct bpf_map_def SEC("maps") stack_amap = {
+	.type = BPF_MAP_TYPE_ARRAY,
+	.key_size = sizeof(__u32),
+	.value_size = sizeof(__u64) * PERF_MAX_STACK_DEPTH,
+	.max_entries = 16384,
+};
+
+/* taken from /sys/kernel/debug/tracing/events/sched/sched_switch/format */
+struct sched_switch_args {
+	unsigned long long pad;
+	char prev_comm[16];
+	int prev_pid;
+	int prev_prio;
+	long long prev_state;
+	char next_comm[16];
+	int next_pid;
+	int next_prio;
+};
+
+SEC("tracepoint/sched/sched_switch")
+int oncpu(struct sched_switch_args *ctx)
+{
+	__u32 max_len = PERF_MAX_STACK_DEPTH * sizeof(__u64);
+	__u32 key = 0, val = 0, *value_p;
+	void *stack_p;
+
+	value_p = bpf_map_lookup_elem(&control_map, &key);
+	if (value_p && *value_p)
+		return 0; /* skip if non-zero *value_p */
+
+	/* The size of stackmap and stackid_hmap should be the same */
+	key = bpf_get_stackid(ctx, &stackmap, 0);
+	if ((int)key >= 0) {
+		bpf_map_update_elem(&stackid_hmap, &key, &val, 0);
+		stack_p = bpf_map_lookup_elem(&stack_amap, &key);
+		if (stack_p)
+			bpf_get_stack(ctx, stack_p, max_len, 0);
+	}
+
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
+__u32 _version SEC("version") = 1; /* ignored by tracepoints, required by libbpf.a */
diff --git a/tools/testing/selftests/bpf/test_tag.c b/tools/testing/selftests/bpf/test_tag.c
new file mode 100644
index 000000000..6272c784c
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_tag.c
@@ -0,0 +1,202 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <ctype.h>
+#include <time.h>
+#include <errno.h>
+#include <unistd.h>
+#include <string.h>
+#include <sched.h>
+#include <limits.h>
+#include <assert.h>
+
+#include <sys/socket.h>
+
+#include <linux/filter.h>
+#include <linux/bpf.h>
+#include <linux/if_alg.h>
+
+#include <bpf/bpf.h>
+
+#include "../../../include/linux/filter.h"
+#include "bpf_rlimit.h"
+
+static struct bpf_insn prog[BPF_MAXINSNS];
+
+static void bpf_gen_imm_prog(unsigned int insns, int fd_map)
+{
+	int i;
+
+	srand(time(NULL));
+	for (i = 0; i < insns; i++)
+		prog[i] = BPF_ALU64_IMM(BPF_MOV, i % BPF_REG_10, rand());
+	prog[i - 1] = BPF_EXIT_INSN();
+}
+
+static void bpf_gen_map_prog(unsigned int insns, int fd_map)
+{
+	int i, j = 0;
+
+	for (i = 0; i + 1 < insns; i += 2) {
+		struct bpf_insn tmp[] = {
+			BPF_LD_MAP_FD(j++ % BPF_REG_10, fd_map)
+		};
+
+		memcpy(&prog[i], tmp, sizeof(tmp));
+	}
+	if (insns % 2 == 0)
+		prog[insns - 2] = BPF_ALU64_IMM(BPF_MOV, i % BPF_REG_10, 42);
+	prog[insns - 1] = BPF_EXIT_INSN();
+}
+
+static int bpf_try_load_prog(int insns, int fd_map,
+			     void (*bpf_filler)(unsigned int insns,
+						int fd_map))
+{
+	int fd_prog;
+
+	bpf_filler(insns, fd_map);
+	fd_prog = bpf_load_program(BPF_PROG_TYPE_SCHED_CLS, prog, insns, "", 0,
+				   NULL, 0);
+	assert(fd_prog > 0);
+	if (fd_map > 0)
+		bpf_filler(insns, 0);
+	return fd_prog;
+}
+
+static int __hex2bin(char ch)
+{
+	if ((ch >= '0') && (ch <= '9'))
+		return ch - '0';
+	ch = tolower(ch);
+	if ((ch >= 'a') && (ch <= 'f'))
+		return ch - 'a' + 10;
+	return -1;
+}
+
+static int hex2bin(uint8_t *dst, const char *src, size_t count)
+{
+	while (count--) {
+		int hi = __hex2bin(*src++);
+		int lo = __hex2bin(*src++);
+
+		if ((hi < 0) || (lo < 0))
+			return -1;
+		*dst++ = (hi << 4) | lo;
+	}
+	return 0;
+}
+
+static void tag_from_fdinfo(int fd_prog, uint8_t *tag, uint32_t len)
+{
+	const int prefix_len = sizeof("prog_tag:\t") - 1;
+	char buff[256];
+	int ret = -1;
+	FILE *fp;
+
+	snprintf(buff, sizeof(buff), "/proc/%d/fdinfo/%d", getpid(),
+		 fd_prog);
+	fp = fopen(buff, "r");
+	assert(fp);
+
+	while (fgets(buff, sizeof(buff), fp)) {
+		if (strncmp(buff, "prog_tag:\t", prefix_len))
+			continue;
+		ret = hex2bin(tag, buff + prefix_len, len);
+		break;
+	}
+
+	fclose(fp);
+	assert(!ret);
+}
+
+static void tag_from_alg(int insns, uint8_t *tag, uint32_t len)
+{
+	static const struct sockaddr_alg alg = {
+		.salg_family	= AF_ALG,
+		.salg_type	= "hash",
+		.salg_name	= "sha1",
+	};
+	int fd_base, fd_alg, ret;
+	ssize_t size;
+
+	fd_base = socket(AF_ALG, SOCK_SEQPACKET, 0);
+	assert(fd_base > 0);
+
+	ret = bind(fd_base, (struct sockaddr *)&alg, sizeof(alg));
+	assert(!ret);
+
+	fd_alg = accept(fd_base, NULL, 0);
+	assert(fd_alg > 0);
+
+	insns *= sizeof(struct bpf_insn);
+	size = write(fd_alg, prog, insns);
+	assert(size == insns);
+
+	size = read(fd_alg, tag, len);
+	assert(size == len);
+
+	close(fd_alg);
+	close(fd_base);
+}
+
+static void tag_dump(const char *prefix, uint8_t *tag, uint32_t len)
+{
+	int i;
+
+	printf("%s", prefix);
+	for (i = 0; i < len; i++)
+		printf("%02x", tag[i]);
+	printf("\n");
+}
+
+static void tag_exit_report(int insns, int fd_map, uint8_t *ftag,
+			    uint8_t *atag, uint32_t len)
+{
+	printf("Program tag mismatch for %d insns%s!\n", insns,
+	       fd_map < 0 ? "" : " with map");
+
+	tag_dump("  fdinfo result: ", ftag, len);
+	tag_dump("  af_alg result: ", atag, len);
+	exit(1);
+}
+
+static void do_test(uint32_t *tests, int start_insns, int fd_map,
+		    void (*bpf_filler)(unsigned int insns, int fd))
+{
+	int i, fd_prog;
+
+	for (i = start_insns; i <= BPF_MAXINSNS; i++) {
+		uint8_t ftag[8], atag[sizeof(ftag)];
+
+		fd_prog = bpf_try_load_prog(i, fd_map, bpf_filler);
+		tag_from_fdinfo(fd_prog, ftag, sizeof(ftag));
+		tag_from_alg(i, atag, sizeof(atag));
+		if (memcmp(ftag, atag, sizeof(ftag)))
+			tag_exit_report(i, fd_map, ftag, atag, sizeof(ftag));
+
+		close(fd_prog);
+		sched_yield();
+		(*tests)++;
+	}
+}
+
+int main(void)
+{
+	uint32_t tests = 0;
+	int i, fd_map;
+
+	fd_map = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(int),
+				sizeof(int), 1, BPF_F_NO_PREALLOC);
+	assert(fd_map > 0);
+
+	for (i = 0; i < 5; i++) {
+		do_test(&tests, 2, -1,     bpf_gen_imm_prog);
+		do_test(&tests, 3, fd_map, bpf_gen_map_prog);
+	}
+
+	printf("test_tag: OK (%u tests)\n", tests);
+	close(fd_map);
+	return 0;
+}
diff --git a/tools/testing/selftests/bpf/test_tcp_estats.c b/tools/testing/selftests/bpf/test_tcp_estats.c
new file mode 100644
index 000000000..bee3bbecc
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_tcp_estats.c
@@ -0,0 +1,258 @@
+/* Copyright (c) 2017 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+
+/* This program shows clang/llvm is able to generate code pattern
+ * like:
+ *   _tcp_send_active_reset:
+ *      0:       bf 16 00 00 00 00 00 00         r6 = r1
+ *    ......
+ *    335:       b7 01 00 00 0f 00 00 00         r1 = 15
+ *    336:       05 00 48 00 00 00 00 00         goto 72
+ *
+ *   LBB0_3:
+ *    337:       b7 01 00 00 01 00 00 00         r1 = 1
+ *    338:       63 1a d0 ff 00 00 00 00         *(u32 *)(r10 - 48) = r1
+ *    408:       b7 01 00 00 03 00 00 00         r1 = 3
+ *
+ *   LBB0_4:
+ *    409:       71 a2 fe ff 00 00 00 00         r2 = *(u8 *)(r10 - 2)
+ *    410:       bf a7 00 00 00 00 00 00         r7 = r10
+ *    411:       07 07 00 00 b8 ff ff ff         r7 += -72
+ *    412:       bf 73 00 00 00 00 00 00         r3 = r7
+ *    413:       0f 13 00 00 00 00 00 00         r3 += r1
+ *    414:       73 23 2d 00 00 00 00 00         *(u8 *)(r3 + 45) = r2
+ *
+ * From the above code snippet, the code generated by the compiler
+ * is reasonable. The "r1" is assigned to different values in basic
+ * blocks "_tcp_send_active_reset" and "LBB0_3", and used in "LBB0_4".
+ * The verifier should be able to handle such code patterns.
+ */
+#include <string.h>
+#include <linux/bpf.h>
+#include <linux/ipv6.h>
+#include <linux/version.h>
+#include <sys/socket.h>
+#include "bpf_helpers.h"
+
+#define _(P) ({typeof(P) val = 0; bpf_probe_read(&val, sizeof(val), &P); val;})
+#define TCP_ESTATS_MAGIC 0xBAADBEEF
+
+/* This test case needs "sock" and "pt_regs" data structure.
+ * Recursively, "sock" needs "sock_common" and "inet_sock".
+ * However, this is a unit test case only for
+ * verifier purpose without bpf program execution.
+ * We can safely mock much simpler data structures, basically
+ * only taking the necessary fields from kernel headers.
+ */
+typedef __u32 __bitwise __portpair;
+typedef __u64 __bitwise __addrpair;
+
+struct sock_common {
+	unsigned short		skc_family;
+	union {
+		__addrpair	skc_addrpair;
+		struct {
+			__be32	skc_daddr;
+			__be32	skc_rcv_saddr;
+		};
+	};
+	union {
+		__portpair	skc_portpair;
+		struct {
+			__be16	skc_dport;
+			__u16	skc_num;
+		};
+	};
+	struct in6_addr		skc_v6_daddr;
+	struct in6_addr		skc_v6_rcv_saddr;
+};
+
+struct sock {
+	struct sock_common	__sk_common;
+#define sk_family		__sk_common.skc_family
+#define sk_v6_daddr		__sk_common.skc_v6_daddr
+#define sk_v6_rcv_saddr		__sk_common.skc_v6_rcv_saddr
+};
+
+struct inet_sock {
+	struct sock		sk;
+#define inet_daddr		sk.__sk_common.skc_daddr
+#define inet_dport		sk.__sk_common.skc_dport
+	__be32			inet_saddr;
+	__be16			inet_sport;
+};
+
+struct pt_regs {
+	long di;
+};
+
+static inline struct inet_sock *inet_sk(const struct sock *sk)
+{
+	return (struct inet_sock *)sk;
+}
+
+/* Define various data structures for state recording.
+ * Some fields are not used due to test simplification.
+ */
+enum tcp_estats_addrtype {
+	TCP_ESTATS_ADDRTYPE_IPV4 = 1,
+	TCP_ESTATS_ADDRTYPE_IPV6 = 2
+};
+
+enum tcp_estats_event_type {
+	TCP_ESTATS_ESTABLISH,
+	TCP_ESTATS_PERIODIC,
+	TCP_ESTATS_TIMEOUT,
+	TCP_ESTATS_RETRANSMIT_TIMEOUT,
+	TCP_ESTATS_RETRANSMIT_OTHER,
+	TCP_ESTATS_SYN_RETRANSMIT,
+	TCP_ESTATS_SYNACK_RETRANSMIT,
+	TCP_ESTATS_TERM,
+	TCP_ESTATS_TX_RESET,
+	TCP_ESTATS_RX_RESET,
+	TCP_ESTATS_WRITE_TIMEOUT,
+	TCP_ESTATS_CONN_TIMEOUT,
+	TCP_ESTATS_ACK_LATENCY,
+	TCP_ESTATS_NEVENTS,
+};
+
+struct tcp_estats_event {
+	int pid;
+	int cpu;
+	unsigned long ts;
+	unsigned int magic;
+	enum tcp_estats_event_type event_type;
+};
+
+/* The below data structure is packed in order for
+ * llvm compiler to generate expected code.
+ */
+struct tcp_estats_conn_id {
+	unsigned int localaddressType;
+	struct {
+		unsigned char data[16];
+	} localaddress;
+	struct {
+		unsigned char data[16];
+	} remaddress;
+	unsigned short    localport;
+	unsigned short    remport;
+} __attribute__((__packed__));
+
+struct tcp_estats_basic_event {
+	struct tcp_estats_event event;
+	struct tcp_estats_conn_id conn_id;
+};
+
+struct bpf_map_def SEC("maps") ev_record_map = {
+	.type = BPF_MAP_TYPE_HASH,
+	.key_size = sizeof(__u32),
+	.value_size = sizeof(struct tcp_estats_basic_event),
+	.max_entries = 1024,
+};
+
+struct dummy_tracepoint_args {
+	unsigned long long pad;
+	struct sock *sock;
+};
+
+static __always_inline void tcp_estats_ev_init(struct tcp_estats_event *event,
+					       enum tcp_estats_event_type type)
+{
+	event->magic = TCP_ESTATS_MAGIC;
+	event->ts = bpf_ktime_get_ns();
+	event->event_type = type;
+}
+
+static __always_inline void unaligned_u32_set(unsigned char *to, __u8 *from)
+{
+	to[0] = _(from[0]);
+	to[1] = _(from[1]);
+	to[2] = _(from[2]);
+	to[3] = _(from[3]);
+}
+
+static __always_inline void conn_id_ipv4_init(struct tcp_estats_conn_id *conn_id,
+					      __be32 *saddr, __be32 *daddr)
+{
+	conn_id->localaddressType = TCP_ESTATS_ADDRTYPE_IPV4;
+
+	unaligned_u32_set(conn_id->localaddress.data, (__u8 *)saddr);
+	unaligned_u32_set(conn_id->remaddress.data, (__u8 *)daddr);
+}
+
+static __always_inline void conn_id_ipv6_init(struct tcp_estats_conn_id *conn_id,
+					      __be32 *saddr, __be32 *daddr)
+{
+	conn_id->localaddressType = TCP_ESTATS_ADDRTYPE_IPV6;
+
+	unaligned_u32_set(conn_id->localaddress.data, (__u8 *)saddr);
+	unaligned_u32_set(conn_id->localaddress.data + sizeof(__u32),
+			  (__u8 *)(saddr + 1));
+	unaligned_u32_set(conn_id->localaddress.data + sizeof(__u32) * 2,
+			  (__u8 *)(saddr + 2));
+	unaligned_u32_set(conn_id->localaddress.data + sizeof(__u32) * 3,
+			  (__u8 *)(saddr + 3));
+
+	unaligned_u32_set(conn_id->remaddress.data,
+			  (__u8 *)(daddr));
+	unaligned_u32_set(conn_id->remaddress.data + sizeof(__u32),
+			  (__u8 *)(daddr + 1));
+	unaligned_u32_set(conn_id->remaddress.data + sizeof(__u32) * 2,
+			  (__u8 *)(daddr + 2));
+	unaligned_u32_set(conn_id->remaddress.data + sizeof(__u32) * 3,
+			  (__u8 *)(daddr + 3));
+}
+
+static __always_inline void tcp_estats_conn_id_init(struct tcp_estats_conn_id *conn_id,
+						    struct sock *sk)
+{
+	conn_id->localport = _(inet_sk(sk)->inet_sport);
+	conn_id->remport = _(inet_sk(sk)->inet_dport);
+
+	if (_(sk->sk_family) == AF_INET6)
+		conn_id_ipv6_init(conn_id,
+				  sk->sk_v6_rcv_saddr.s6_addr32,
+				  sk->sk_v6_daddr.s6_addr32);
+	else
+		conn_id_ipv4_init(conn_id,
+				  &inet_sk(sk)->inet_saddr,
+				  &inet_sk(sk)->inet_daddr);
+}
+
+static __always_inline void tcp_estats_init(struct sock *sk,
+					    struct tcp_estats_event *event,
+					    struct tcp_estats_conn_id *conn_id,
+					    enum tcp_estats_event_type type)
+{
+	tcp_estats_ev_init(event, type);
+	tcp_estats_conn_id_init(conn_id, sk);
+}
+
+static __always_inline void send_basic_event(struct sock *sk,
+					     enum tcp_estats_event_type type)
+{
+	struct tcp_estats_basic_event ev;
+	__u32 key = bpf_get_prandom_u32();
+
+	memset(&ev, 0, sizeof(ev));
+	tcp_estats_init(sk, &ev.event, &ev.conn_id, type);
+	bpf_map_update_elem(&ev_record_map, &key, &ev, BPF_ANY);
+}
+
+SEC("dummy_tracepoint")
+int _dummy_tracepoint(struct dummy_tracepoint_args *arg)
+{
+	if (!arg->sock)
+		return 0;
+
+	send_basic_event(arg->sock, TCP_ESTATS_TX_RESET);
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
+__u32 _version SEC("version") = 1; /* ignored by tracepoints, required by libbpf.a */
diff --git a/tools/testing/selftests/bpf/test_tcpbpf.h b/tools/testing/selftests/bpf/test_tcpbpf.h
new file mode 100644
index 000000000..7bcfa6207
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_tcpbpf.h
@@ -0,0 +1,17 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#ifndef _TEST_TCPBPF_H
+#define _TEST_TCPBPF_H
+
+struct tcpbpf_globals {
+	__u32 event_map;
+	__u32 total_retrans;
+	__u32 data_segs_in;
+	__u32 data_segs_out;
+	__u32 bad_cb_test_rv;
+	__u32 good_cb_test_rv;
+	__u64 bytes_received;
+	__u64 bytes_acked;
+	__u32 num_listen;
+};
+#endif
diff --git a/tools/testing/selftests/bpf/test_tcpbpf_kern.c b/tools/testing/selftests/bpf/test_tcpbpf_kern.c
new file mode 100644
index 000000000..4b7fd540c
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_tcpbpf_kern.c
@@ -0,0 +1,121 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stddef.h>
+#include <string.h>
+#include <linux/bpf.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <linux/ip.h>
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/tcp.h>
+#include <netinet/in.h>
+#include "bpf_helpers.h"
+#include "bpf_endian.h"
+#include "test_tcpbpf.h"
+
+struct bpf_map_def SEC("maps") global_map = {
+	.type = BPF_MAP_TYPE_ARRAY,
+	.key_size = sizeof(__u32),
+	.value_size = sizeof(struct tcpbpf_globals),
+	.max_entries = 2,
+};
+
+static inline void update_event_map(int event)
+{
+	__u32 key = 0;
+	struct tcpbpf_globals g, *gp;
+
+	gp = bpf_map_lookup_elem(&global_map, &key);
+	if (gp == NULL) {
+		struct tcpbpf_globals g = {0};
+
+		g.event_map |= (1 << event);
+		bpf_map_update_elem(&global_map, &key, &g,
+			    BPF_ANY);
+	} else {
+		g = *gp;
+		g.event_map |= (1 << event);
+		bpf_map_update_elem(&global_map, &key, &g,
+			    BPF_ANY);
+	}
+}
+
+int _version SEC("version") = 1;
+
+SEC("sockops")
+int bpf_testcb(struct bpf_sock_ops *skops)
+{
+	int rv = -1;
+	int bad_call_rv = 0;
+	int good_call_rv = 0;
+	int op;
+	int v = 0;
+
+	op = (int) skops->op;
+
+	update_event_map(op);
+
+	switch (op) {
+	case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB:
+		/* Test failure to set largest cb flag (assumes not defined) */
+		bad_call_rv = bpf_sock_ops_cb_flags_set(skops, 0x80);
+		/* Set callback */
+		good_call_rv = bpf_sock_ops_cb_flags_set(skops,
+						 BPF_SOCK_OPS_STATE_CB_FLAG);
+		/* Update results */
+		{
+			__u32 key = 0;
+			struct tcpbpf_globals g, *gp;
+
+			gp = bpf_map_lookup_elem(&global_map, &key);
+			if (!gp)
+				break;
+			g = *gp;
+			g.bad_cb_test_rv = bad_call_rv;
+			g.good_cb_test_rv = good_call_rv;
+			bpf_map_update_elem(&global_map, &key, &g,
+					    BPF_ANY);
+		}
+		break;
+	case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB:
+		skops->sk_txhash = 0x12345f;
+		v = 0xff;
+		rv = bpf_setsockopt(skops, SOL_IPV6, IPV6_TCLASS, &v,
+				    sizeof(v));
+		break;
+	case BPF_SOCK_OPS_RTO_CB:
+		break;
+	case BPF_SOCK_OPS_RETRANS_CB:
+		break;
+	case BPF_SOCK_OPS_STATE_CB:
+		if (skops->args[1] == BPF_TCP_CLOSE) {
+			__u32 key = 0;
+			struct tcpbpf_globals g, *gp;
+
+			gp = bpf_map_lookup_elem(&global_map, &key);
+			if (!gp)
+				break;
+			g = *gp;
+			if (skops->args[0] == BPF_TCP_LISTEN) {
+				g.num_listen++;
+			} else {
+				g.total_retrans = skops->total_retrans;
+				g.data_segs_in = skops->data_segs_in;
+				g.data_segs_out = skops->data_segs_out;
+				g.bytes_received = skops->bytes_received;
+				g.bytes_acked = skops->bytes_acked;
+			}
+			bpf_map_update_elem(&global_map, &key, &g,
+					    BPF_ANY);
+		}
+		break;
+	case BPF_SOCK_OPS_TCP_LISTEN_CB:
+		bpf_sock_ops_cb_flags_set(skops, BPF_SOCK_OPS_STATE_CB_FLAG);
+		break;
+	default:
+		rv = -1;
+	}
+	skops->reply = rv;
+	return 1;
+}
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_tcpbpf_user.c b/tools/testing/selftests/bpf/test_tcpbpf_user.c
new file mode 100644
index 000000000..a275c2971
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_tcpbpf_user.c
@@ -0,0 +1,131 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <inttypes.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <errno.h>
+#include <string.h>
+#include <linux/bpf.h>
+#include <sys/types.h>
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+
+#include "bpf_rlimit.h"
+#include "bpf_util.h"
+#include "cgroup_helpers.h"
+
+#include "test_tcpbpf.h"
+
+#define EXPECT_EQ(expected, actual, fmt)			\
+	do {							\
+		if ((expected) != (actual)) {			\
+			printf("  Value of: " #actual "\n"	\
+			       "    Actual: %" fmt "\n"		\
+			       "  Expected: %" fmt "\n",	\
+			       (actual), (expected));		\
+			goto err;				\
+		}						\
+	} while (0)
+
+int verify_result(const struct tcpbpf_globals *result)
+{
+	__u32 expected_events;
+
+	expected_events = ((1 << BPF_SOCK_OPS_TIMEOUT_INIT) |
+			   (1 << BPF_SOCK_OPS_RWND_INIT) |
+			   (1 << BPF_SOCK_OPS_TCP_CONNECT_CB) |
+			   (1 << BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB) |
+			   (1 << BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB) |
+			   (1 << BPF_SOCK_OPS_NEEDS_ECN) |
+			   (1 << BPF_SOCK_OPS_STATE_CB) |
+			   (1 << BPF_SOCK_OPS_TCP_LISTEN_CB));
+
+	EXPECT_EQ(expected_events, result->event_map, "#" PRIx32);
+	EXPECT_EQ(501ULL, result->bytes_received, "llu");
+	EXPECT_EQ(1002ULL, result->bytes_acked, "llu");
+	EXPECT_EQ(1, result->data_segs_in, PRIu32);
+	EXPECT_EQ(1, result->data_segs_out, PRIu32);
+	EXPECT_EQ(0x80, result->bad_cb_test_rv, PRIu32);
+	EXPECT_EQ(0, result->good_cb_test_rv, PRIu32);
+	EXPECT_EQ(1, result->num_listen, PRIu32);
+
+	return 0;
+err:
+	return -1;
+}
+
+static int bpf_find_map(const char *test, struct bpf_object *obj,
+			const char *name)
+{
+	struct bpf_map *map;
+
+	map = bpf_object__find_map_by_name(obj, name);
+	if (!map) {
+		printf("%s:FAIL:map '%s' not found\n", test, name);
+		return -1;
+	}
+	return bpf_map__fd(map);
+}
+
+int main(int argc, char **argv)
+{
+	const char *file = "test_tcpbpf_kern.o";
+	struct tcpbpf_globals g = {0};
+	const char *cg_path = "/foo";
+	int error = EXIT_FAILURE;
+	struct bpf_object *obj;
+	int prog_fd, map_fd;
+	int cg_fd = -1;
+	__u32 key = 0;
+	int rv;
+
+	if (setup_cgroup_environment())
+		goto err;
+
+	cg_fd = create_and_get_cgroup(cg_path);
+	if (!cg_fd)
+		goto err;
+
+	if (join_cgroup(cg_path))
+		goto err;
+
+	if (bpf_prog_load(file, BPF_PROG_TYPE_SOCK_OPS, &obj, &prog_fd)) {
+		printf("FAILED: load_bpf_file failed for: %s\n", file);
+		goto err;
+	}
+
+	rv = bpf_prog_attach(prog_fd, cg_fd, BPF_CGROUP_SOCK_OPS, 0);
+	if (rv) {
+		printf("FAILED: bpf_prog_attach: %d (%s)\n",
+		       error, strerror(errno));
+		goto err;
+	}
+
+	if (system("./tcp_server.py")) {
+		printf("FAILED: TCP server\n");
+		goto err;
+	}
+
+	map_fd = bpf_find_map(__func__, obj, "global_map");
+	if (map_fd < 0)
+		goto err;
+
+	rv = bpf_map_lookup_elem(map_fd, &key, &g);
+	if (rv != 0) {
+		printf("FAILED: bpf_map_lookup_elem returns %d\n", rv);
+		goto err;
+	}
+
+	if (verify_result(&g)) {
+		printf("FAILED: Wrong stats\n");
+		goto err;
+	}
+
+	printf("PASSED!\n");
+	error = 0;
+err:
+	bpf_prog_detach(cg_fd, BPF_CGROUP_SOCK_OPS);
+	close(cg_fd);
+	cleanup_cgroup_environment();
+	return error;
+}
diff --git a/tools/testing/selftests/bpf/test_tracepoint.c b/tools/testing/selftests/bpf/test_tracepoint.c
new file mode 100644
index 000000000..04bf08451
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_tracepoint.c
@@ -0,0 +1,26 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2017 Facebook
+
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+
+/* taken from /sys/kernel/debug/tracing/events/sched/sched_switch/format */
+struct sched_switch_args {
+	unsigned long long pad;
+	char prev_comm[16];
+	int prev_pid;
+	int prev_prio;
+	long long prev_state;
+	char next_comm[16];
+	int next_pid;
+	int next_prio;
+};
+
+SEC("tracepoint/sched/sched_switch")
+int oncpu(struct sched_switch_args *ctx)
+{
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
+__u32 _version SEC("version") = 1; /* ignored by tracepoints, required by libbpf.a */
diff --git a/tools/testing/selftests/bpf/test_tunnel.sh b/tools/testing/selftests/bpf/test_tunnel.sh
new file mode 100755
index 000000000..546aee3e9
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_tunnel.sh
@@ -0,0 +1,731 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# End-to-end eBPF tunnel test suite
+#   The script tests BPF network tunnel implementation.
+#
+# Topology:
+# ---------
+#     root namespace   |     at_ns0 namespace
+#                      |
+#      -----------     |     -----------
+#      | tnl dev |     |     | tnl dev |  (overlay network)
+#      -----------     |     -----------
+#      metadata-mode   |     native-mode
+#       with bpf       |
+#                      |
+#      ----------      |     ----------
+#      |  veth1  | --------- |  veth0  |  (underlay network)
+#      ----------    peer    ----------
+#
+#
+# Device Configuration
+# --------------------
+# Root namespace with metadata-mode tunnel + BPF
+# Device names and addresses:
+# 	veth1 IP: 172.16.1.200, IPv6: 00::22 (underlay)
+# 	tunnel dev <type>11, ex: gre11, IPv4: 10.1.1.200 (overlay)
+#
+# Namespace at_ns0 with native tunnel
+# Device names and addresses:
+# 	veth0 IPv4: 172.16.1.100, IPv6: 00::11 (underlay)
+# 	tunnel dev <type>00, ex: gre00, IPv4: 10.1.1.100 (overlay)
+#
+#
+# End-to-end ping packet flow
+# ---------------------------
+# Most of the tests start by namespace creation, device configuration,
+# then ping the underlay and overlay network.  When doing 'ping 10.1.1.100'
+# from root namespace, the following operations happen:
+# 1) Route lookup shows 10.1.1.100/24 belongs to tnl dev, fwd to tnl dev.
+# 2) Tnl device's egress BPF program is triggered and set the tunnel metadata,
+#    with remote_ip=172.16.1.200 and others.
+# 3) Outer tunnel header is prepended and route the packet to veth1's egress
+# 4) veth0's ingress queue receive the tunneled packet at namespace at_ns0
+# 5) Tunnel protocol handler, ex: vxlan_rcv, decap the packet
+# 6) Forward the packet to the overlay tnl dev
+
+PING_ARG="-c 3 -w 10 -q"
+ret=0
+GREEN='\033[0;92m'
+RED='\033[0;31m'
+NC='\033[0m' # No Color
+
+config_device()
+{
+	ip netns add at_ns0
+	ip link add veth0 type veth peer name veth1
+	ip link set veth0 netns at_ns0
+	ip netns exec at_ns0 ip addr add 172.16.1.100/24 dev veth0
+	ip netns exec at_ns0 ip link set dev veth0 up
+	ip link set dev veth1 up mtu 1500
+	ip addr add dev veth1 172.16.1.200/24
+}
+
+add_gre_tunnel()
+{
+	# at_ns0 namespace
+	ip netns exec at_ns0 \
+        ip link add dev $DEV_NS type $TYPE seq key 2 \
+		local 172.16.1.100 remote 172.16.1.200
+	ip netns exec at_ns0 ip link set dev $DEV_NS up
+	ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
+
+	# root namespace
+	ip link add dev $DEV type $TYPE key 2 external
+	ip link set dev $DEV up
+	ip addr add dev $DEV 10.1.1.200/24
+}
+
+add_ip6gretap_tunnel()
+{
+
+	# assign ipv6 address
+	ip netns exec at_ns0 ip addr add ::11/96 dev veth0
+	ip netns exec at_ns0 ip link set dev veth0 up
+	ip addr add dev veth1 ::22/96
+	ip link set dev veth1 up
+
+	# at_ns0 namespace
+	ip netns exec at_ns0 \
+		ip link add dev $DEV_NS type $TYPE seq flowlabel 0xbcdef key 2 \
+		local ::11 remote ::22
+
+	ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
+	ip netns exec at_ns0 ip addr add dev $DEV_NS fc80::100/96
+	ip netns exec at_ns0 ip link set dev $DEV_NS up
+
+	# root namespace
+	ip link add dev $DEV type $TYPE external
+	ip addr add dev $DEV 10.1.1.200/24
+	ip addr add dev $DEV fc80::200/24
+	ip link set dev $DEV up
+}
+
+add_erspan_tunnel()
+{
+	# at_ns0 namespace
+	if [ "$1" == "v1" ]; then
+		ip netns exec at_ns0 \
+		ip link add dev $DEV_NS type $TYPE seq key 2 \
+		local 172.16.1.100 remote 172.16.1.200 \
+		erspan_ver 1 erspan 123
+	else
+		ip netns exec at_ns0 \
+		ip link add dev $DEV_NS type $TYPE seq key 2 \
+		local 172.16.1.100 remote 172.16.1.200 \
+		erspan_ver 2 erspan_dir egress erspan_hwid 3
+	fi
+	ip netns exec at_ns0 ip link set dev $DEV_NS up
+	ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
+
+	# root namespace
+	ip link add dev $DEV type $TYPE external
+	ip link set dev $DEV up
+	ip addr add dev $DEV 10.1.1.200/24
+}
+
+add_ip6erspan_tunnel()
+{
+
+	# assign ipv6 address
+	ip netns exec at_ns0 ip addr add ::11/96 dev veth0
+	ip netns exec at_ns0 ip link set dev veth0 up
+	ip addr add dev veth1 ::22/96
+	ip link set dev veth1 up
+
+	# at_ns0 namespace
+	if [ "$1" == "v1" ]; then
+		ip netns exec at_ns0 \
+		ip link add dev $DEV_NS type $TYPE seq key 2 \
+		local ::11 remote ::22 \
+		erspan_ver 1 erspan 123
+	else
+		ip netns exec at_ns0 \
+		ip link add dev $DEV_NS type $TYPE seq key 2 \
+		local ::11 remote ::22 \
+		erspan_ver 2 erspan_dir egress erspan_hwid 7
+	fi
+	ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
+	ip netns exec at_ns0 ip link set dev $DEV_NS up
+
+	# root namespace
+	ip link add dev $DEV type $TYPE external
+	ip addr add dev $DEV 10.1.1.200/24
+	ip link set dev $DEV up
+}
+
+add_vxlan_tunnel()
+{
+	# Set static ARP entry here because iptables set-mark works
+	# on L3 packet, as a result not applying to ARP packets,
+	# causing errors at get_tunnel_{key/opt}.
+
+	# at_ns0 namespace
+	ip netns exec at_ns0 \
+		ip link add dev $DEV_NS type $TYPE \
+		id 2 dstport 4789 gbp remote 172.16.1.200
+	ip netns exec at_ns0 \
+		ip link set dev $DEV_NS address 52:54:00:d9:01:00 up
+	ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
+	ip netns exec at_ns0 arp -s 10.1.1.200 52:54:00:d9:02:00
+	ip netns exec at_ns0 iptables -A OUTPUT -j MARK --set-mark 0x800FF
+
+	# root namespace
+	ip link add dev $DEV type $TYPE external gbp dstport 4789
+	ip link set dev $DEV address 52:54:00:d9:02:00 up
+	ip addr add dev $DEV 10.1.1.200/24
+	arp -s 10.1.1.100 52:54:00:d9:01:00
+}
+
+add_ip6vxlan_tunnel()
+{
+	#ip netns exec at_ns0 ip -4 addr del 172.16.1.100 dev veth0
+	ip netns exec at_ns0 ip -6 addr add ::11/96 dev veth0
+	ip netns exec at_ns0 ip link set dev veth0 up
+	#ip -4 addr del 172.16.1.200 dev veth1
+	ip -6 addr add dev veth1 ::22/96
+	ip link set dev veth1 up
+
+	# at_ns0 namespace
+	ip netns exec at_ns0 \
+		ip link add dev $DEV_NS type $TYPE id 22 dstport 4789 \
+		local ::11 remote ::22
+	ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
+	ip netns exec at_ns0 ip link set dev $DEV_NS up
+
+	# root namespace
+	ip link add dev $DEV type $TYPE external dstport 4789
+	ip addr add dev $DEV 10.1.1.200/24
+	ip link set dev $DEV up
+}
+
+add_geneve_tunnel()
+{
+	# at_ns0 namespace
+	ip netns exec at_ns0 \
+		ip link add dev $DEV_NS type $TYPE \
+		id 2 dstport 6081 remote 172.16.1.200
+	ip netns exec at_ns0 ip link set dev $DEV_NS up
+	ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
+
+	# root namespace
+	ip link add dev $DEV type $TYPE dstport 6081 external
+	ip link set dev $DEV up
+	ip addr add dev $DEV 10.1.1.200/24
+}
+
+add_ip6geneve_tunnel()
+{
+	ip netns exec at_ns0 ip addr add ::11/96 dev veth0
+	ip netns exec at_ns0 ip link set dev veth0 up
+	ip addr add dev veth1 ::22/96
+	ip link set dev veth1 up
+
+	# at_ns0 namespace
+	ip netns exec at_ns0 \
+		ip link add dev $DEV_NS type $TYPE id 22 \
+		remote ::22     # geneve has no local option
+	ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
+	ip netns exec at_ns0 ip link set dev $DEV_NS up
+
+	# root namespace
+	ip link add dev $DEV type $TYPE external
+	ip addr add dev $DEV 10.1.1.200/24
+	ip link set dev $DEV up
+}
+
+add_ipip_tunnel()
+{
+	# at_ns0 namespace
+	ip netns exec at_ns0 \
+		ip link add dev $DEV_NS type $TYPE \
+		local 172.16.1.100 remote 172.16.1.200
+	ip netns exec at_ns0 ip link set dev $DEV_NS up
+	ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
+
+	# root namespace
+	ip link add dev $DEV type $TYPE external
+	ip link set dev $DEV up
+	ip addr add dev $DEV 10.1.1.200/24
+}
+
+add_ipip6tnl_tunnel()
+{
+	ip netns exec at_ns0 ip addr add ::11/96 dev veth0
+	ip netns exec at_ns0 ip link set dev veth0 up
+	ip addr add dev veth1 ::22/96
+	ip link set dev veth1 up
+
+	# at_ns0 namespace
+	ip netns exec at_ns0 \
+		ip link add dev $DEV_NS type $TYPE \
+		local ::11 remote ::22
+	ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
+	ip netns exec at_ns0 ip link set dev $DEV_NS up
+
+	# root namespace
+	ip link add dev $DEV type $TYPE external
+	ip addr add dev $DEV 10.1.1.200/24
+	ip link set dev $DEV up
+}
+
+test_gre()
+{
+	TYPE=gretap
+	DEV_NS=gretap00
+	DEV=gretap11
+	ret=0
+
+	check $TYPE
+	config_device
+	add_gre_tunnel
+	attach_bpf $DEV gre_set_tunnel gre_get_tunnel
+	ping $PING_ARG 10.1.1.100
+	check_err $?
+	ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
+	check_err $?
+	cleanup
+
+        if [ $ret -ne 0 ]; then
+                echo -e ${RED}"FAIL: $TYPE"${NC}
+                return 1
+        fi
+        echo -e ${GREEN}"PASS: $TYPE"${NC}
+}
+
+test_ip6gre()
+{
+	TYPE=ip6gre
+	DEV_NS=ip6gre00
+	DEV=ip6gre11
+	ret=0
+
+	check $TYPE
+	config_device
+	# reuse the ip6gretap function
+	add_ip6gretap_tunnel
+	attach_bpf $DEV ip6gretap_set_tunnel ip6gretap_get_tunnel
+	# underlay
+	ping6 $PING_ARG ::11
+	# overlay: ipv4 over ipv6
+	ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
+	ping $PING_ARG 10.1.1.100
+	check_err $?
+	# overlay: ipv6 over ipv6
+	ip netns exec at_ns0 ping6 $PING_ARG fc80::200
+	check_err $?
+	cleanup
+
+        if [ $ret -ne 0 ]; then
+                echo -e ${RED}"FAIL: $TYPE"${NC}
+                return 1
+        fi
+        echo -e ${GREEN}"PASS: $TYPE"${NC}
+}
+
+test_ip6gretap()
+{
+	TYPE=ip6gretap
+	DEV_NS=ip6gretap00
+	DEV=ip6gretap11
+	ret=0
+
+	check $TYPE
+	config_device
+	add_ip6gretap_tunnel
+	attach_bpf $DEV ip6gretap_set_tunnel ip6gretap_get_tunnel
+	# underlay
+	ping6 $PING_ARG ::11
+	# overlay: ipv4 over ipv6
+	ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
+	ping $PING_ARG 10.1.1.100
+	check_err $?
+	# overlay: ipv6 over ipv6
+	ip netns exec at_ns0 ping6 $PING_ARG fc80::200
+	check_err $?
+	cleanup
+
+	if [ $ret -ne 0 ]; then
+                echo -e ${RED}"FAIL: $TYPE"${NC}
+                return 1
+        fi
+        echo -e ${GREEN}"PASS: $TYPE"${NC}
+}
+
+test_erspan()
+{
+	TYPE=erspan
+	DEV_NS=erspan00
+	DEV=erspan11
+	ret=0
+
+	check $TYPE
+	config_device
+	add_erspan_tunnel $1
+	attach_bpf $DEV erspan_set_tunnel erspan_get_tunnel
+	ping $PING_ARG 10.1.1.100
+	check_err $?
+	ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
+	check_err $?
+	cleanup
+
+	if [ $ret -ne 0 ]; then
+                echo -e ${RED}"FAIL: $TYPE"${NC}
+                return 1
+        fi
+        echo -e ${GREEN}"PASS: $TYPE"${NC}
+}
+
+test_ip6erspan()
+{
+	TYPE=ip6erspan
+	DEV_NS=ip6erspan00
+	DEV=ip6erspan11
+	ret=0
+
+	check $TYPE
+	config_device
+	add_ip6erspan_tunnel $1
+	attach_bpf $DEV ip4ip6erspan_set_tunnel ip4ip6erspan_get_tunnel
+	ping6 $PING_ARG ::11
+	ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
+	check_err $?
+	cleanup
+
+	if [ $ret -ne 0 ]; then
+                echo -e ${RED}"FAIL: $TYPE"${NC}
+                return 1
+        fi
+        echo -e ${GREEN}"PASS: $TYPE"${NC}
+}
+
+test_vxlan()
+{
+	TYPE=vxlan
+	DEV_NS=vxlan00
+	DEV=vxlan11
+	ret=0
+
+	check $TYPE
+	config_device
+	add_vxlan_tunnel
+	attach_bpf $DEV vxlan_set_tunnel vxlan_get_tunnel
+	ping $PING_ARG 10.1.1.100
+	check_err $?
+	ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
+	check_err $?
+	cleanup
+
+	if [ $ret -ne 0 ]; then
+                echo -e ${RED}"FAIL: $TYPE"${NC}
+                return 1
+        fi
+        echo -e ${GREEN}"PASS: $TYPE"${NC}
+}
+
+test_ip6vxlan()
+{
+	TYPE=vxlan
+	DEV_NS=ip6vxlan00
+	DEV=ip6vxlan11
+	ret=0
+
+	check $TYPE
+	config_device
+	add_ip6vxlan_tunnel
+	ip link set dev veth1 mtu 1500
+	attach_bpf $DEV ip6vxlan_set_tunnel ip6vxlan_get_tunnel
+	# underlay
+	ping6 $PING_ARG ::11
+	# ip4 over ip6
+	ping $PING_ARG 10.1.1.100
+	check_err $?
+	ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
+	check_err $?
+	cleanup
+
+	if [ $ret -ne 0 ]; then
+                echo -e ${RED}"FAIL: ip6$TYPE"${NC}
+                return 1
+        fi
+        echo -e ${GREEN}"PASS: ip6$TYPE"${NC}
+}
+
+test_geneve()
+{
+	TYPE=geneve
+	DEV_NS=geneve00
+	DEV=geneve11
+	ret=0
+
+	check $TYPE
+	config_device
+	add_geneve_tunnel
+	attach_bpf $DEV geneve_set_tunnel geneve_get_tunnel
+	ping $PING_ARG 10.1.1.100
+	check_err $?
+	ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
+	check_err $?
+	cleanup
+
+	if [ $ret -ne 0 ]; then
+                echo -e ${RED}"FAIL: $TYPE"${NC}
+                return 1
+        fi
+        echo -e ${GREEN}"PASS: $TYPE"${NC}
+}
+
+test_ip6geneve()
+{
+	TYPE=geneve
+	DEV_NS=ip6geneve00
+	DEV=ip6geneve11
+	ret=0
+
+	check $TYPE
+	config_device
+	add_ip6geneve_tunnel
+	attach_bpf $DEV ip6geneve_set_tunnel ip6geneve_get_tunnel
+	ping $PING_ARG 10.1.1.100
+	check_err $?
+	ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
+	check_err $?
+	cleanup
+
+	if [ $ret -ne 0 ]; then
+                echo -e ${RED}"FAIL: ip6$TYPE"${NC}
+                return 1
+        fi
+        echo -e ${GREEN}"PASS: ip6$TYPE"${NC}
+}
+
+test_ipip()
+{
+	TYPE=ipip
+	DEV_NS=ipip00
+	DEV=ipip11
+	ret=0
+
+	check $TYPE
+	config_device
+	add_ipip_tunnel
+	ip link set dev veth1 mtu 1500
+	attach_bpf $DEV ipip_set_tunnel ipip_get_tunnel
+	ping $PING_ARG 10.1.1.100
+	check_err $?
+	ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
+	check_err $?
+	cleanup
+
+	if [ $ret -ne 0 ]; then
+                echo -e ${RED}"FAIL: $TYPE"${NC}
+                return 1
+        fi
+        echo -e ${GREEN}"PASS: $TYPE"${NC}
+}
+
+test_ipip6()
+{
+	TYPE=ip6tnl
+	DEV_NS=ipip6tnl00
+	DEV=ipip6tnl11
+	ret=0
+
+	check $TYPE
+	config_device
+	add_ipip6tnl_tunnel
+	ip link set dev veth1 mtu 1500
+	attach_bpf $DEV ipip6_set_tunnel ipip6_get_tunnel
+	# underlay
+	ping6 $PING_ARG ::11
+	# ip4 over ip6
+	ping $PING_ARG 10.1.1.100
+	check_err $?
+	ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
+	check_err $?
+	cleanup
+
+	if [ $ret -ne 0 ]; then
+                echo -e ${RED}"FAIL: $TYPE"${NC}
+                return 1
+        fi
+        echo -e ${GREEN}"PASS: $TYPE"${NC}
+}
+
+setup_xfrm_tunnel()
+{
+	auth=0x$(printf '1%.0s' {1..40})
+	enc=0x$(printf '2%.0s' {1..32})
+	spi_in_to_out=0x1
+	spi_out_to_in=0x2
+	# at_ns0 namespace
+	# at_ns0 -> root
+	ip netns exec at_ns0 \
+		ip xfrm state add src 172.16.1.100 dst 172.16.1.200 proto esp \
+			spi $spi_in_to_out reqid 1 mode tunnel \
+			auth-trunc 'hmac(sha1)' $auth 96 enc 'cbc(aes)' $enc
+	ip netns exec at_ns0 \
+		ip xfrm policy add src 10.1.1.100/32 dst 10.1.1.200/32 dir out \
+		tmpl src 172.16.1.100 dst 172.16.1.200 proto esp reqid 1 \
+		mode tunnel
+	# root -> at_ns0
+	ip netns exec at_ns0 \
+		ip xfrm state add src 172.16.1.200 dst 172.16.1.100 proto esp \
+			spi $spi_out_to_in reqid 2 mode tunnel \
+			auth-trunc 'hmac(sha1)' $auth 96 enc 'cbc(aes)' $enc
+	ip netns exec at_ns0 \
+		ip xfrm policy add src 10.1.1.200/32 dst 10.1.1.100/32 dir in \
+		tmpl src 172.16.1.200 dst 172.16.1.100 proto esp reqid 2 \
+		mode tunnel
+	# address & route
+	ip netns exec at_ns0 \
+		ip addr add dev veth0 10.1.1.100/32
+	ip netns exec at_ns0 \
+		ip route add 10.1.1.200 dev veth0 via 172.16.1.200 \
+			src 10.1.1.100
+
+	# root namespace
+	# at_ns0 -> root
+	ip xfrm state add src 172.16.1.100 dst 172.16.1.200 proto esp \
+		spi $spi_in_to_out reqid 1 mode tunnel \
+		auth-trunc 'hmac(sha1)' $auth 96  enc 'cbc(aes)' $enc
+	ip xfrm policy add src 10.1.1.100/32 dst 10.1.1.200/32 dir in \
+		tmpl src 172.16.1.100 dst 172.16.1.200 proto esp reqid 1 \
+		mode tunnel
+	# root -> at_ns0
+	ip xfrm state add src 172.16.1.200 dst 172.16.1.100 proto esp \
+		spi $spi_out_to_in reqid 2 mode tunnel \
+		auth-trunc 'hmac(sha1)' $auth 96  enc 'cbc(aes)' $enc
+	ip xfrm policy add src 10.1.1.200/32 dst 10.1.1.100/32 dir out \
+		tmpl src 172.16.1.200 dst 172.16.1.100 proto esp reqid 2 \
+		mode tunnel
+	# address & route
+	ip addr add dev veth1 10.1.1.200/32
+	ip route add 10.1.1.100 dev veth1 via 172.16.1.100 src 10.1.1.200
+}
+
+test_xfrm_tunnel()
+{
+	config_device
+	> /sys/kernel/debug/tracing/trace
+	setup_xfrm_tunnel
+	tc qdisc add dev veth1 clsact
+	tc filter add dev veth1 proto ip ingress bpf da obj test_tunnel_kern.o \
+		sec xfrm_get_state
+	ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
+	sleep 1
+	grep "reqid 1" /sys/kernel/debug/tracing/trace
+	check_err $?
+	grep "spi 0x1" /sys/kernel/debug/tracing/trace
+	check_err $?
+	grep "remote ip 0xac100164" /sys/kernel/debug/tracing/trace
+	check_err $?
+	cleanup
+
+	if [ $ret -ne 0 ]; then
+		echo -e ${RED}"FAIL: xfrm tunnel"${NC}
+		return 1
+	fi
+	echo -e ${GREEN}"PASS: xfrm tunnel"${NC}
+}
+
+attach_bpf()
+{
+	DEV=$1
+	SET=$2
+	GET=$3
+	tc qdisc add dev $DEV clsact
+	tc filter add dev $DEV egress bpf da obj test_tunnel_kern.o sec $SET
+	tc filter add dev $DEV ingress bpf da obj test_tunnel_kern.o sec $GET
+}
+
+cleanup()
+{
+	ip netns delete at_ns0 2> /dev/null
+	ip link del veth1 2> /dev/null
+	ip link del ipip11 2> /dev/null
+	ip link del ipip6tnl11 2> /dev/null
+	ip link del gretap11 2> /dev/null
+	ip link del ip6gre11 2> /dev/null
+	ip link del ip6gretap11 2> /dev/null
+	ip link del vxlan11 2> /dev/null
+	ip link del ip6vxlan11 2> /dev/null
+	ip link del geneve11 2> /dev/null
+	ip link del ip6geneve11 2> /dev/null
+	ip link del erspan11 2> /dev/null
+	ip link del ip6erspan11 2> /dev/null
+	ip xfrm policy delete dir out src 10.1.1.200/32 dst 10.1.1.100/32 2> /dev/null
+	ip xfrm policy delete dir in src 10.1.1.100/32 dst 10.1.1.200/32 2> /dev/null
+	ip xfrm state delete src 172.16.1.100 dst 172.16.1.200 proto esp spi 0x1 2> /dev/null
+	ip xfrm state delete src 172.16.1.200 dst 172.16.1.100 proto esp spi 0x2 2> /dev/null
+}
+
+cleanup_exit()
+{
+	echo "CATCH SIGKILL or SIGINT, cleanup and exit"
+	cleanup
+	exit 0
+}
+
+check()
+{
+	ip link help 2>&1 | grep -q "\s$1\s"
+	if [ $? -ne 0 ];then
+		echo "SKIP $1: iproute2 not support"
+	cleanup
+	return 1
+	fi
+}
+
+enable_debug()
+{
+	echo 'file ip_gre.c +p' > /sys/kernel/debug/dynamic_debug/control
+	echo 'file ip6_gre.c +p' > /sys/kernel/debug/dynamic_debug/control
+	echo 'file vxlan.c +p' > /sys/kernel/debug/dynamic_debug/control
+	echo 'file geneve.c +p' > /sys/kernel/debug/dynamic_debug/control
+	echo 'file ipip.c +p' > /sys/kernel/debug/dynamic_debug/control
+}
+
+check_err()
+{
+	if [ $ret -eq 0 ]; then
+		ret=$1
+	fi
+}
+
+bpf_tunnel_test()
+{
+	echo "Testing GRE tunnel..."
+	test_gre
+	echo "Testing IP6GRE tunnel..."
+	test_ip6gre
+	echo "Testing IP6GRETAP tunnel..."
+	test_ip6gretap
+	echo "Testing ERSPAN tunnel..."
+	test_erspan v2
+	echo "Testing IP6ERSPAN tunnel..."
+	test_ip6erspan v2
+	echo "Testing VXLAN tunnel..."
+	test_vxlan
+	echo "Testing IP6VXLAN tunnel..."
+	test_ip6vxlan
+	echo "Testing GENEVE tunnel..."
+	test_geneve
+	echo "Testing IP6GENEVE tunnel..."
+	test_ip6geneve
+	echo "Testing IPIP tunnel..."
+	test_ipip
+	echo "Testing IPIP6 tunnel..."
+	test_ipip6
+	echo "Testing IPSec tunnel..."
+	test_xfrm_tunnel
+}
+
+trap cleanup 0 3 6
+trap cleanup_exit 2 9
+
+cleanup
+bpf_tunnel_test
+
+exit 0
diff --git a/tools/testing/selftests/bpf/test_tunnel_kern.c b/tools/testing/selftests/bpf/test_tunnel_kern.c
new file mode 100644
index 000000000..504df69c8
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_tunnel_kern.c
@@ -0,0 +1,713 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2016 VMware
+ * Copyright (c) 2016 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <stddef.h>
+#include <string.h>
+#include <arpa/inet.h>
+#include <linux/bpf.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/types.h>
+#include <linux/tcp.h>
+#include <linux/socket.h>
+#include <linux/pkt_cls.h>
+#include <linux/erspan.h>
+#include "bpf_helpers.h"
+#include "bpf_endian.h"
+
+#define ERROR(ret) do {\
+		char fmt[] = "ERROR line:%d ret:%d\n";\
+		bpf_trace_printk(fmt, sizeof(fmt), __LINE__, ret); \
+	} while (0)
+
+int _version SEC("version") = 1;
+
+struct geneve_opt {
+	__be16	opt_class;
+	__u8	type;
+	__u8	length:5;
+	__u8	r3:1;
+	__u8	r2:1;
+	__u8	r1:1;
+	__u8	opt_data[8]; /* hard-coded to 8 byte */
+};
+
+struct vxlan_metadata {
+	__u32     gbp;
+};
+
+SEC("gre_set_tunnel")
+int _gre_set_tunnel(struct __sk_buff *skb)
+{
+	int ret;
+	struct bpf_tunnel_key key;
+
+	__builtin_memset(&key, 0x0, sizeof(key));
+	key.remote_ipv4 = 0xac100164; /* 172.16.1.100 */
+	key.tunnel_id = 2;
+	key.tunnel_tos = 0;
+	key.tunnel_ttl = 64;
+
+	ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
+				     BPF_F_ZERO_CSUM_TX | BPF_F_SEQ_NUMBER);
+	if (ret < 0) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
+
+	return TC_ACT_OK;
+}
+
+SEC("gre_get_tunnel")
+int _gre_get_tunnel(struct __sk_buff *skb)
+{
+	int ret;
+	struct bpf_tunnel_key key;
+	char fmt[] = "key %d remote ip 0x%x\n";
+
+	ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0);
+	if (ret < 0) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
+
+	bpf_trace_printk(fmt, sizeof(fmt), key.tunnel_id, key.remote_ipv4);
+	return TC_ACT_OK;
+}
+
+SEC("ip6gretap_set_tunnel")
+int _ip6gretap_set_tunnel(struct __sk_buff *skb)
+{
+	struct bpf_tunnel_key key;
+	int ret;
+
+	__builtin_memset(&key, 0x0, sizeof(key));
+	key.remote_ipv6[3] = bpf_htonl(0x11); /* ::11 */
+	key.tunnel_id = 2;
+	key.tunnel_tos = 0;
+	key.tunnel_ttl = 64;
+	key.tunnel_label = 0xabcde;
+
+	ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
+				     BPF_F_TUNINFO_IPV6 | BPF_F_ZERO_CSUM_TX |
+				     BPF_F_SEQ_NUMBER);
+	if (ret < 0) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
+
+	return TC_ACT_OK;
+}
+
+SEC("ip6gretap_get_tunnel")
+int _ip6gretap_get_tunnel(struct __sk_buff *skb)
+{
+	char fmt[] = "key %d remote ip6 ::%x label %x\n";
+	struct bpf_tunnel_key key;
+	int ret;
+
+	ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key),
+				     BPF_F_TUNINFO_IPV6);
+	if (ret < 0) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
+
+	bpf_trace_printk(fmt, sizeof(fmt),
+			 key.tunnel_id, key.remote_ipv6[3], key.tunnel_label);
+
+	return TC_ACT_OK;
+}
+
+SEC("erspan_set_tunnel")
+int _erspan_set_tunnel(struct __sk_buff *skb)
+{
+	struct bpf_tunnel_key key;
+	struct erspan_metadata md;
+	int ret;
+
+	__builtin_memset(&key, 0x0, sizeof(key));
+	key.remote_ipv4 = 0xac100164; /* 172.16.1.100 */
+	key.tunnel_id = 2;
+	key.tunnel_tos = 0;
+	key.tunnel_ttl = 64;
+
+	ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
+				     BPF_F_ZERO_CSUM_TX);
+	if (ret < 0) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
+
+	__builtin_memset(&md, 0, sizeof(md));
+#ifdef ERSPAN_V1
+	md.version = 1;
+	md.u.index = bpf_htonl(123);
+#else
+	__u8 direction = 1;
+	__u8 hwid = 7;
+
+	md.version = 2;
+	md.u.md2.dir = direction;
+	md.u.md2.hwid = hwid & 0xf;
+	md.u.md2.hwid_upper = (hwid >> 4) & 0x3;
+#endif
+
+	ret = bpf_skb_set_tunnel_opt(skb, &md, sizeof(md));
+	if (ret < 0) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
+
+	return TC_ACT_OK;
+}
+
+SEC("erspan_get_tunnel")
+int _erspan_get_tunnel(struct __sk_buff *skb)
+{
+	char fmt[] = "key %d remote ip 0x%x erspan version %d\n";
+	struct bpf_tunnel_key key;
+	struct erspan_metadata md;
+	__u32 index;
+	int ret;
+
+	ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0);
+	if (ret < 0) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
+
+	ret = bpf_skb_get_tunnel_opt(skb, &md, sizeof(md));
+	if (ret < 0) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
+
+	bpf_trace_printk(fmt, sizeof(fmt),
+			key.tunnel_id, key.remote_ipv4, md.version);
+
+#ifdef ERSPAN_V1
+	char fmt2[] = "\tindex %x\n";
+
+	index = bpf_ntohl(md.u.index);
+	bpf_trace_printk(fmt2, sizeof(fmt2), index);
+#else
+	char fmt2[] = "\tdirection %d hwid %x timestamp %u\n";
+
+	bpf_trace_printk(fmt2, sizeof(fmt2),
+			 md.u.md2.dir,
+			 (md.u.md2.hwid_upper << 4) + md.u.md2.hwid,
+			 bpf_ntohl(md.u.md2.timestamp));
+#endif
+
+	return TC_ACT_OK;
+}
+
+SEC("ip4ip6erspan_set_tunnel")
+int _ip4ip6erspan_set_tunnel(struct __sk_buff *skb)
+{
+	struct bpf_tunnel_key key;
+	struct erspan_metadata md;
+	int ret;
+
+	__builtin_memset(&key, 0x0, sizeof(key));
+	key.remote_ipv6[3] = bpf_htonl(0x11);
+	key.tunnel_id = 2;
+	key.tunnel_tos = 0;
+	key.tunnel_ttl = 64;
+
+	ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
+				     BPF_F_TUNINFO_IPV6);
+	if (ret < 0) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
+
+	__builtin_memset(&md, 0, sizeof(md));
+
+#ifdef ERSPAN_V1
+	md.u.index = bpf_htonl(123);
+	md.version = 1;
+#else
+	__u8 direction = 0;
+	__u8 hwid = 17;
+
+	md.version = 2;
+	md.u.md2.dir = direction;
+	md.u.md2.hwid = hwid & 0xf;
+	md.u.md2.hwid_upper = (hwid >> 4) & 0x3;
+#endif
+
+	ret = bpf_skb_set_tunnel_opt(skb, &md, sizeof(md));
+	if (ret < 0) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
+
+	return TC_ACT_OK;
+}
+
+SEC("ip4ip6erspan_get_tunnel")
+int _ip4ip6erspan_get_tunnel(struct __sk_buff *skb)
+{
+	char fmt[] = "ip6erspan get key %d remote ip6 ::%x erspan version %d\n";
+	struct bpf_tunnel_key key;
+	struct erspan_metadata md;
+	__u32 index;
+	int ret;
+
+	ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key),
+				     BPF_F_TUNINFO_IPV6);
+	if (ret < 0) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
+
+	ret = bpf_skb_get_tunnel_opt(skb, &md, sizeof(md));
+	if (ret < 0) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
+
+	bpf_trace_printk(fmt, sizeof(fmt),
+			key.tunnel_id, key.remote_ipv4, md.version);
+
+#ifdef ERSPAN_V1
+	char fmt2[] = "\tindex %x\n";
+
+	index = bpf_ntohl(md.u.index);
+	bpf_trace_printk(fmt2, sizeof(fmt2), index);
+#else
+	char fmt2[] = "\tdirection %d hwid %x timestamp %u\n";
+
+	bpf_trace_printk(fmt2, sizeof(fmt2),
+			 md.u.md2.dir,
+			 (md.u.md2.hwid_upper << 4) + md.u.md2.hwid,
+			 bpf_ntohl(md.u.md2.timestamp));
+#endif
+
+	return TC_ACT_OK;
+}
+
+SEC("vxlan_set_tunnel")
+int _vxlan_set_tunnel(struct __sk_buff *skb)
+{
+	int ret;
+	struct bpf_tunnel_key key;
+	struct vxlan_metadata md;
+
+	__builtin_memset(&key, 0x0, sizeof(key));
+	key.remote_ipv4 = 0xac100164; /* 172.16.1.100 */
+	key.tunnel_id = 2;
+	key.tunnel_tos = 0;
+	key.tunnel_ttl = 64;
+
+	ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
+				     BPF_F_ZERO_CSUM_TX);
+	if (ret < 0) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
+
+	md.gbp = 0x800FF; /* Set VXLAN Group Policy extension */
+	ret = bpf_skb_set_tunnel_opt(skb, &md, sizeof(md));
+	if (ret < 0) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
+
+	return TC_ACT_OK;
+}
+
+SEC("vxlan_get_tunnel")
+int _vxlan_get_tunnel(struct __sk_buff *skb)
+{
+	int ret;
+	struct bpf_tunnel_key key;
+	struct vxlan_metadata md;
+	char fmt[] = "key %d remote ip 0x%x vxlan gbp 0x%x\n";
+
+	ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0);
+	if (ret < 0) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
+
+	ret = bpf_skb_get_tunnel_opt(skb, &md, sizeof(md));
+	if (ret < 0) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
+
+	bpf_trace_printk(fmt, sizeof(fmt),
+			key.tunnel_id, key.remote_ipv4, md.gbp);
+
+	return TC_ACT_OK;
+}
+
+SEC("ip6vxlan_set_tunnel")
+int _ip6vxlan_set_tunnel(struct __sk_buff *skb)
+{
+	struct bpf_tunnel_key key;
+	int ret;
+
+	__builtin_memset(&key, 0x0, sizeof(key));
+	key.remote_ipv6[3] = bpf_htonl(0x11); /* ::11 */
+	key.tunnel_id = 22;
+	key.tunnel_tos = 0;
+	key.tunnel_ttl = 64;
+
+	ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
+				     BPF_F_TUNINFO_IPV6);
+	if (ret < 0) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
+
+	return TC_ACT_OK;
+}
+
+SEC("ip6vxlan_get_tunnel")
+int _ip6vxlan_get_tunnel(struct __sk_buff *skb)
+{
+	char fmt[] = "key %d remote ip6 ::%x label %x\n";
+	struct bpf_tunnel_key key;
+	int ret;
+
+	ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key),
+				     BPF_F_TUNINFO_IPV6);
+	if (ret < 0) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
+
+	bpf_trace_printk(fmt, sizeof(fmt),
+			 key.tunnel_id, key.remote_ipv6[3], key.tunnel_label);
+
+	return TC_ACT_OK;
+}
+
+SEC("geneve_set_tunnel")
+int _geneve_set_tunnel(struct __sk_buff *skb)
+{
+	int ret, ret2;
+	struct bpf_tunnel_key key;
+	struct geneve_opt gopt;
+
+	__builtin_memset(&key, 0x0, sizeof(key));
+	key.remote_ipv4 = 0xac100164; /* 172.16.1.100 */
+	key.tunnel_id = 2;
+	key.tunnel_tos = 0;
+	key.tunnel_ttl = 64;
+
+	__builtin_memset(&gopt, 0x0, sizeof(gopt));
+	gopt.opt_class = bpf_htons(0x102); /* Open Virtual Networking (OVN) */
+	gopt.type = 0x08;
+	gopt.r1 = 0;
+	gopt.r2 = 0;
+	gopt.r3 = 0;
+	gopt.length = 2; /* 4-byte multiple */
+	*(int *) &gopt.opt_data = bpf_htonl(0xdeadbeef);
+
+	ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
+				     BPF_F_ZERO_CSUM_TX);
+	if (ret < 0) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
+
+	ret = bpf_skb_set_tunnel_opt(skb, &gopt, sizeof(gopt));
+	if (ret < 0) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
+
+	return TC_ACT_OK;
+}
+
+SEC("geneve_get_tunnel")
+int _geneve_get_tunnel(struct __sk_buff *skb)
+{
+	int ret;
+	struct bpf_tunnel_key key;
+	struct geneve_opt gopt;
+	char fmt[] = "key %d remote ip 0x%x geneve class 0x%x\n";
+
+	ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0);
+	if (ret < 0) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
+
+	ret = bpf_skb_get_tunnel_opt(skb, &gopt, sizeof(gopt));
+	if (ret < 0) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
+
+	bpf_trace_printk(fmt, sizeof(fmt),
+			key.tunnel_id, key.remote_ipv4, gopt.opt_class);
+	return TC_ACT_OK;
+}
+
+SEC("ip6geneve_set_tunnel")
+int _ip6geneve_set_tunnel(struct __sk_buff *skb)
+{
+	struct bpf_tunnel_key key;
+	struct geneve_opt gopt;
+	int ret;
+
+	__builtin_memset(&key, 0x0, sizeof(key));
+	key.remote_ipv6[3] = bpf_htonl(0x11); /* ::11 */
+	key.tunnel_id = 22;
+	key.tunnel_tos = 0;
+	key.tunnel_ttl = 64;
+
+	ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
+				     BPF_F_TUNINFO_IPV6);
+	if (ret < 0) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
+
+	__builtin_memset(&gopt, 0x0, sizeof(gopt));
+	gopt.opt_class = bpf_htons(0x102); /* Open Virtual Networking (OVN) */
+	gopt.type = 0x08;
+	gopt.r1 = 0;
+	gopt.r2 = 0;
+	gopt.r3 = 0;
+	gopt.length = 2; /* 4-byte multiple */
+	*(int *) &gopt.opt_data = bpf_htonl(0xfeedbeef);
+
+	ret = bpf_skb_set_tunnel_opt(skb, &gopt, sizeof(gopt));
+	if (ret < 0) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
+
+	return TC_ACT_OK;
+}
+
+SEC("ip6geneve_get_tunnel")
+int _ip6geneve_get_tunnel(struct __sk_buff *skb)
+{
+	char fmt[] = "key %d remote ip 0x%x geneve class 0x%x\n";
+	struct bpf_tunnel_key key;
+	struct geneve_opt gopt;
+	int ret;
+
+	ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key),
+				     BPF_F_TUNINFO_IPV6);
+	if (ret < 0) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
+
+	ret = bpf_skb_get_tunnel_opt(skb, &gopt, sizeof(gopt));
+	if (ret < 0) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
+
+	bpf_trace_printk(fmt, sizeof(fmt),
+			key.tunnel_id, key.remote_ipv4, gopt.opt_class);
+
+	return TC_ACT_OK;
+}
+
+SEC("ipip_set_tunnel")
+int _ipip_set_tunnel(struct __sk_buff *skb)
+{
+	struct bpf_tunnel_key key = {};
+	void *data = (void *)(long)skb->data;
+	struct iphdr *iph = data;
+	struct tcphdr *tcp = data + sizeof(*iph);
+	void *data_end = (void *)(long)skb->data_end;
+	int ret;
+
+	/* single length check */
+	if (data + sizeof(*iph) + sizeof(*tcp) > data_end) {
+		ERROR(1);
+		return TC_ACT_SHOT;
+	}
+
+	key.tunnel_ttl = 64;
+	if (iph->protocol == IPPROTO_ICMP) {
+		key.remote_ipv4 = 0xac100164; /* 172.16.1.100 */
+	} else {
+		if (iph->protocol != IPPROTO_TCP || iph->ihl != 5)
+			return TC_ACT_SHOT;
+
+		if (tcp->dest == bpf_htons(5200))
+			key.remote_ipv4 = 0xac100164; /* 172.16.1.100 */
+		else if (tcp->dest == bpf_htons(5201))
+			key.remote_ipv4 = 0xac100165; /* 172.16.1.101 */
+		else
+			return TC_ACT_SHOT;
+	}
+
+	ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), 0);
+	if (ret < 0) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
+
+	return TC_ACT_OK;
+}
+
+SEC("ipip_get_tunnel")
+int _ipip_get_tunnel(struct __sk_buff *skb)
+{
+	int ret;
+	struct bpf_tunnel_key key;
+	char fmt[] = "remote ip 0x%x\n";
+
+	ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0);
+	if (ret < 0) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
+
+	bpf_trace_printk(fmt, sizeof(fmt), key.remote_ipv4);
+	return TC_ACT_OK;
+}
+
+SEC("ipip6_set_tunnel")
+int _ipip6_set_tunnel(struct __sk_buff *skb)
+{
+	struct bpf_tunnel_key key = {};
+	void *data = (void *)(long)skb->data;
+	struct iphdr *iph = data;
+	struct tcphdr *tcp = data + sizeof(*iph);
+	void *data_end = (void *)(long)skb->data_end;
+	int ret;
+
+	/* single length check */
+	if (data + sizeof(*iph) + sizeof(*tcp) > data_end) {
+		ERROR(1);
+		return TC_ACT_SHOT;
+	}
+
+	__builtin_memset(&key, 0x0, sizeof(key));
+	key.remote_ipv6[3] = bpf_htonl(0x11); /* ::11 */
+	key.tunnel_ttl = 64;
+
+	ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
+				     BPF_F_TUNINFO_IPV6);
+	if (ret < 0) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
+
+	return TC_ACT_OK;
+}
+
+SEC("ipip6_get_tunnel")
+int _ipip6_get_tunnel(struct __sk_buff *skb)
+{
+	int ret;
+	struct bpf_tunnel_key key;
+	char fmt[] = "remote ip6 %x::%x\n";
+
+	ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key),
+				     BPF_F_TUNINFO_IPV6);
+	if (ret < 0) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
+
+	bpf_trace_printk(fmt, sizeof(fmt), bpf_htonl(key.remote_ipv6[0]),
+			 bpf_htonl(key.remote_ipv6[3]));
+	return TC_ACT_OK;
+}
+
+SEC("ip6ip6_set_tunnel")
+int _ip6ip6_set_tunnel(struct __sk_buff *skb)
+{
+	struct bpf_tunnel_key key = {};
+	void *data = (void *)(long)skb->data;
+	struct ipv6hdr *iph = data;
+	struct tcphdr *tcp = data + sizeof(*iph);
+	void *data_end = (void *)(long)skb->data_end;
+	int ret;
+
+	/* single length check */
+	if (data + sizeof(*iph) + sizeof(*tcp) > data_end) {
+		ERROR(1);
+		return TC_ACT_SHOT;
+	}
+
+	key.remote_ipv6[0] = bpf_htonl(0x2401db00);
+	key.tunnel_ttl = 64;
+
+	if (iph->nexthdr == 58 /* NEXTHDR_ICMP */) {
+		key.remote_ipv6[3] = bpf_htonl(1);
+	} else {
+		if (iph->nexthdr != 6 /* NEXTHDR_TCP */) {
+			ERROR(iph->nexthdr);
+			return TC_ACT_SHOT;
+		}
+
+		if (tcp->dest == bpf_htons(5200)) {
+			key.remote_ipv6[3] = bpf_htonl(1);
+		} else if (tcp->dest == bpf_htons(5201)) {
+			key.remote_ipv6[3] = bpf_htonl(2);
+		} else {
+			ERROR(tcp->dest);
+			return TC_ACT_SHOT;
+		}
+	}
+
+	ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
+				     BPF_F_TUNINFO_IPV6);
+	if (ret < 0) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
+
+	return TC_ACT_OK;
+}
+
+SEC("ip6ip6_get_tunnel")
+int _ip6ip6_get_tunnel(struct __sk_buff *skb)
+{
+	int ret;
+	struct bpf_tunnel_key key;
+	char fmt[] = "remote ip6 %x::%x\n";
+
+	ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key),
+				     BPF_F_TUNINFO_IPV6);
+	if (ret < 0) {
+		ERROR(ret);
+		return TC_ACT_SHOT;
+	}
+
+	bpf_trace_printk(fmt, sizeof(fmt), bpf_htonl(key.remote_ipv6[0]),
+			 bpf_htonl(key.remote_ipv6[3]));
+	return TC_ACT_OK;
+}
+
+SEC("xfrm_get_state")
+int _xfrm_get_state(struct __sk_buff *skb)
+{
+	struct bpf_xfrm_state x;
+	char fmt[] = "reqid %d spi 0x%x remote ip 0x%x\n";
+	int ret;
+
+	ret = bpf_skb_get_xfrm_state(skb, 0, &x, sizeof(x), 0);
+	if (ret < 0)
+		return TC_ACT_OK;
+
+	bpf_trace_printk(fmt, sizeof(fmt), x.reqid, bpf_ntohl(x.spi),
+			 bpf_ntohl(x.remote_ipv4));
+	return TC_ACT_OK;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
new file mode 100644
index 000000000..858e55143
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -0,0 +1,13243 @@
+/*
+ * Testsuite for eBPF verifier
+ *
+ * Copyright (c) 2014 PLUMgrid, http://plumgrid.com
+ * Copyright (c) 2017 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+
+#include <endian.h>
+#include <asm/types.h>
+#include <linux/types.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <errno.h>
+#include <string.h>
+#include <stddef.h>
+#include <stdbool.h>
+#include <sched.h>
+#include <limits.h>
+
+#include <sys/capability.h>
+
+#include <linux/unistd.h>
+#include <linux/filter.h>
+#include <linux/bpf_perf_event.h>
+#include <linux/bpf.h>
+#include <linux/if_ether.h>
+
+#include <bpf/bpf.h>
+
+#ifdef HAVE_GENHDR
+# include "autoconf.h"
+#else
+# if defined(__i386) || defined(__x86_64) || defined(__s390x__) || defined(__aarch64__)
+#  define CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS 1
+# endif
+#endif
+#include "bpf_rlimit.h"
+#include "bpf_rand.h"
+#include "bpf_util.h"
+#include "../../../include/linux/filter.h"
+
+#define MAX_INSNS	BPF_MAXINSNS
+#define MAX_FIXUPS	8
+#define MAX_NR_MAPS	8
+#define POINTER_VALUE	0xcafe4all
+#define TEST_DATA_LEN	64
+
+#define F_NEEDS_EFFICIENT_UNALIGNED_ACCESS	(1 << 0)
+#define F_LOAD_WITH_STRICT_ALIGNMENT		(1 << 1)
+
+#define UNPRIV_SYSCTL "kernel/unprivileged_bpf_disabled"
+static bool unpriv_disabled = false;
+
+struct bpf_test {
+	const char *descr;
+	struct bpf_insn	insns[MAX_INSNS];
+	int fixup_map1[MAX_FIXUPS];
+	int fixup_map2[MAX_FIXUPS];
+	int fixup_map3[MAX_FIXUPS];
+	int fixup_map4[MAX_FIXUPS];
+	int fixup_prog1[MAX_FIXUPS];
+	int fixup_prog2[MAX_FIXUPS];
+	int fixup_map_in_map[MAX_FIXUPS];
+	int fixup_cgroup_storage[MAX_FIXUPS];
+	const char *errstr;
+	const char *errstr_unpriv;
+	uint32_t retval, retval_unpriv;
+	enum {
+		UNDEF,
+		ACCEPT,
+		REJECT
+	} result, result_unpriv;
+	enum bpf_prog_type prog_type;
+	uint8_t flags;
+	__u8 data[TEST_DATA_LEN];
+	void (*fill_helper)(struct bpf_test *self);
+};
+
+/* Note we want this to be 64 bit aligned so that the end of our array is
+ * actually the end of the structure.
+ */
+#define MAX_ENTRIES 11
+
+struct test_val {
+	unsigned int index;
+	int foo[MAX_ENTRIES];
+};
+
+struct other_val {
+	long long foo;
+	long long bar;
+};
+
+static void bpf_fill_ld_abs_vlan_push_pop(struct bpf_test *self)
+{
+	/* test: {skb->data[0], vlan_push} x 68 + {skb->data[0], vlan_pop} x 68 */
+#define PUSH_CNT 51
+	unsigned int len = BPF_MAXINSNS;
+	struct bpf_insn *insn = self->insns;
+	int i = 0, j, k = 0;
+
+	insn[i++] = BPF_MOV64_REG(BPF_REG_6, BPF_REG_1);
+loop:
+	for (j = 0; j < PUSH_CNT; j++) {
+		insn[i++] = BPF_LD_ABS(BPF_B, 0);
+		insn[i] = BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0x34, len - i - 2);
+		i++;
+		insn[i++] = BPF_MOV64_REG(BPF_REG_1, BPF_REG_6);
+		insn[i++] = BPF_MOV64_IMM(BPF_REG_2, 1);
+		insn[i++] = BPF_MOV64_IMM(BPF_REG_3, 2);
+		insn[i++] = BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+					 BPF_FUNC_skb_vlan_push),
+		insn[i] = BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, len - i - 2);
+		i++;
+	}
+
+	for (j = 0; j < PUSH_CNT; j++) {
+		insn[i++] = BPF_LD_ABS(BPF_B, 0);
+		insn[i] = BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0x34, len - i - 2);
+		i++;
+		insn[i++] = BPF_MOV64_REG(BPF_REG_1, BPF_REG_6);
+		insn[i++] = BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+					 BPF_FUNC_skb_vlan_pop),
+		insn[i] = BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, len - i - 2);
+		i++;
+	}
+	if (++k < 5)
+		goto loop;
+
+	for (; i < len - 1; i++)
+		insn[i] = BPF_ALU32_IMM(BPF_MOV, BPF_REG_0, 0xbef);
+	insn[len - 1] = BPF_EXIT_INSN();
+}
+
+static void bpf_fill_jump_around_ld_abs(struct bpf_test *self)
+{
+	struct bpf_insn *insn = self->insns;
+	unsigned int len = BPF_MAXINSNS;
+	int i = 0;
+
+	insn[i++] = BPF_MOV64_REG(BPF_REG_6, BPF_REG_1);
+	insn[i++] = BPF_LD_ABS(BPF_B, 0);
+	insn[i] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 10, len - i - 2);
+	i++;
+	while (i < len - 1)
+		insn[i++] = BPF_LD_ABS(BPF_B, 1);
+	insn[i] = BPF_EXIT_INSN();
+}
+
+static void bpf_fill_rand_ld_dw(struct bpf_test *self)
+{
+	struct bpf_insn *insn = self->insns;
+	uint64_t res = 0;
+	int i = 0;
+
+	insn[i++] = BPF_MOV32_IMM(BPF_REG_0, 0);
+	while (i < self->retval) {
+		uint64_t val = bpf_semi_rand_get();
+		struct bpf_insn tmp[2] = { BPF_LD_IMM64(BPF_REG_1, val) };
+
+		res ^= val;
+		insn[i++] = tmp[0];
+		insn[i++] = tmp[1];
+		insn[i++] = BPF_ALU64_REG(BPF_XOR, BPF_REG_0, BPF_REG_1);
+	}
+	insn[i++] = BPF_MOV64_REG(BPF_REG_1, BPF_REG_0);
+	insn[i++] = BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 32);
+	insn[i++] = BPF_ALU64_REG(BPF_XOR, BPF_REG_0, BPF_REG_1);
+	insn[i] = BPF_EXIT_INSN();
+	res ^= (res >> 32);
+	self->retval = (uint32_t)res;
+}
+
+static struct bpf_test tests[] = {
+	{
+		"add+sub+mul",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_1, 1),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 2),
+			BPF_MOV64_IMM(BPF_REG_2, 3),
+			BPF_ALU64_REG(BPF_SUB, BPF_REG_1, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -1),
+			BPF_ALU64_IMM(BPF_MUL, BPF_REG_1, 3),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.retval = -3,
+	},
+	{
+		"DIV32 by 0, zero check 1",
+		.insns = {
+			BPF_MOV32_IMM(BPF_REG_0, 42),
+			BPF_MOV32_IMM(BPF_REG_1, 0),
+			BPF_MOV32_IMM(BPF_REG_2, 1),
+			BPF_ALU32_REG(BPF_DIV, BPF_REG_2, BPF_REG_1),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.retval = 42,
+	},
+	{
+		"DIV32 by 0, zero check 2",
+		.insns = {
+			BPF_MOV32_IMM(BPF_REG_0, 42),
+			BPF_LD_IMM64(BPF_REG_1, 0xffffffff00000000LL),
+			BPF_MOV32_IMM(BPF_REG_2, 1),
+			BPF_ALU32_REG(BPF_DIV, BPF_REG_2, BPF_REG_1),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.retval = 42,
+	},
+	{
+		"DIV64 by 0, zero check",
+		.insns = {
+			BPF_MOV32_IMM(BPF_REG_0, 42),
+			BPF_MOV32_IMM(BPF_REG_1, 0),
+			BPF_MOV32_IMM(BPF_REG_2, 1),
+			BPF_ALU64_REG(BPF_DIV, BPF_REG_2, BPF_REG_1),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.retval = 42,
+	},
+	{
+		"MOD32 by 0, zero check 1",
+		.insns = {
+			BPF_MOV32_IMM(BPF_REG_0, 42),
+			BPF_MOV32_IMM(BPF_REG_1, 0),
+			BPF_MOV32_IMM(BPF_REG_2, 1),
+			BPF_ALU32_REG(BPF_MOD, BPF_REG_2, BPF_REG_1),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.retval = 42,
+	},
+	{
+		"MOD32 by 0, zero check 2",
+		.insns = {
+			BPF_MOV32_IMM(BPF_REG_0, 42),
+			BPF_LD_IMM64(BPF_REG_1, 0xffffffff00000000LL),
+			BPF_MOV32_IMM(BPF_REG_2, 1),
+			BPF_ALU32_REG(BPF_MOD, BPF_REG_2, BPF_REG_1),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.retval = 42,
+	},
+	{
+		"MOD64 by 0, zero check",
+		.insns = {
+			BPF_MOV32_IMM(BPF_REG_0, 42),
+			BPF_MOV32_IMM(BPF_REG_1, 0),
+			BPF_MOV32_IMM(BPF_REG_2, 1),
+			BPF_ALU64_REG(BPF_MOD, BPF_REG_2, BPF_REG_1),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.retval = 42,
+	},
+	{
+		"DIV32 by 0, zero check ok, cls",
+		.insns = {
+			BPF_MOV32_IMM(BPF_REG_0, 42),
+			BPF_MOV32_IMM(BPF_REG_1, 2),
+			BPF_MOV32_IMM(BPF_REG_2, 16),
+			BPF_ALU32_REG(BPF_DIV, BPF_REG_2, BPF_REG_1),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = ACCEPT,
+		.retval = 8,
+	},
+	{
+		"DIV32 by 0, zero check 1, cls",
+		.insns = {
+			BPF_MOV32_IMM(BPF_REG_1, 0),
+			BPF_MOV32_IMM(BPF_REG_0, 1),
+			BPF_ALU32_REG(BPF_DIV, BPF_REG_0, BPF_REG_1),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = ACCEPT,
+		.retval = 0,
+	},
+	{
+		"DIV32 by 0, zero check 2, cls",
+		.insns = {
+			BPF_LD_IMM64(BPF_REG_1, 0xffffffff00000000LL),
+			BPF_MOV32_IMM(BPF_REG_0, 1),
+			BPF_ALU32_REG(BPF_DIV, BPF_REG_0, BPF_REG_1),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = ACCEPT,
+		.retval = 0,
+	},
+	{
+		"DIV64 by 0, zero check, cls",
+		.insns = {
+			BPF_MOV32_IMM(BPF_REG_1, 0),
+			BPF_MOV32_IMM(BPF_REG_0, 1),
+			BPF_ALU64_REG(BPF_DIV, BPF_REG_0, BPF_REG_1),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = ACCEPT,
+		.retval = 0,
+	},
+	{
+		"MOD32 by 0, zero check ok, cls",
+		.insns = {
+			BPF_MOV32_IMM(BPF_REG_0, 42),
+			BPF_MOV32_IMM(BPF_REG_1, 3),
+			BPF_MOV32_IMM(BPF_REG_2, 5),
+			BPF_ALU32_REG(BPF_MOD, BPF_REG_2, BPF_REG_1),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = ACCEPT,
+		.retval = 2,
+	},
+	{
+		"MOD32 by 0, zero check 1, cls",
+		.insns = {
+			BPF_MOV32_IMM(BPF_REG_1, 0),
+			BPF_MOV32_IMM(BPF_REG_0, 1),
+			BPF_ALU32_REG(BPF_MOD, BPF_REG_0, BPF_REG_1),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = ACCEPT,
+		.retval = 1,
+	},
+	{
+		"MOD32 by 0, zero check 2, cls",
+		.insns = {
+			BPF_LD_IMM64(BPF_REG_1, 0xffffffff00000000LL),
+			BPF_MOV32_IMM(BPF_REG_0, 1),
+			BPF_ALU32_REG(BPF_MOD, BPF_REG_0, BPF_REG_1),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = ACCEPT,
+		.retval = 1,
+	},
+	{
+		"MOD64 by 0, zero check 1, cls",
+		.insns = {
+			BPF_MOV32_IMM(BPF_REG_1, 0),
+			BPF_MOV32_IMM(BPF_REG_0, 2),
+			BPF_ALU64_REG(BPF_MOD, BPF_REG_0, BPF_REG_1),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = ACCEPT,
+		.retval = 2,
+	},
+	{
+		"MOD64 by 0, zero check 2, cls",
+		.insns = {
+			BPF_MOV32_IMM(BPF_REG_1, 0),
+			BPF_MOV32_IMM(BPF_REG_0, -1),
+			BPF_ALU64_REG(BPF_MOD, BPF_REG_0, BPF_REG_1),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = ACCEPT,
+		.retval = -1,
+	},
+	/* Just make sure that JITs used udiv/umod as otherwise we get
+	 * an exception from INT_MIN/-1 overflow similarly as with div
+	 * by zero.
+	 */
+	{
+		"DIV32 overflow, check 1",
+		.insns = {
+			BPF_MOV32_IMM(BPF_REG_1, -1),
+			BPF_MOV32_IMM(BPF_REG_0, INT_MIN),
+			BPF_ALU32_REG(BPF_DIV, BPF_REG_0, BPF_REG_1),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = ACCEPT,
+		.retval = 0,
+	},
+	{
+		"DIV32 overflow, check 2",
+		.insns = {
+			BPF_MOV32_IMM(BPF_REG_0, INT_MIN),
+			BPF_ALU32_IMM(BPF_DIV, BPF_REG_0, -1),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = ACCEPT,
+		.retval = 0,
+	},
+	{
+		"DIV64 overflow, check 1",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_1, -1),
+			BPF_LD_IMM64(BPF_REG_0, LLONG_MIN),
+			BPF_ALU64_REG(BPF_DIV, BPF_REG_0, BPF_REG_1),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = ACCEPT,
+		.retval = 0,
+	},
+	{
+		"DIV64 overflow, check 2",
+		.insns = {
+			BPF_LD_IMM64(BPF_REG_0, LLONG_MIN),
+			BPF_ALU64_IMM(BPF_DIV, BPF_REG_0, -1),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = ACCEPT,
+		.retval = 0,
+	},
+	{
+		"MOD32 overflow, check 1",
+		.insns = {
+			BPF_MOV32_IMM(BPF_REG_1, -1),
+			BPF_MOV32_IMM(BPF_REG_0, INT_MIN),
+			BPF_ALU32_REG(BPF_MOD, BPF_REG_0, BPF_REG_1),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = ACCEPT,
+		.retval = INT_MIN,
+	},
+	{
+		"MOD32 overflow, check 2",
+		.insns = {
+			BPF_MOV32_IMM(BPF_REG_0, INT_MIN),
+			BPF_ALU32_IMM(BPF_MOD, BPF_REG_0, -1),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = ACCEPT,
+		.retval = INT_MIN,
+	},
+	{
+		"MOD64 overflow, check 1",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_1, -1),
+			BPF_LD_IMM64(BPF_REG_2, LLONG_MIN),
+			BPF_MOV64_REG(BPF_REG_3, BPF_REG_2),
+			BPF_ALU64_REG(BPF_MOD, BPF_REG_2, BPF_REG_1),
+			BPF_MOV32_IMM(BPF_REG_0, 0),
+			BPF_JMP_REG(BPF_JNE, BPF_REG_3, BPF_REG_2, 1),
+			BPF_MOV32_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = ACCEPT,
+		.retval = 1,
+	},
+	{
+		"MOD64 overflow, check 2",
+		.insns = {
+			BPF_LD_IMM64(BPF_REG_2, LLONG_MIN),
+			BPF_MOV64_REG(BPF_REG_3, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_MOD, BPF_REG_2, -1),
+			BPF_MOV32_IMM(BPF_REG_0, 0),
+			BPF_JMP_REG(BPF_JNE, BPF_REG_3, BPF_REG_2, 1),
+			BPF_MOV32_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = ACCEPT,
+		.retval = 1,
+	},
+	{
+		"xor32 zero extend check",
+		.insns = {
+			BPF_MOV32_IMM(BPF_REG_2, -1),
+			BPF_ALU64_IMM(BPF_LSH, BPF_REG_2, 32),
+			BPF_ALU64_IMM(BPF_OR, BPF_REG_2, 0xffff),
+			BPF_ALU32_REG(BPF_XOR, BPF_REG_2, BPF_REG_2),
+			BPF_MOV32_IMM(BPF_REG_0, 2),
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_2, 0, 1),
+			BPF_MOV32_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = ACCEPT,
+		.retval = 1,
+	},
+	{
+		"empty prog",
+		.insns = {
+		},
+		.errstr = "unknown opcode 00",
+		.result = REJECT,
+	},
+	{
+		"only exit insn",
+		.insns = {
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "R0 !read_ok",
+		.result = REJECT,
+	},
+	{
+		"unreachable",
+		.insns = {
+			BPF_EXIT_INSN(),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "unreachable",
+		.result = REJECT,
+	},
+	{
+		"unreachable2",
+		.insns = {
+			BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+			BPF_JMP_IMM(BPF_JA, 0, 0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "unreachable",
+		.result = REJECT,
+	},
+	{
+		"out of range jump",
+		.insns = {
+			BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "jump out of range",
+		.result = REJECT,
+	},
+	{
+		"out of range jump2",
+		.insns = {
+			BPF_JMP_IMM(BPF_JA, 0, 0, -2),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "jump out of range",
+		.result = REJECT,
+	},
+	{
+		"test1 ld_imm64",
+		.insns = {
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1),
+			BPF_LD_IMM64(BPF_REG_0, 0),
+			BPF_LD_IMM64(BPF_REG_0, 0),
+			BPF_LD_IMM64(BPF_REG_0, 1),
+			BPF_LD_IMM64(BPF_REG_0, 1),
+			BPF_MOV64_IMM(BPF_REG_0, 2),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "invalid BPF_LD_IMM insn",
+		.errstr_unpriv = "R1 pointer comparison",
+		.result = REJECT,
+	},
+	{
+		"test2 ld_imm64",
+		.insns = {
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1),
+			BPF_LD_IMM64(BPF_REG_0, 0),
+			BPF_LD_IMM64(BPF_REG_0, 0),
+			BPF_LD_IMM64(BPF_REG_0, 1),
+			BPF_LD_IMM64(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "invalid BPF_LD_IMM insn",
+		.errstr_unpriv = "R1 pointer comparison",
+		.result = REJECT,
+	},
+	{
+		"test3 ld_imm64",
+		.insns = {
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1),
+			BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, 0, 0, 0, 0),
+			BPF_LD_IMM64(BPF_REG_0, 0),
+			BPF_LD_IMM64(BPF_REG_0, 0),
+			BPF_LD_IMM64(BPF_REG_0, 1),
+			BPF_LD_IMM64(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "invalid bpf_ld_imm64 insn",
+		.result = REJECT,
+	},
+	{
+		"test4 ld_imm64",
+		.insns = {
+			BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, 0, 0, 0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "invalid bpf_ld_imm64 insn",
+		.result = REJECT,
+	},
+	{
+		"test5 ld_imm64",
+		.insns = {
+			BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, 0, 0, 0, 0),
+		},
+		.errstr = "invalid bpf_ld_imm64 insn",
+		.result = REJECT,
+	},
+	{
+		"test6 ld_imm64",
+		.insns = {
+			BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, 0, 0, 0, 0),
+			BPF_RAW_INSN(0, 0, 0, 0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+	},
+	{
+		"test7 ld_imm64",
+		.insns = {
+			BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, 0, 0, 0, 1),
+			BPF_RAW_INSN(0, 0, 0, 0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.retval = 1,
+	},
+	{
+		"test8 ld_imm64",
+		.insns = {
+			BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, 0, 0, 1, 1),
+			BPF_RAW_INSN(0, 0, 0, 0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "uses reserved fields",
+		.result = REJECT,
+	},
+	{
+		"test9 ld_imm64",
+		.insns = {
+			BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, 0, 0, 0, 1),
+			BPF_RAW_INSN(0, 0, 0, 1, 1),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "invalid bpf_ld_imm64 insn",
+		.result = REJECT,
+	},
+	{
+		"test10 ld_imm64",
+		.insns = {
+			BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, 0, 0, 0, 1),
+			BPF_RAW_INSN(0, BPF_REG_1, 0, 0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "invalid bpf_ld_imm64 insn",
+		.result = REJECT,
+	},
+	{
+		"test11 ld_imm64",
+		.insns = {
+			BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, 0, 0, 0, 1),
+			BPF_RAW_INSN(0, 0, BPF_REG_1, 0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "invalid bpf_ld_imm64 insn",
+		.result = REJECT,
+	},
+	{
+		"test12 ld_imm64",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, 0, BPF_REG_1, 0, 1),
+			BPF_RAW_INSN(0, 0, 0, 0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "not pointing to valid bpf_map",
+		.result = REJECT,
+	},
+	{
+		"test13 ld_imm64",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, 0, BPF_REG_1, 0, 1),
+			BPF_RAW_INSN(0, 0, BPF_REG_1, 0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "invalid bpf_ld_imm64 insn",
+		.result = REJECT,
+	},
+	{
+		"arsh32 on imm",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_ALU32_IMM(BPF_ARSH, BPF_REG_0, 5),
+			BPF_EXIT_INSN(),
+		},
+		.result = REJECT,
+		.errstr = "unknown opcode c4",
+	},
+	{
+		"arsh32 on reg",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_MOV64_IMM(BPF_REG_1, 5),
+			BPF_ALU32_REG(BPF_ARSH, BPF_REG_0, BPF_REG_1),
+			BPF_EXIT_INSN(),
+		},
+		.result = REJECT,
+		.errstr = "unknown opcode cc",
+	},
+	{
+		"arsh64 on imm",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_ALU64_IMM(BPF_ARSH, BPF_REG_0, 5),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+	},
+	{
+		"arsh64 on reg",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_MOV64_IMM(BPF_REG_1, 5),
+			BPF_ALU64_REG(BPF_ARSH, BPF_REG_0, BPF_REG_1),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+	},
+	{
+		"no bpf_exit",
+		.insns = {
+			BPF_ALU64_REG(BPF_MOV, BPF_REG_0, BPF_REG_2),
+		},
+		.errstr = "not an exit",
+		.result = REJECT,
+	},
+	{
+		"loop (back-edge)",
+		.insns = {
+			BPF_JMP_IMM(BPF_JA, 0, 0, -1),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "back-edge",
+		.result = REJECT,
+	},
+	{
+		"loop2 (back-edge)",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+			BPF_MOV64_REG(BPF_REG_3, BPF_REG_0),
+			BPF_JMP_IMM(BPF_JA, 0, 0, -4),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "back-edge",
+		.result = REJECT,
+	},
+	{
+		"conditional loop",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+			BPF_MOV64_REG(BPF_REG_3, BPF_REG_0),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, -3),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "back-edge",
+		.result = REJECT,
+	},
+	{
+		"read uninitialized register",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "R2 !read_ok",
+		.result = REJECT,
+	},
+	{
+		"read invalid register",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_0, -1),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "R15 is invalid",
+		.result = REJECT,
+	},
+	{
+		"program doesn't init R0 before exit",
+		.insns = {
+			BPF_ALU64_REG(BPF_MOV, BPF_REG_2, BPF_REG_1),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "R0 !read_ok",
+		.result = REJECT,
+	},
+	{
+		"program doesn't init R0 before exit in all branches",
+		.insns = {
+			BPF_JMP_IMM(BPF_JGE, BPF_REG_1, 0, 2),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 2),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "R0 !read_ok",
+		.errstr_unpriv = "R1 pointer comparison",
+		.result = REJECT,
+	},
+	{
+		"stack out of bounds",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, 8, 0),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "invalid stack",
+		.result = REJECT,
+	},
+	{
+		"invalid call insn1",
+		.insns = {
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL | BPF_X, 0, 0, 0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "unknown opcode 8d",
+		.result = REJECT,
+	},
+	{
+		"invalid call insn2",
+		.insns = {
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 1, 0),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "BPF_CALL uses reserved",
+		.result = REJECT,
+	},
+	{
+		"invalid function call",
+		.insns = {
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, 1234567),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "invalid func unknown#1234567",
+		.result = REJECT,
+	},
+	{
+		"uninitialized stack1",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map1 = { 2 },
+		.errstr = "invalid indirect read from stack",
+		.result = REJECT,
+	},
+	{
+		"uninitialized stack2",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_2, -8),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "invalid read from stack",
+		.result = REJECT,
+	},
+	{
+		"invalid fp arithmetic",
+		/* If this gets ever changed, make sure JITs can deal with it. */
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 8),
+			BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "R1 subtraction from stack pointer",
+		.result = REJECT,
+	},
+	{
+		"non-invalid fp arithmetic",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+	},
+	{
+		"invalid argument register",
+		.insns = {
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_get_cgroup_classid),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_get_cgroup_classid),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "R1 !read_ok",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	},
+	{
+		"non-invalid argument register",
+		.insns = {
+			BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_1),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_get_cgroup_classid),
+			BPF_ALU64_REG(BPF_MOV, BPF_REG_1, BPF_REG_6),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_get_cgroup_classid),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	},
+	{
+		"check valid spill/fill",
+		.insns = {
+			/* spill R1(ctx) into stack */
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8),
+			/* fill it back into R2 */
+			BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -8),
+			/* should be able to access R0 = *(R2 + 8) */
+			/* BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_2, 8), */
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+			BPF_EXIT_INSN(),
+		},
+		.errstr_unpriv = "R0 leaks addr",
+		.result = ACCEPT,
+		.result_unpriv = REJECT,
+		.retval = POINTER_VALUE,
+	},
+	{
+		"check valid spill/fill, skb mark",
+		.insns = {
+			BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_1),
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_6, -8),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8),
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0,
+				    offsetof(struct __sk_buff, mark)),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.result_unpriv = ACCEPT,
+	},
+	{
+		"check corrupted spill/fill",
+		.insns = {
+			/* spill R1(ctx) into stack */
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8),
+			/* mess up with R1 pointer on stack */
+			BPF_ST_MEM(BPF_B, BPF_REG_10, -7, 0x23),
+			/* fill back into R0 is fine for priv.
+			 * R0 now becomes SCALAR_VALUE.
+			 */
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8),
+			/* Load from R0 should fail. */
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 8),
+			BPF_EXIT_INSN(),
+		},
+		.errstr_unpriv = "attempt to corrupt spilled",
+		.errstr = "R0 invalid mem access 'inv",
+		.result = REJECT,
+		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+	},
+	{
+		"check corrupted spill/fill, LSB",
+		.insns = {
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8),
+			BPF_ST_MEM(BPF_H, BPF_REG_10, -8, 0xcafe),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8),
+			BPF_EXIT_INSN(),
+		},
+		.errstr_unpriv = "attempt to corrupt spilled",
+		.result_unpriv = REJECT,
+		.result = ACCEPT,
+		.retval = POINTER_VALUE,
+	},
+	{
+		"check corrupted spill/fill, MSB",
+		.insns = {
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8),
+			BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0x12345678),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8),
+			BPF_EXIT_INSN(),
+		},
+		.errstr_unpriv = "attempt to corrupt spilled",
+		.result_unpriv = REJECT,
+		.result = ACCEPT,
+		.retval = POINTER_VALUE,
+	},
+	{
+		"invalid src register in STX",
+		.insns = {
+			BPF_STX_MEM(BPF_B, BPF_REG_10, -1, -1),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "R15 is invalid",
+		.result = REJECT,
+	},
+	{
+		"invalid dst register in STX",
+		.insns = {
+			BPF_STX_MEM(BPF_B, 14, BPF_REG_10, -1),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "R14 is invalid",
+		.result = REJECT,
+	},
+	{
+		"invalid dst register in ST",
+		.insns = {
+			BPF_ST_MEM(BPF_B, 14, -1, -1),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "R14 is invalid",
+		.result = REJECT,
+	},
+	{
+		"invalid src register in LDX",
+		.insns = {
+			BPF_LDX_MEM(BPF_B, BPF_REG_0, 12, 0),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "R12 is invalid",
+		.result = REJECT,
+	},
+	{
+		"invalid dst register in LDX",
+		.insns = {
+			BPF_LDX_MEM(BPF_B, 11, BPF_REG_1, 0),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "R11 is invalid",
+		.result = REJECT,
+	},
+	{
+		"junk insn",
+		.insns = {
+			BPF_RAW_INSN(0, 0, 0, 0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "unknown opcode 00",
+		.result = REJECT,
+	},
+	{
+		"junk insn2",
+		.insns = {
+			BPF_RAW_INSN(1, 0, 0, 0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "BPF_LDX uses reserved fields",
+		.result = REJECT,
+	},
+	{
+		"junk insn3",
+		.insns = {
+			BPF_RAW_INSN(-1, 0, 0, 0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "unknown opcode ff",
+		.result = REJECT,
+	},
+	{
+		"junk insn4",
+		.insns = {
+			BPF_RAW_INSN(-1, -1, -1, -1, -1),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "unknown opcode ff",
+		.result = REJECT,
+	},
+	{
+		"junk insn5",
+		.insns = {
+			BPF_RAW_INSN(0x7f, -1, -1, -1, -1),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "BPF_ALU uses reserved fields",
+		.result = REJECT,
+	},
+	{
+		"misaligned read from stack",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_2, -4),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "misaligned stack access",
+		.result = REJECT,
+	},
+	{
+		"invalid map_fd for function call",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_ALU64_REG(BPF_MOV, BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_delete_elem),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "fd 0 is not pointing to valid bpf_map",
+		.result = REJECT,
+	},
+	{
+		"don't check return value before access",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map1 = { 3 },
+		.errstr = "R0 invalid mem access 'map_value_or_null'",
+		.result = REJECT,
+	},
+	{
+		"access memory with incorrect alignment",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+			BPF_ST_MEM(BPF_DW, BPF_REG_0, 4, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map1 = { 3 },
+		.errstr = "misaligned value access",
+		.result = REJECT,
+		.flags = F_LOAD_WITH_STRICT_ALIGNMENT,
+	},
+	{
+		"sometimes access memory with incorrect alignment",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
+			BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
+			BPF_EXIT_INSN(),
+			BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map1 = { 3 },
+		.errstr = "R0 invalid mem access",
+		.errstr_unpriv = "R0 leaks addr",
+		.result = REJECT,
+		.flags = F_LOAD_WITH_STRICT_ALIGNMENT,
+	},
+	{
+		"jump test 1",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -8),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, -8, 0),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 1, 1),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, -16, 1),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 2, 1),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, -8, 2),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 3, 1),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, -16, 3),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 4, 1),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, -8, 4),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 5, 1),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, -32, 5),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.errstr_unpriv = "R1 pointer comparison",
+		.result_unpriv = REJECT,
+		.result = ACCEPT,
+	},
+	{
+		"jump test 2",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 2),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, -8, 0),
+			BPF_JMP_IMM(BPF_JA, 0, 0, 14),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 1, 2),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, -16, 0),
+			BPF_JMP_IMM(BPF_JA, 0, 0, 11),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 2, 2),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, -32, 0),
+			BPF_JMP_IMM(BPF_JA, 0, 0, 8),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 3, 2),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, -40, 0),
+			BPF_JMP_IMM(BPF_JA, 0, 0, 5),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 4, 2),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, -48, 0),
+			BPF_JMP_IMM(BPF_JA, 0, 0, 2),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 5, 1),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, -56, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.errstr_unpriv = "R1 pointer comparison",
+		.result_unpriv = REJECT,
+		.result = ACCEPT,
+	},
+	{
+		"jump test 3",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 3),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, -8, 0),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_JMP_IMM(BPF_JA, 0, 0, 19),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 1, 3),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, -16, 0),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16),
+			BPF_JMP_IMM(BPF_JA, 0, 0, 15),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 2, 3),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, -32, 0),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -32),
+			BPF_JMP_IMM(BPF_JA, 0, 0, 11),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 3, 3),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, -40, 0),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -40),
+			BPF_JMP_IMM(BPF_JA, 0, 0, 7),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 4, 3),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, -48, 0),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -48),
+			BPF_JMP_IMM(BPF_JA, 0, 0, 3),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 5, 0),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, -56, 0),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -56),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_delete_elem),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map1 = { 24 },
+		.errstr_unpriv = "R1 pointer comparison",
+		.result_unpriv = REJECT,
+		.result = ACCEPT,
+		.retval = -ENOENT,
+	},
+	{
+		"jump test 4",
+		.insns = {
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 1),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 2),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 3),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 4),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 1),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 2),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 3),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 4),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 1),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 2),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 3),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 4),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 1),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 2),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 3),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 4),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 1),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 2),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 3),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 4),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 1),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 2),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 3),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 4),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 1),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 2),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 3),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 4),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 1),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 2),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 3),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 4),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 1),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 2),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 3),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 4),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 0),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 0),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 0),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.errstr_unpriv = "R1 pointer comparison",
+		.result_unpriv = REJECT,
+		.result = ACCEPT,
+	},
+	{
+		"jump test 5",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_MOV64_REG(BPF_REG_3, BPF_REG_2),
+			BPF_JMP_IMM(BPF_JGE, BPF_REG_1, 0, 2),
+			BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_3, -8),
+			BPF_JMP_IMM(BPF_JA, 0, 0, 2),
+			BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_2, -8),
+			BPF_JMP_IMM(BPF_JA, 0, 0, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_JMP_IMM(BPF_JGE, BPF_REG_1, 0, 2),
+			BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_3, -8),
+			BPF_JMP_IMM(BPF_JA, 0, 0, 2),
+			BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_2, -8),
+			BPF_JMP_IMM(BPF_JA, 0, 0, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_JMP_IMM(BPF_JGE, BPF_REG_1, 0, 2),
+			BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_3, -8),
+			BPF_JMP_IMM(BPF_JA, 0, 0, 2),
+			BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_2, -8),
+			BPF_JMP_IMM(BPF_JA, 0, 0, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_JMP_IMM(BPF_JGE, BPF_REG_1, 0, 2),
+			BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_3, -8),
+			BPF_JMP_IMM(BPF_JA, 0, 0, 2),
+			BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_2, -8),
+			BPF_JMP_IMM(BPF_JA, 0, 0, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_JMP_IMM(BPF_JGE, BPF_REG_1, 0, 2),
+			BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_3, -8),
+			BPF_JMP_IMM(BPF_JA, 0, 0, 2),
+			BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_2, -8),
+			BPF_JMP_IMM(BPF_JA, 0, 0, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.errstr_unpriv = "R1 pointer comparison",
+		.result_unpriv = REJECT,
+		.result = ACCEPT,
+	},
+	{
+		"access skb fields ok",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, len)),
+			BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 1),
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, mark)),
+			BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 1),
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, pkt_type)),
+			BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 1),
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, queue_mapping)),
+			BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 0),
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, protocol)),
+			BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 0),
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, vlan_present)),
+			BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 0),
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, vlan_tci)),
+			BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 0),
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, napi_id)),
+			BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+	},
+	{
+		"access skb fields bad1",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -4),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "invalid bpf_context access",
+		.result = REJECT,
+	},
+	{
+		"access skb fields bad2",
+		.insns = {
+			BPF_JMP_IMM(BPF_JGE, BPF_REG_1, 0, 9),
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+			BPF_EXIT_INSN(),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, pkt_type)),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map1 = { 4 },
+		.errstr = "different pointers",
+		.errstr_unpriv = "R1 pointer comparison",
+		.result = REJECT,
+	},
+	{
+		"access skb fields bad3",
+		.insns = {
+			BPF_JMP_IMM(BPF_JGE, BPF_REG_1, 0, 2),
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, pkt_type)),
+			BPF_EXIT_INSN(),
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+			BPF_EXIT_INSN(),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+			BPF_JMP_IMM(BPF_JA, 0, 0, -12),
+		},
+		.fixup_map1 = { 6 },
+		.errstr = "different pointers",
+		.errstr_unpriv = "R1 pointer comparison",
+		.result = REJECT,
+	},
+	{
+		"access skb fields bad4",
+		.insns = {
+			BPF_JMP_IMM(BPF_JGE, BPF_REG_1, 0, 3),
+			BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_1,
+				    offsetof(struct __sk_buff, len)),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+			BPF_EXIT_INSN(),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+			BPF_JMP_IMM(BPF_JA, 0, 0, -13),
+		},
+		.fixup_map1 = { 7 },
+		.errstr = "different pointers",
+		.errstr_unpriv = "R1 pointer comparison",
+		.result = REJECT,
+	},
+	{
+		"invalid access __sk_buff family",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, family)),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "invalid bpf_context access",
+		.result = REJECT,
+	},
+	{
+		"invalid access __sk_buff remote_ip4",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, remote_ip4)),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "invalid bpf_context access",
+		.result = REJECT,
+	},
+	{
+		"invalid access __sk_buff local_ip4",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, local_ip4)),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "invalid bpf_context access",
+		.result = REJECT,
+	},
+	{
+		"invalid access __sk_buff remote_ip6",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, remote_ip6)),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "invalid bpf_context access",
+		.result = REJECT,
+	},
+	{
+		"invalid access __sk_buff local_ip6",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, local_ip6)),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "invalid bpf_context access",
+		.result = REJECT,
+	},
+	{
+		"invalid access __sk_buff remote_port",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, remote_port)),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "invalid bpf_context access",
+		.result = REJECT,
+	},
+	{
+		"invalid access __sk_buff remote_port",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, local_port)),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "invalid bpf_context access",
+		.result = REJECT,
+	},
+	{
+		"valid access __sk_buff family",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, family)),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_SK_SKB,
+	},
+	{
+		"valid access __sk_buff remote_ip4",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, remote_ip4)),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_SK_SKB,
+	},
+	{
+		"valid access __sk_buff local_ip4",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, local_ip4)),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_SK_SKB,
+	},
+	{
+		"valid access __sk_buff remote_ip6",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, remote_ip6[0])),
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, remote_ip6[1])),
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, remote_ip6[2])),
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, remote_ip6[3])),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_SK_SKB,
+	},
+	{
+		"valid access __sk_buff local_ip6",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, local_ip6[0])),
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, local_ip6[1])),
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, local_ip6[2])),
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, local_ip6[3])),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_SK_SKB,
+	},
+	{
+		"valid access __sk_buff remote_port",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, remote_port)),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_SK_SKB,
+	},
+	{
+		"valid access __sk_buff remote_port",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, local_port)),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_SK_SKB,
+	},
+	{
+		"invalid access of tc_classid for SK_SKB",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, tc_classid)),
+			BPF_EXIT_INSN(),
+		},
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_SK_SKB,
+		.errstr = "invalid bpf_context access",
+	},
+	{
+		"invalid access of skb->mark for SK_SKB",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, mark)),
+			BPF_EXIT_INSN(),
+		},
+		.result =  REJECT,
+		.prog_type = BPF_PROG_TYPE_SK_SKB,
+		.errstr = "invalid bpf_context access",
+	},
+	{
+		"check skb->mark is not writeable by SK_SKB",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct __sk_buff, mark)),
+			BPF_EXIT_INSN(),
+		},
+		.result =  REJECT,
+		.prog_type = BPF_PROG_TYPE_SK_SKB,
+		.errstr = "invalid bpf_context access",
+	},
+	{
+		"check skb->tc_index is writeable by SK_SKB",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct __sk_buff, tc_index)),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_SK_SKB,
+	},
+	{
+		"check skb->priority is writeable by SK_SKB",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct __sk_buff, priority)),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_SK_SKB,
+	},
+	{
+		"direct packet read for SK_SKB",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct __sk_buff, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct __sk_buff, data_end)),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
+			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_SK_SKB,
+	},
+	{
+		"direct packet write for SK_SKB",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct __sk_buff, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct __sk_buff, data_end)),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
+			BPF_STX_MEM(BPF_B, BPF_REG_2, BPF_REG_2, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_SK_SKB,
+	},
+	{
+		"overlapping checks for direct packet access SK_SKB",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct __sk_buff, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct __sk_buff, data_end)),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 4),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 6),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1),
+			BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_2, 6),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_SK_SKB,
+	},
+	{
+		"valid access family in SK_MSG",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct sk_msg_md, family)),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_SK_MSG,
+	},
+	{
+		"valid access remote_ip4 in SK_MSG",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct sk_msg_md, remote_ip4)),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_SK_MSG,
+	},
+	{
+		"valid access local_ip4 in SK_MSG",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct sk_msg_md, local_ip4)),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_SK_MSG,
+	},
+	{
+		"valid access remote_port in SK_MSG",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct sk_msg_md, remote_port)),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_SK_MSG,
+	},
+	{
+		"valid access local_port in SK_MSG",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct sk_msg_md, local_port)),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_SK_MSG,
+	},
+	{
+		"valid access remote_ip6 in SK_MSG",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct sk_msg_md, remote_ip6[0])),
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct sk_msg_md, remote_ip6[1])),
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct sk_msg_md, remote_ip6[2])),
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct sk_msg_md, remote_ip6[3])),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_SK_SKB,
+	},
+	{
+		"valid access local_ip6 in SK_MSG",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct sk_msg_md, local_ip6[0])),
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct sk_msg_md, local_ip6[1])),
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct sk_msg_md, local_ip6[2])),
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct sk_msg_md, local_ip6[3])),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_SK_SKB,
+	},
+	{
+		"invalid 64B read of family in SK_MSG",
+		.insns = {
+			BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct sk_msg_md, family)),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "invalid bpf_context access",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_SK_MSG,
+		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+	},
+	{
+		"invalid read past end of SK_MSG",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct sk_msg_md, local_port) + 4),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "R0 !read_ok",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_SK_MSG,
+	},
+	{
+		"invalid read offset in SK_MSG",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct sk_msg_md, family) + 1),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "invalid bpf_context access",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_SK_MSG,
+		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+	},
+	{
+		"direct packet read for SK_MSG",
+		.insns = {
+			BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct sk_msg_md, data)),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct sk_msg_md, data_end)),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
+			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_SK_MSG,
+	},
+	{
+		"direct packet write for SK_MSG",
+		.insns = {
+			BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct sk_msg_md, data)),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct sk_msg_md, data_end)),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
+			BPF_STX_MEM(BPF_B, BPF_REG_2, BPF_REG_2, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_SK_MSG,
+	},
+	{
+		"overlapping checks for direct packet access SK_MSG",
+		.insns = {
+			BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct sk_msg_md, data)),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct sk_msg_md, data_end)),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 4),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 6),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1),
+			BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_2, 6),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_SK_MSG,
+	},
+	{
+		"check skb->mark is not writeable by sockets",
+		.insns = {
+			BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_1,
+				    offsetof(struct __sk_buff, mark)),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "invalid bpf_context access",
+		.errstr_unpriv = "R1 leaks addr",
+		.result = REJECT,
+	},
+	{
+		"check skb->tc_index is not writeable by sockets",
+		.insns = {
+			BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_1,
+				    offsetof(struct __sk_buff, tc_index)),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "invalid bpf_context access",
+		.errstr_unpriv = "R1 leaks addr",
+		.result = REJECT,
+	},
+	{
+		"check cb access: byte",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct __sk_buff, cb[0])),
+			BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct __sk_buff, cb[0]) + 1),
+			BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct __sk_buff, cb[0]) + 2),
+			BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct __sk_buff, cb[0]) + 3),
+			BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct __sk_buff, cb[1])),
+			BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct __sk_buff, cb[1]) + 1),
+			BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct __sk_buff, cb[1]) + 2),
+			BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct __sk_buff, cb[1]) + 3),
+			BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct __sk_buff, cb[2])),
+			BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct __sk_buff, cb[2]) + 1),
+			BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct __sk_buff, cb[2]) + 2),
+			BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct __sk_buff, cb[2]) + 3),
+			BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct __sk_buff, cb[3])),
+			BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct __sk_buff, cb[3]) + 1),
+			BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct __sk_buff, cb[3]) + 2),
+			BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct __sk_buff, cb[3]) + 3),
+			BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct __sk_buff, cb[4])),
+			BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct __sk_buff, cb[4]) + 1),
+			BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct __sk_buff, cb[4]) + 2),
+			BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct __sk_buff, cb[4]) + 3),
+			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, cb[0])),
+			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, cb[0]) + 1),
+			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, cb[0]) + 2),
+			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, cb[0]) + 3),
+			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, cb[1])),
+			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, cb[1]) + 1),
+			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, cb[1]) + 2),
+			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, cb[1]) + 3),
+			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, cb[2])),
+			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, cb[2]) + 1),
+			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, cb[2]) + 2),
+			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, cb[2]) + 3),
+			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, cb[3])),
+			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, cb[3]) + 1),
+			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, cb[3]) + 2),
+			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, cb[3]) + 3),
+			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, cb[4])),
+			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, cb[4]) + 1),
+			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, cb[4]) + 2),
+			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, cb[4]) + 3),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+	},
+	{
+		"__sk_buff->hash, offset 0, byte store not permitted",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct __sk_buff, hash)),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "invalid bpf_context access",
+		.result = REJECT,
+	},
+	{
+		"__sk_buff->tc_index, offset 3, byte store not permitted",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct __sk_buff, tc_index) + 3),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "invalid bpf_context access",
+		.result = REJECT,
+	},
+	{
+		"check skb->hash byte load permitted",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, hash)),
+#else
+			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, hash) + 3),
+#endif
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+	},
+	{
+		"check skb->hash byte load permitted 1",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, hash) + 1),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+	},
+	{
+		"check skb->hash byte load permitted 2",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, hash) + 2),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+	},
+	{
+		"check skb->hash byte load permitted 3",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, hash) + 3),
+#else
+			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, hash)),
+#endif
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+	},
+	{
+		"check cb access: byte, wrong type",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct __sk_buff, cb[0])),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "invalid bpf_context access",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_CGROUP_SOCK,
+	},
+	{
+		"check cb access: half",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_STX_MEM(BPF_H, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct __sk_buff, cb[0])),
+			BPF_STX_MEM(BPF_H, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct __sk_buff, cb[0]) + 2),
+			BPF_STX_MEM(BPF_H, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct __sk_buff, cb[1])),
+			BPF_STX_MEM(BPF_H, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct __sk_buff, cb[1]) + 2),
+			BPF_STX_MEM(BPF_H, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct __sk_buff, cb[2])),
+			BPF_STX_MEM(BPF_H, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct __sk_buff, cb[2]) + 2),
+			BPF_STX_MEM(BPF_H, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct __sk_buff, cb[3])),
+			BPF_STX_MEM(BPF_H, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct __sk_buff, cb[3]) + 2),
+			BPF_STX_MEM(BPF_H, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct __sk_buff, cb[4])),
+			BPF_STX_MEM(BPF_H, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct __sk_buff, cb[4]) + 2),
+			BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, cb[0])),
+			BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, cb[0]) + 2),
+			BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, cb[1])),
+			BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, cb[1]) + 2),
+			BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, cb[2])),
+			BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, cb[2]) + 2),
+			BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, cb[3])),
+			BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, cb[3]) + 2),
+			BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, cb[4])),
+			BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, cb[4]) + 2),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+	},
+	{
+		"check cb access: half, unaligned",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_STX_MEM(BPF_H, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct __sk_buff, cb[0]) + 1),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "misaligned context access",
+		.result = REJECT,
+		.flags = F_LOAD_WITH_STRICT_ALIGNMENT,
+	},
+	{
+		"check __sk_buff->hash, offset 0, half store not permitted",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_STX_MEM(BPF_H, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct __sk_buff, hash)),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "invalid bpf_context access",
+		.result = REJECT,
+	},
+	{
+		"check __sk_buff->tc_index, offset 2, half store not permitted",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_STX_MEM(BPF_H, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct __sk_buff, tc_index) + 2),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "invalid bpf_context access",
+		.result = REJECT,
+	},
+	{
+		"check skb->hash half load permitted",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+			BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, hash)),
+#else
+			BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, hash) + 2),
+#endif
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+	},
+	{
+		"check skb->hash half load permitted 2",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+			BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, hash) + 2),
+#else
+			BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, hash)),
+#endif
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+	},
+	{
+		"check skb->hash half load not permitted, unaligned 1",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+			BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, hash) + 1),
+#else
+			BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, hash) + 3),
+#endif
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "invalid bpf_context access",
+		.result = REJECT,
+		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+	},
+	{
+		"check skb->hash half load not permitted, unaligned 3",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+			BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, hash) + 3),
+#else
+			BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, hash) + 1),
+#endif
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "invalid bpf_context access",
+		.result = REJECT,
+		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+	},
+	{
+		"check cb access: half, wrong type",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_STX_MEM(BPF_H, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct __sk_buff, cb[0])),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "invalid bpf_context access",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_CGROUP_SOCK,
+	},
+	{
+		"check cb access: word",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct __sk_buff, cb[0])),
+			BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct __sk_buff, cb[1])),
+			BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct __sk_buff, cb[2])),
+			BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct __sk_buff, cb[3])),
+			BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct __sk_buff, cb[4])),
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, cb[0])),
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, cb[1])),
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, cb[2])),
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, cb[3])),
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, cb[4])),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+	},
+	{
+		"check cb access: word, unaligned 1",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct __sk_buff, cb[0]) + 2),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "misaligned context access",
+		.result = REJECT,
+		.flags = F_LOAD_WITH_STRICT_ALIGNMENT,
+	},
+	{
+		"check cb access: word, unaligned 2",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct __sk_buff, cb[4]) + 1),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "misaligned context access",
+		.result = REJECT,
+		.flags = F_LOAD_WITH_STRICT_ALIGNMENT,
+	},
+	{
+		"check cb access: word, unaligned 3",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct __sk_buff, cb[4]) + 2),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "misaligned context access",
+		.result = REJECT,
+		.flags = F_LOAD_WITH_STRICT_ALIGNMENT,
+	},
+	{
+		"check cb access: word, unaligned 4",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct __sk_buff, cb[4]) + 3),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "misaligned context access",
+		.result = REJECT,
+		.flags = F_LOAD_WITH_STRICT_ALIGNMENT,
+	},
+	{
+		"check cb access: double",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct __sk_buff, cb[0])),
+			BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct __sk_buff, cb[2])),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, cb[0])),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, cb[2])),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+	},
+	{
+		"check cb access: double, unaligned 1",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct __sk_buff, cb[1])),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "misaligned context access",
+		.result = REJECT,
+		.flags = F_LOAD_WITH_STRICT_ALIGNMENT,
+	},
+	{
+		"check cb access: double, unaligned 2",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct __sk_buff, cb[3])),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "misaligned context access",
+		.result = REJECT,
+		.flags = F_LOAD_WITH_STRICT_ALIGNMENT,
+	},
+	{
+		"check cb access: double, oob 1",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct __sk_buff, cb[4])),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "invalid bpf_context access",
+		.result = REJECT,
+	},
+	{
+		"check cb access: double, oob 2",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, cb[4])),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "invalid bpf_context access",
+		.result = REJECT,
+	},
+	{
+		"check __sk_buff->ifindex dw store not permitted",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct __sk_buff, ifindex)),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "invalid bpf_context access",
+		.result = REJECT,
+	},
+	{
+		"check __sk_buff->ifindex dw load not permitted",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, ifindex)),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "invalid bpf_context access",
+		.result = REJECT,
+	},
+	{
+		"check cb access: double, wrong type",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct __sk_buff, cb[0])),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "invalid bpf_context access",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_CGROUP_SOCK,
+	},
+	{
+		"check out of range skb->cb access",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, cb[0]) + 256),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "invalid bpf_context access",
+		.errstr_unpriv = "",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_SCHED_ACT,
+	},
+	{
+		"write skb fields from socket prog",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, cb[4])),
+			BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 1),
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, mark)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, tc_index)),
+			BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 1),
+			BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_1,
+				    offsetof(struct __sk_buff, cb[0])),
+			BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_1,
+				    offsetof(struct __sk_buff, cb[2])),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.errstr_unpriv = "R1 leaks addr",
+		.result_unpriv = REJECT,
+	},
+	{
+		"write skb fields from tc_cls_act prog",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, cb[0])),
+			BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct __sk_buff, mark)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, tc_index)),
+			BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct __sk_buff, tc_index)),
+			BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct __sk_buff, cb[3])),
+			BPF_EXIT_INSN(),
+		},
+		.errstr_unpriv = "",
+		.result_unpriv = REJECT,
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	},
+	{
+		"PTR_TO_STACK store/load",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -10),
+			BPF_ST_MEM(BPF_DW, BPF_REG_1, 2, 0xfaceb00c),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 2),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.retval = 0xfaceb00c,
+	},
+	{
+		"PTR_TO_STACK store/load - bad alignment on off",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+			BPF_ST_MEM(BPF_DW, BPF_REG_1, 2, 0xfaceb00c),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 2),
+			BPF_EXIT_INSN(),
+		},
+		.result = REJECT,
+		.errstr = "misaligned stack access off (0x0; 0x0)+-8+2 size 8",
+	},
+	{
+		"PTR_TO_STACK store/load - bad alignment on reg",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -10),
+			BPF_ST_MEM(BPF_DW, BPF_REG_1, 8, 0xfaceb00c),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 8),
+			BPF_EXIT_INSN(),
+		},
+		.result = REJECT,
+		.errstr = "misaligned stack access off (0x0; 0x0)+-10+8 size 8",
+	},
+	{
+		"PTR_TO_STACK store/load - out of bounds low",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -80000),
+			BPF_ST_MEM(BPF_DW, BPF_REG_1, 8, 0xfaceb00c),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 8),
+			BPF_EXIT_INSN(),
+		},
+		.result = REJECT,
+		.errstr = "invalid stack off=-79992 size=8",
+		.errstr_unpriv = "R1 stack pointer arithmetic goes out of range",
+	},
+	{
+		"PTR_TO_STACK store/load - out of bounds high",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+			BPF_ST_MEM(BPF_DW, BPF_REG_1, 8, 0xfaceb00c),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 8),
+			BPF_EXIT_INSN(),
+		},
+		.result = REJECT,
+		.errstr = "invalid stack off=0 size=8",
+	},
+	{
+		"unpriv: return pointer",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_10),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.result_unpriv = REJECT,
+		.errstr_unpriv = "R0 leaks addr",
+		.retval = POINTER_VALUE,
+	},
+	{
+		"unpriv: add const to pointer",
+		.insns = {
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+	},
+	{
+		"unpriv: add pointer to pointer",
+		.insns = {
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_10),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = REJECT,
+		.errstr = "R1 pointer += pointer",
+	},
+	{
+		"unpriv: neg pointer",
+		.insns = {
+			BPF_ALU64_IMM(BPF_NEG, BPF_REG_1, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.result_unpriv = REJECT,
+		.errstr_unpriv = "R1 pointer arithmetic",
+	},
+	{
+		"unpriv: cmp pointer with const",
+		.insns = {
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.result_unpriv = REJECT,
+		.errstr_unpriv = "R1 pointer comparison",
+	},
+	{
+		"unpriv: cmp pointer with pointer",
+		.insns = {
+			BPF_JMP_REG(BPF_JEQ, BPF_REG_1, BPF_REG_10, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.result_unpriv = REJECT,
+		.errstr_unpriv = "R10 pointer comparison",
+	},
+	{
+		"unpriv: check that printk is disallowed",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+			BPF_MOV64_IMM(BPF_REG_2, 8),
+			BPF_MOV64_REG(BPF_REG_3, BPF_REG_1),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_trace_printk),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.errstr_unpriv = "unknown func bpf_trace_printk#6",
+		.result_unpriv = REJECT,
+		.result = ACCEPT,
+	},
+	{
+		"unpriv: pass pointer to helper function",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_MOV64_REG(BPF_REG_3, BPF_REG_2),
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_2),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_update_elem),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map1 = { 3 },
+		.errstr_unpriv = "R4 leaks addr",
+		.result_unpriv = REJECT,
+		.result = ACCEPT,
+	},
+	{
+		"unpriv: indirectly pass pointer on stack to helper function",
+		.insns = {
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_10, -8),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map1 = { 3 },
+		.errstr = "invalid indirect read from stack off -8+0 size 8",
+		.result = REJECT,
+	},
+	{
+		"unpriv: mangle pointer on stack 1",
+		.insns = {
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_10, -8),
+			BPF_ST_MEM(BPF_W, BPF_REG_10, -8, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.errstr_unpriv = "attempt to corrupt spilled",
+		.result_unpriv = REJECT,
+		.result = ACCEPT,
+	},
+	{
+		"unpriv: mangle pointer on stack 2",
+		.insns = {
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_10, -8),
+			BPF_ST_MEM(BPF_B, BPF_REG_10, -1, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.errstr_unpriv = "attempt to corrupt spilled",
+		.result_unpriv = REJECT,
+		.result = ACCEPT,
+	},
+	{
+		"unpriv: read pointer from stack in small chunks",
+		.insns = {
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_10, -8),
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_10, -8),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "invalid size",
+		.result = REJECT,
+	},
+	{
+		"unpriv: write pointer into ctx",
+		.insns = {
+			BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.errstr_unpriv = "R1 leaks addr",
+		.result_unpriv = REJECT,
+		.errstr = "invalid bpf_context access",
+		.result = REJECT,
+	},
+	{
+		"unpriv: spill/fill of ctx",
+		.insns = {
+			BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8),
+			BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+	},
+	{
+		"unpriv: spill/fill of ctx 2",
+		.insns = {
+			BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8),
+			BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_get_hash_recalc),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	},
+	{
+		"unpriv: spill/fill of ctx 3",
+		.insns = {
+			BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8),
+			BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0),
+			BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_10, 0),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_get_hash_recalc),
+			BPF_EXIT_INSN(),
+		},
+		.result = REJECT,
+		.errstr = "R1 type=fp expected=ctx",
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	},
+	{
+		"unpriv: spill/fill of ctx 4",
+		.insns = {
+			BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8),
+			BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_RAW_INSN(BPF_STX | BPF_XADD | BPF_DW, BPF_REG_10,
+				     BPF_REG_0, -8, 0),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_get_hash_recalc),
+			BPF_EXIT_INSN(),
+		},
+		.result = REJECT,
+		.errstr = "R1 type=inv expected=ctx",
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	},
+	{
+		"unpriv: spill/fill of different pointers stx",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_3, 42),
+			BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 3),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16),
+			BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_2, 0),
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 1),
+			BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, 0),
+			BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_3,
+				    offsetof(struct __sk_buff, mark)),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = REJECT,
+		.errstr = "same insn cannot be used with different pointers",
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	},
+	{
+		"unpriv: spill/fill of different pointers ldx",
+		.insns = {
+			BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 3),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2,
+				      -(__s32)offsetof(struct bpf_perf_event_data,
+						       sample_period) - 8),
+			BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_2, 0),
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 1),
+			BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, 0),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1,
+				    offsetof(struct bpf_perf_event_data,
+					     sample_period)),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = REJECT,
+		.errstr = "same insn cannot be used with different pointers",
+		.prog_type = BPF_PROG_TYPE_PERF_EVENT,
+	},
+	{
+		"unpriv: write pointer into map elem value",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+			BPF_STX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map1 = { 3 },
+		.errstr_unpriv = "R0 leaks addr",
+		.result_unpriv = REJECT,
+		.result = ACCEPT,
+	},
+	{
+		"alu32: mov u32 const",
+		.insns = {
+			BPF_MOV32_IMM(BPF_REG_7, 0),
+			BPF_ALU32_IMM(BPF_AND, BPF_REG_7, 1),
+			BPF_MOV32_REG(BPF_REG_0, BPF_REG_7),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_7, 0),
+			BPF_EXIT_INSN(),
+		},
+		.errstr_unpriv = "R7 invalid mem access 'inv'",
+		.result_unpriv = REJECT,
+		.result = ACCEPT,
+		.retval = 0,
+	},
+	{
+		"unpriv: partial copy of pointer",
+		.insns = {
+			BPF_MOV32_REG(BPF_REG_1, BPF_REG_10),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.errstr_unpriv = "R10 partial copy",
+		.result_unpriv = REJECT,
+		.result = ACCEPT,
+	},
+	{
+		"unpriv: pass pointer to tail_call",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_3, BPF_REG_1),
+			BPF_LD_MAP_FD(BPF_REG_2, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_tail_call),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_prog1 = { 1 },
+		.errstr_unpriv = "R3 leaks addr into helper",
+		.result_unpriv = REJECT,
+		.result = ACCEPT,
+	},
+	{
+		"unpriv: cmp map pointer with zero",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_1, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map1 = { 1 },
+		.errstr_unpriv = "R1 pointer comparison",
+		.result_unpriv = REJECT,
+		.result = ACCEPT,
+	},
+	{
+		"unpriv: write into frame pointer",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_10, BPF_REG_1),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "frame pointer is read only",
+		.result = REJECT,
+	},
+	{
+		"unpriv: spill/fill frame pointer",
+		.insns = {
+			BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8),
+			BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_10, 0),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_10, BPF_REG_6, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "frame pointer is read only",
+		.result = REJECT,
+	},
+	{
+		"unpriv: cmp of frame pointer",
+		.insns = {
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_10, 0, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.errstr_unpriv = "R10 pointer comparison",
+		.result_unpriv = REJECT,
+		.result = ACCEPT,
+	},
+	{
+		"unpriv: adding of fp, reg",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_MOV64_IMM(BPF_REG_1, 0),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_10),
+			BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, -8),
+			BPF_EXIT_INSN(),
+		},
+		.errstr_unpriv = "R1 stack pointer arithmetic goes out of range",
+		.result_unpriv = REJECT,
+		.result = ACCEPT,
+	},
+	{
+		"unpriv: adding of fp, imm",
+		.insns = {
+		BPF_MOV64_IMM(BPF_REG_0, 0),
+		BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+		BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0),
+		BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, -8),
+		BPF_EXIT_INSN(),
+		},
+		.errstr_unpriv = "R1 stack pointer arithmetic goes out of range",
+		.result_unpriv = REJECT,
+		.result = ACCEPT,
+	},
+	{
+		"unpriv: cmp of stack pointer",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_2, 0, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.errstr_unpriv = "R2 pointer comparison",
+		.result_unpriv = REJECT,
+		.result = ACCEPT,
+	},
+	{
+		"runtime/jit: tail_call within bounds, prog once",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_3, 0),
+			BPF_LD_MAP_FD(BPF_REG_2, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_tail_call),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_prog1 = { 1 },
+		.result = ACCEPT,
+		.retval = 42,
+	},
+	{
+		"runtime/jit: tail_call within bounds, prog loop",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_3, 1),
+			BPF_LD_MAP_FD(BPF_REG_2, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_tail_call),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_prog1 = { 1 },
+		.result = ACCEPT,
+		.retval = 41,
+	},
+	{
+		"runtime/jit: tail_call within bounds, no prog",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_3, 2),
+			BPF_LD_MAP_FD(BPF_REG_2, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_tail_call),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_prog1 = { 1 },
+		.result = ACCEPT,
+		.retval = 1,
+	},
+	{
+		"runtime/jit: tail_call out of bounds",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_3, 256),
+			BPF_LD_MAP_FD(BPF_REG_2, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_tail_call),
+			BPF_MOV64_IMM(BPF_REG_0, 2),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_prog1 = { 1 },
+		.result = ACCEPT,
+		.retval = 2,
+	},
+	{
+		"runtime/jit: pass negative index to tail_call",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_3, -1),
+			BPF_LD_MAP_FD(BPF_REG_2, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_tail_call),
+			BPF_MOV64_IMM(BPF_REG_0, 2),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_prog1 = { 1 },
+		.result = ACCEPT,
+		.retval = 2,
+	},
+	{
+		"runtime/jit: pass > 32bit index to tail_call",
+		.insns = {
+			BPF_LD_IMM64(BPF_REG_3, 0x100000000ULL),
+			BPF_LD_MAP_FD(BPF_REG_2, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_tail_call),
+			BPF_MOV64_IMM(BPF_REG_0, 2),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_prog1 = { 2 },
+		.result = ACCEPT,
+		.retval = 42,
+		/* Verifier rewrite for unpriv skips tail call here. */
+		.retval_unpriv = 2,
+	},
+	{
+		"stack pointer arithmetic",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_1, 4),
+			BPF_JMP_IMM(BPF_JA, 0, 0, 0),
+			BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -10),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_7),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_1),
+			BPF_ST_MEM(0, BPF_REG_2, 4, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_7),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 8),
+			BPF_ST_MEM(0, BPF_REG_2, 4, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+	},
+	{
+		"raw_stack: no skb_load_bytes",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_2, 4),
+			BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8),
+			BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),
+			BPF_MOV64_IMM(BPF_REG_4, 8),
+			/* Call to skb_load_bytes() omitted. */
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = REJECT,
+		.errstr = "invalid read from stack off -8+0 size 8",
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	},
+	{
+		"raw_stack: skb_load_bytes, negative len",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_2, 4),
+			BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8),
+			BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),
+			BPF_MOV64_IMM(BPF_REG_4, -8),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_skb_load_bytes),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = REJECT,
+		.errstr = "R4 min value is negative",
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	},
+	{
+		"raw_stack: skb_load_bytes, negative len 2",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_2, 4),
+			BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8),
+			BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),
+			BPF_MOV64_IMM(BPF_REG_4, ~0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_skb_load_bytes),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = REJECT,
+		.errstr = "R4 min value is negative",
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	},
+	{
+		"raw_stack: skb_load_bytes, zero len",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_2, 4),
+			BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8),
+			BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),
+			BPF_MOV64_IMM(BPF_REG_4, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_skb_load_bytes),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = REJECT,
+		.errstr = "invalid stack type R3",
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	},
+	{
+		"raw_stack: skb_load_bytes, no init",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_2, 4),
+			BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8),
+			BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),
+			BPF_MOV64_IMM(BPF_REG_4, 8),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_skb_load_bytes),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	},
+	{
+		"raw_stack: skb_load_bytes, init",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_2, 4),
+			BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8),
+			BPF_ST_MEM(BPF_DW, BPF_REG_6, 0, 0xcafe),
+			BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),
+			BPF_MOV64_IMM(BPF_REG_4, 8),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_skb_load_bytes),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	},
+	{
+		"raw_stack: skb_load_bytes, spilled regs around bounds",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_2, 4),
+			BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -16),
+			BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, -8),
+			BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1,  8),
+			BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),
+			BPF_MOV64_IMM(BPF_REG_4, 8),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_skb_load_bytes),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, -8),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_6,  8),
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0,
+				    offsetof(struct __sk_buff, mark)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_2,
+				    offsetof(struct __sk_buff, priority)),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	},
+	{
+		"raw_stack: skb_load_bytes, spilled regs corruption",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_2, 4),
+			BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8),
+			BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0),
+			BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),
+			BPF_MOV64_IMM(BPF_REG_4, 8),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_skb_load_bytes),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0),
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0,
+				    offsetof(struct __sk_buff, mark)),
+			BPF_EXIT_INSN(),
+		},
+		.result = REJECT,
+		.errstr = "R0 invalid mem access 'inv'",
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+	},
+	{
+		"raw_stack: skb_load_bytes, spilled regs corruption 2",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_2, 4),
+			BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -16),
+			BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, -8),
+			BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1,  0),
+			BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1,  8),
+			BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),
+			BPF_MOV64_IMM(BPF_REG_4, 8),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_skb_load_bytes),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, -8),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_6,  8),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_6,  0),
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0,
+				    offsetof(struct __sk_buff, mark)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_2,
+				    offsetof(struct __sk_buff, priority)),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_3,
+				    offsetof(struct __sk_buff, pkt_type)),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_3),
+			BPF_EXIT_INSN(),
+		},
+		.result = REJECT,
+		.errstr = "R3 invalid mem access 'inv'",
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+	},
+	{
+		"raw_stack: skb_load_bytes, spilled regs + data",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_2, 4),
+			BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -16),
+			BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, -8),
+			BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1,  0),
+			BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1,  8),
+			BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),
+			BPF_MOV64_IMM(BPF_REG_4, 8),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_skb_load_bytes),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, -8),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_6,  8),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_6,  0),
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0,
+				    offsetof(struct __sk_buff, mark)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_2,
+				    offsetof(struct __sk_buff, priority)),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_3),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	},
+	{
+		"raw_stack: skb_load_bytes, invalid access 1",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_2, 4),
+			BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -513),
+			BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),
+			BPF_MOV64_IMM(BPF_REG_4, 8),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_skb_load_bytes),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = REJECT,
+		.errstr = "invalid stack type R3 off=-513 access_size=8",
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	},
+	{
+		"raw_stack: skb_load_bytes, invalid access 2",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_2, 4),
+			BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -1),
+			BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),
+			BPF_MOV64_IMM(BPF_REG_4, 8),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_skb_load_bytes),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = REJECT,
+		.errstr = "invalid stack type R3 off=-1 access_size=8",
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	},
+	{
+		"raw_stack: skb_load_bytes, invalid access 3",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_2, 4),
+			BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 0xffffffff),
+			BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),
+			BPF_MOV64_IMM(BPF_REG_4, 0xffffffff),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_skb_load_bytes),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = REJECT,
+		.errstr = "R4 min value is negative",
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	},
+	{
+		"raw_stack: skb_load_bytes, invalid access 4",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_2, 4),
+			BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -1),
+			BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),
+			BPF_MOV64_IMM(BPF_REG_4, 0x7fffffff),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_skb_load_bytes),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = REJECT,
+		.errstr = "R4 unbounded memory access, use 'var &= const' or 'if (var < const)'",
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	},
+	{
+		"raw_stack: skb_load_bytes, invalid access 5",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_2, 4),
+			BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -512),
+			BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),
+			BPF_MOV64_IMM(BPF_REG_4, 0x7fffffff),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_skb_load_bytes),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = REJECT,
+		.errstr = "R4 unbounded memory access, use 'var &= const' or 'if (var < const)'",
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	},
+	{
+		"raw_stack: skb_load_bytes, invalid access 6",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_2, 4),
+			BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -512),
+			BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),
+			BPF_MOV64_IMM(BPF_REG_4, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_skb_load_bytes),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = REJECT,
+		.errstr = "invalid stack type R3 off=-512 access_size=0",
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	},
+	{
+		"raw_stack: skb_load_bytes, large access",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_2, 4),
+			BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -512),
+			BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),
+			BPF_MOV64_IMM(BPF_REG_4, 512),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_skb_load_bytes),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	},
+	{
+		"context stores via ST",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_ST_MEM(BPF_DW, BPF_REG_1, offsetof(struct __sk_buff, mark), 0),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "BPF_ST stores into R1 context is not allowed",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	},
+	{
+		"context stores via XADD",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_RAW_INSN(BPF_STX | BPF_XADD | BPF_W, BPF_REG_1,
+				     BPF_REG_0, offsetof(struct __sk_buff, mark), 0),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "BPF_XADD stores into R1 context is not allowed",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	},
+	{
+		"direct packet access: test1",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct __sk_buff, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct __sk_buff, data_end)),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
+			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	},
+	{
+		"direct packet access: test2",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1,
+				    offsetof(struct __sk_buff, data_end)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct __sk_buff, data)),
+			BPF_MOV64_REG(BPF_REG_5, BPF_REG_3),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 14),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_5, BPF_REG_4, 15),
+			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_3, 7),
+			BPF_LDX_MEM(BPF_B, BPF_REG_4, BPF_REG_3, 12),
+			BPF_ALU64_IMM(BPF_MUL, BPF_REG_4, 14),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct __sk_buff, data)),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_3, BPF_REG_4),
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct __sk_buff, len)),
+			BPF_ALU64_IMM(BPF_LSH, BPF_REG_2, 49),
+			BPF_ALU64_IMM(BPF_RSH, BPF_REG_2, 49),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_3, BPF_REG_2),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_3),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 8),
+			BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_1,
+				    offsetof(struct __sk_buff, data_end)),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_2, BPF_REG_1, 1),
+			BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_3, 4),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	},
+	{
+		"direct packet access: test3",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct __sk_buff, data)),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "invalid bpf_context access off=76",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_SOCKET_FILTER,
+	},
+	{
+		"direct packet access: test4 (write)",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct __sk_buff, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct __sk_buff, data_end)),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
+			BPF_STX_MEM(BPF_B, BPF_REG_2, BPF_REG_2, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	},
+	{
+		"direct packet access: test5 (pkt_end >= reg, good access)",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct __sk_buff, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct __sk_buff, data_end)),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+			BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_0, 2),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	},
+	{
+		"direct packet access: test6 (pkt_end >= reg, bad access)",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct __sk_buff, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct __sk_buff, data_end)),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+			BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_0, 3),
+			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "invalid access to packet",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	},
+	{
+		"direct packet access: test7 (pkt_end >= reg, both accesses)",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct __sk_buff, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct __sk_buff, data_end)),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+			BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_0, 3),
+			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "invalid access to packet",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	},
+	{
+		"direct packet access: test8 (double test, variant 1)",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct __sk_buff, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct __sk_buff, data_end)),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+			BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_0, 4),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
+			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	},
+	{
+		"direct packet access: test9 (double test, variant 2)",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct __sk_buff, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct __sk_buff, data_end)),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+			BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_0, 2),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
+			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
+			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	},
+	{
+		"direct packet access: test10 (write invalid)",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct __sk_buff, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct __sk_buff, data_end)),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 2),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+			BPF_STX_MEM(BPF_B, BPF_REG_2, BPF_REG_2, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "invalid access to packet",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	},
+	{
+		"direct packet access: test11 (shift, good access)",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct __sk_buff, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct __sk_buff, data_end)),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 22),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 8),
+			BPF_MOV64_IMM(BPF_REG_3, 144),
+			BPF_MOV64_REG(BPF_REG_5, BPF_REG_3),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 23),
+			BPF_ALU64_IMM(BPF_RSH, BPF_REG_5, 3),
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_2),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_6, BPF_REG_5),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.retval = 1,
+	},
+	{
+		"direct packet access: test12 (and, good access)",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct __sk_buff, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct __sk_buff, data_end)),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 22),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 8),
+			BPF_MOV64_IMM(BPF_REG_3, 144),
+			BPF_MOV64_REG(BPF_REG_5, BPF_REG_3),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 23),
+			BPF_ALU64_IMM(BPF_AND, BPF_REG_5, 15),
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_2),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_6, BPF_REG_5),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.retval = 1,
+	},
+	{
+		"direct packet access: test13 (branches, good access)",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct __sk_buff, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct __sk_buff, data_end)),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 22),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 13),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct __sk_buff, mark)),
+			BPF_MOV64_IMM(BPF_REG_4, 1),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_4, 2),
+			BPF_MOV64_IMM(BPF_REG_3, 14),
+			BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+			BPF_MOV64_IMM(BPF_REG_3, 24),
+			BPF_MOV64_REG(BPF_REG_5, BPF_REG_3),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 23),
+			BPF_ALU64_IMM(BPF_AND, BPF_REG_5, 15),
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_2),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_6, BPF_REG_5),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.retval = 1,
+	},
+	{
+		"direct packet access: test14 (pkt_ptr += 0, CONST_IMM, good access)",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct __sk_buff, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct __sk_buff, data_end)),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 22),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 7),
+			BPF_MOV64_IMM(BPF_REG_5, 12),
+			BPF_ALU64_IMM(BPF_RSH, BPF_REG_5, 4),
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_2),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_6, BPF_REG_5),
+			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_6, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.retval = 1,
+	},
+	{
+		"direct packet access: test15 (spill with xadd)",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct __sk_buff, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct __sk_buff, data_end)),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 8),
+			BPF_MOV64_IMM(BPF_REG_5, 4096),
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8),
+			BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0),
+			BPF_STX_XADD(BPF_DW, BPF_REG_4, BPF_REG_5, 0),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_4, 0),
+			BPF_STX_MEM(BPF_W, BPF_REG_2, BPF_REG_5, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "R2 invalid mem access 'inv'",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+	},
+	{
+		"direct packet access: test16 (arith on data_end)",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct __sk_buff, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct __sk_buff, data_end)),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, 16),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
+			BPF_STX_MEM(BPF_B, BPF_REG_2, BPF_REG_2, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "R3 pointer arithmetic on PTR_TO_PACKET_END",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	},
+	{
+		"direct packet access: test17 (pruning, alignment)",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct __sk_buff, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct __sk_buff, data_end)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
+				    offsetof(struct __sk_buff, mark)),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 14),
+			BPF_JMP_IMM(BPF_JGT, BPF_REG_7, 1, 4),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
+			BPF_STX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, -4),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1),
+			BPF_JMP_A(-6),
+		},
+		.errstr = "misaligned packet access off 2+(0x0; 0x0)+15+-4 size 4",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.flags = F_LOAD_WITH_STRICT_ALIGNMENT,
+	},
+	{
+		"direct packet access: test18 (imm += pkt_ptr, 1)",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct __sk_buff, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct __sk_buff, data_end)),
+			BPF_MOV64_IMM(BPF_REG_0, 8),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
+			BPF_STX_MEM(BPF_B, BPF_REG_2, BPF_REG_2, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	},
+	{
+		"direct packet access: test19 (imm += pkt_ptr, 2)",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct __sk_buff, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct __sk_buff, data_end)),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 3),
+			BPF_MOV64_IMM(BPF_REG_4, 4),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_4, BPF_REG_2),
+			BPF_STX_MEM(BPF_B, BPF_REG_4, BPF_REG_4, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	},
+	{
+		"direct packet access: test20 (x += pkt_ptr, 1)",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct __sk_buff, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct __sk_buff, data_end)),
+			BPF_MOV64_IMM(BPF_REG_0, 0xffffffff),
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8),
+			BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0x7fff),
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_0),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_4, BPF_REG_2),
+			BPF_MOV64_REG(BPF_REG_5, BPF_REG_4),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 0x7fff - 1),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 1),
+			BPF_STX_MEM(BPF_DW, BPF_REG_5, BPF_REG_4, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = ACCEPT,
+		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+	},
+	{
+		"direct packet access: test21 (x += pkt_ptr, 2)",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct __sk_buff, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct __sk_buff, data_end)),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 9),
+			BPF_MOV64_IMM(BPF_REG_4, 0xffffffff),
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_4, -8),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8),
+			BPF_ALU64_IMM(BPF_AND, BPF_REG_4, 0x7fff),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_4, BPF_REG_2),
+			BPF_MOV64_REG(BPF_REG_5, BPF_REG_4),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 0x7fff - 1),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 1),
+			BPF_STX_MEM(BPF_DW, BPF_REG_5, BPF_REG_4, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = ACCEPT,
+		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+	},
+	{
+		"direct packet access: test22 (x += pkt_ptr, 3)",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct __sk_buff, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct __sk_buff, data_end)),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -8),
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_3, -16),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_10, -16),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 11),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -8),
+			BPF_MOV64_IMM(BPF_REG_4, 0xffffffff),
+			BPF_STX_XADD(BPF_DW, BPF_REG_10, BPF_REG_4, -8),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8),
+			BPF_ALU64_IMM(BPF_RSH, BPF_REG_4, 49),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_4, BPF_REG_2),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_4),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 2),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 2),
+			BPF_MOV64_IMM(BPF_REG_2, 1),
+			BPF_STX_MEM(BPF_H, BPF_REG_4, BPF_REG_2, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = ACCEPT,
+		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+	},
+	{
+		"direct packet access: test23 (x += pkt_ptr, 4)",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct __sk_buff, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct __sk_buff, data_end)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+			    offsetof(struct __sk_buff, mark)),
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8),
+			BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0xffff),
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_0),
+			BPF_MOV64_IMM(BPF_REG_0, 31),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_4),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2),
+			BPF_MOV64_REG(BPF_REG_5, BPF_REG_0),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 0xffff - 1),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
+			BPF_STX_MEM(BPF_DW, BPF_REG_5, BPF_REG_0, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = REJECT,
+		.errstr = "invalid access to packet, off=0 size=8, R5(id=1,off=0,r=0)",
+		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+	},
+	{
+		"direct packet access: test24 (x += pkt_ptr, 5)",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct __sk_buff, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct __sk_buff, data_end)),
+			BPF_MOV64_IMM(BPF_REG_0, 0xffffffff),
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8),
+			BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0xff),
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_0),
+			BPF_MOV64_IMM(BPF_REG_0, 64),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_4),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2),
+			BPF_MOV64_REG(BPF_REG_5, BPF_REG_0),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 0x7fff - 1),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
+			BPF_STX_MEM(BPF_DW, BPF_REG_5, BPF_REG_0, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = ACCEPT,
+		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+	},
+	{
+		"direct packet access: test25 (marking on <, good access)",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct __sk_buff, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct __sk_buff, data_end)),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+			BPF_JMP_REG(BPF_JLT, BPF_REG_0, BPF_REG_3, 2),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
+			BPF_JMP_IMM(BPF_JA, 0, 0, -4),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	},
+	{
+		"direct packet access: test26 (marking on <, bad access)",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct __sk_buff, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct __sk_buff, data_end)),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+			BPF_JMP_REG(BPF_JLT, BPF_REG_0, BPF_REG_3, 3),
+			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+			BPF_JMP_IMM(BPF_JA, 0, 0, -3),
+		},
+		.result = REJECT,
+		.errstr = "invalid access to packet",
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	},
+	{
+		"direct packet access: test27 (marking on <=, good access)",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct __sk_buff, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct __sk_buff, data_end)),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+			BPF_JMP_REG(BPF_JLE, BPF_REG_3, BPF_REG_0, 1),
+			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.retval = 1,
+	},
+	{
+		"direct packet access: test28 (marking on <=, bad access)",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct __sk_buff, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct __sk_buff, data_end)),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+			BPF_JMP_REG(BPF_JLE, BPF_REG_3, BPF_REG_0, 2),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
+			BPF_JMP_IMM(BPF_JA, 0, 0, -4),
+		},
+		.result = REJECT,
+		.errstr = "invalid access to packet",
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	},
+	{
+		"helper access to packet: test1, valid packet_ptr range",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct xdp_md, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct xdp_md, data_end)),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 5),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_MOV64_REG(BPF_REG_3, BPF_REG_2),
+			BPF_MOV64_IMM(BPF_REG_4, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_update_elem),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map1 = { 5 },
+		.result_unpriv = ACCEPT,
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_XDP,
+	},
+	{
+		"helper access to packet: test2, unchecked packet_ptr",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct xdp_md, data)),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map1 = { 1 },
+		.result = REJECT,
+		.errstr = "invalid access to packet",
+		.prog_type = BPF_PROG_TYPE_XDP,
+	},
+	{
+		"helper access to packet: test3, variable add",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+					offsetof(struct xdp_md, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+					offsetof(struct xdp_md, data_end)),
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 8),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 10),
+			BPF_LDX_MEM(BPF_B, BPF_REG_5, BPF_REG_2, 0),
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_2),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_4, BPF_REG_5),
+			BPF_MOV64_REG(BPF_REG_5, BPF_REG_4),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 8),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_5, BPF_REG_3, 4),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_4),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map1 = { 11 },
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_XDP,
+	},
+	{
+		"helper access to packet: test4, packet_ptr with bad range",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct xdp_md, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct xdp_md, data_end)),
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 2),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map1 = { 7 },
+		.result = REJECT,
+		.errstr = "invalid access to packet",
+		.prog_type = BPF_PROG_TYPE_XDP,
+	},
+	{
+		"helper access to packet: test5, packet_ptr with too short range",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct xdp_md, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct xdp_md, data_end)),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 1),
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 7),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 3),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map1 = { 6 },
+		.result = REJECT,
+		.errstr = "invalid access to packet",
+		.prog_type = BPF_PROG_TYPE_XDP,
+	},
+	{
+		"helper access to packet: test6, cls valid packet_ptr range",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct __sk_buff, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct __sk_buff, data_end)),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 5),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_MOV64_REG(BPF_REG_3, BPF_REG_2),
+			BPF_MOV64_IMM(BPF_REG_4, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_update_elem),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map1 = { 5 },
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	},
+	{
+		"helper access to packet: test7, cls unchecked packet_ptr",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct __sk_buff, data)),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map1 = { 1 },
+		.result = REJECT,
+		.errstr = "invalid access to packet",
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	},
+	{
+		"helper access to packet: test8, cls variable add",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+					offsetof(struct __sk_buff, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+					offsetof(struct __sk_buff, data_end)),
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 8),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 10),
+			BPF_LDX_MEM(BPF_B, BPF_REG_5, BPF_REG_2, 0),
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_2),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_4, BPF_REG_5),
+			BPF_MOV64_REG(BPF_REG_5, BPF_REG_4),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 8),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_5, BPF_REG_3, 4),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_4),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map1 = { 11 },
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	},
+	{
+		"helper access to packet: test9, cls packet_ptr with bad range",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct __sk_buff, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct __sk_buff, data_end)),
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 2),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map1 = { 7 },
+		.result = REJECT,
+		.errstr = "invalid access to packet",
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	},
+	{
+		"helper access to packet: test10, cls packet_ptr with too short range",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct __sk_buff, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct __sk_buff, data_end)),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 1),
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 7),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 3),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map1 = { 6 },
+		.result = REJECT,
+		.errstr = "invalid access to packet",
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	},
+	{
+		"helper access to packet: test11, cls unsuitable helper 1",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+				    offsetof(struct __sk_buff, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
+				    offsetof(struct __sk_buff, data_end)),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1),
+			BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, 7),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_7, 4),
+			BPF_MOV64_IMM(BPF_REG_2, 0),
+			BPF_MOV64_IMM(BPF_REG_4, 42),
+			BPF_MOV64_IMM(BPF_REG_5, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_skb_store_bytes),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = REJECT,
+		.errstr = "helper access to the packet",
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	},
+	{
+		"helper access to packet: test12, cls unsuitable helper 2",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+				    offsetof(struct __sk_buff, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
+				    offsetof(struct __sk_buff, data_end)),
+			BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 8),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_6, BPF_REG_7, 3),
+			BPF_MOV64_IMM(BPF_REG_2, 0),
+			BPF_MOV64_IMM(BPF_REG_4, 4),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_skb_load_bytes),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = REJECT,
+		.errstr = "helper access to the packet",
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	},
+	{
+		"helper access to packet: test13, cls helper ok",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+				    offsetof(struct __sk_buff, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
+				    offsetof(struct __sk_buff, data_end)),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_7, 6),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+			BPF_MOV64_IMM(BPF_REG_2, 4),
+			BPF_MOV64_IMM(BPF_REG_3, 0),
+			BPF_MOV64_IMM(BPF_REG_4, 0),
+			BPF_MOV64_IMM(BPF_REG_5, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_csum_diff),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	},
+	{
+		"helper access to packet: test14, cls helper ok sub",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+				    offsetof(struct __sk_buff, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
+				    offsetof(struct __sk_buff, data_end)),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_7, 6),
+			BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 4),
+			BPF_MOV64_IMM(BPF_REG_2, 4),
+			BPF_MOV64_IMM(BPF_REG_3, 0),
+			BPF_MOV64_IMM(BPF_REG_4, 0),
+			BPF_MOV64_IMM(BPF_REG_5, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_csum_diff),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	},
+	{
+		"helper access to packet: test15, cls helper fail sub",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+				    offsetof(struct __sk_buff, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
+				    offsetof(struct __sk_buff, data_end)),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_7, 6),
+			BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 12),
+			BPF_MOV64_IMM(BPF_REG_2, 4),
+			BPF_MOV64_IMM(BPF_REG_3, 0),
+			BPF_MOV64_IMM(BPF_REG_4, 0),
+			BPF_MOV64_IMM(BPF_REG_5, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_csum_diff),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = REJECT,
+		.errstr = "invalid access to packet",
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	},
+	{
+		"helper access to packet: test16, cls helper fail range 1",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+				    offsetof(struct __sk_buff, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
+				    offsetof(struct __sk_buff, data_end)),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_7, 6),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+			BPF_MOV64_IMM(BPF_REG_2, 8),
+			BPF_MOV64_IMM(BPF_REG_3, 0),
+			BPF_MOV64_IMM(BPF_REG_4, 0),
+			BPF_MOV64_IMM(BPF_REG_5, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_csum_diff),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = REJECT,
+		.errstr = "invalid access to packet",
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	},
+	{
+		"helper access to packet: test17, cls helper fail range 2",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+				    offsetof(struct __sk_buff, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
+				    offsetof(struct __sk_buff, data_end)),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_7, 6),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+			BPF_MOV64_IMM(BPF_REG_2, -9),
+			BPF_MOV64_IMM(BPF_REG_3, 0),
+			BPF_MOV64_IMM(BPF_REG_4, 0),
+			BPF_MOV64_IMM(BPF_REG_5, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_csum_diff),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = REJECT,
+		.errstr = "R2 min value is negative",
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	},
+	{
+		"helper access to packet: test18, cls helper fail range 3",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+				    offsetof(struct __sk_buff, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
+				    offsetof(struct __sk_buff, data_end)),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_7, 6),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+			BPF_MOV64_IMM(BPF_REG_2, ~0),
+			BPF_MOV64_IMM(BPF_REG_3, 0),
+			BPF_MOV64_IMM(BPF_REG_4, 0),
+			BPF_MOV64_IMM(BPF_REG_5, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_csum_diff),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = REJECT,
+		.errstr = "R2 min value is negative",
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	},
+	{
+		"helper access to packet: test19, cls helper range zero",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+				    offsetof(struct __sk_buff, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
+				    offsetof(struct __sk_buff, data_end)),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_7, 6),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+			BPF_MOV64_IMM(BPF_REG_2, 0),
+			BPF_MOV64_IMM(BPF_REG_3, 0),
+			BPF_MOV64_IMM(BPF_REG_4, 0),
+			BPF_MOV64_IMM(BPF_REG_5, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_csum_diff),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	},
+	{
+		"helper access to packet: test20, pkt end as input",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+				    offsetof(struct __sk_buff, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
+				    offsetof(struct __sk_buff, data_end)),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_7, 6),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
+			BPF_MOV64_IMM(BPF_REG_2, 4),
+			BPF_MOV64_IMM(BPF_REG_3, 0),
+			BPF_MOV64_IMM(BPF_REG_4, 0),
+			BPF_MOV64_IMM(BPF_REG_5, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_csum_diff),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = REJECT,
+		.errstr = "R1 type=pkt_end expected=fp",
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	},
+	{
+		"helper access to packet: test21, wrong reg",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+				    offsetof(struct __sk_buff, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
+				    offsetof(struct __sk_buff, data_end)),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_7, 6),
+			BPF_MOV64_IMM(BPF_REG_2, 4),
+			BPF_MOV64_IMM(BPF_REG_3, 0),
+			BPF_MOV64_IMM(BPF_REG_4, 0),
+			BPF_MOV64_IMM(BPF_REG_5, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_csum_diff),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = REJECT,
+		.errstr = "invalid access to packet",
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	},
+	{
+		"valid map access into an array with a constant",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+			BPF_ST_MEM(BPF_DW, BPF_REG_0, 0,
+				   offsetof(struct test_val, foo)),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map2 = { 3 },
+		.errstr_unpriv = "R0 leaks addr",
+		.result_unpriv = REJECT,
+		.result = ACCEPT,
+	},
+	{
+		"valid map access into an array with a register",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+			BPF_MOV64_IMM(BPF_REG_1, 4),
+			BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 2),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+			BPF_ST_MEM(BPF_DW, BPF_REG_0, 0,
+				   offsetof(struct test_val, foo)),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map2 = { 3 },
+		.errstr_unpriv = "R0 leaks addr",
+		.result_unpriv = REJECT,
+		.result = ACCEPT,
+		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+	},
+	{
+		"valid map access into an array with a variable",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
+			BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
+			BPF_JMP_IMM(BPF_JGE, BPF_REG_1, MAX_ENTRIES, 3),
+			BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 2),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+			BPF_ST_MEM(BPF_DW, BPF_REG_0, 0,
+				   offsetof(struct test_val, foo)),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map2 = { 3 },
+		.errstr_unpriv = "R0 leaks addr",
+		.result_unpriv = REJECT,
+		.result = ACCEPT,
+		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+	},
+	{
+		"valid map access into an array with a signed variable",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
+			BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
+			BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 0xffffffff, 1),
+			BPF_MOV32_IMM(BPF_REG_1, 0),
+			BPF_MOV32_IMM(BPF_REG_2, MAX_ENTRIES),
+			BPF_JMP_REG(BPF_JSGT, BPF_REG_2, BPF_REG_1, 1),
+			BPF_MOV32_IMM(BPF_REG_1, 0),
+			BPF_ALU32_IMM(BPF_LSH, BPF_REG_1, 2),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+			BPF_ST_MEM(BPF_DW, BPF_REG_0, 0,
+				   offsetof(struct test_val, foo)),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map2 = { 3 },
+		.errstr_unpriv = "R0 leaks addr",
+		.result_unpriv = REJECT,
+		.result = ACCEPT,
+		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+	},
+	{
+		"invalid map access into an array with a constant",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+			BPF_ST_MEM(BPF_DW, BPF_REG_0, (MAX_ENTRIES + 1) << 2,
+				   offsetof(struct test_val, foo)),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map2 = { 3 },
+		.errstr = "invalid access to map value, value_size=48 off=48 size=8",
+		.result = REJECT,
+	},
+	{
+		"invalid map access into an array with a register",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+			BPF_MOV64_IMM(BPF_REG_1, MAX_ENTRIES + 1),
+			BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 2),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+			BPF_ST_MEM(BPF_DW, BPF_REG_0, 0,
+				   offsetof(struct test_val, foo)),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map2 = { 3 },
+		.errstr = "R0 min value is outside of the array range",
+		.result = REJECT,
+		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+	},
+	{
+		"invalid map access into an array with a variable",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+			BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
+			BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 2),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+			BPF_ST_MEM(BPF_DW, BPF_REG_0, 0,
+				   offsetof(struct test_val, foo)),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map2 = { 3 },
+		.errstr = "R0 unbounded memory access, make sure to bounds check any array access into a map",
+		.result = REJECT,
+		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+	},
+	{
+		"invalid map access into an array with no floor check",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0),
+			BPF_MOV32_IMM(BPF_REG_2, MAX_ENTRIES),
+			BPF_JMP_REG(BPF_JSGT, BPF_REG_2, BPF_REG_1, 1),
+			BPF_MOV32_IMM(BPF_REG_1, 0),
+			BPF_ALU32_IMM(BPF_LSH, BPF_REG_1, 2),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+			BPF_ST_MEM(BPF_DW, BPF_REG_0, 0,
+				   offsetof(struct test_val, foo)),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map2 = { 3 },
+		.errstr_unpriv = "R0 leaks addr",
+		.errstr = "R0 unbounded memory access",
+		.result_unpriv = REJECT,
+		.result = REJECT,
+		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+	},
+	{
+		"invalid map access into an array with a invalid max check",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
+			BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
+			BPF_MOV32_IMM(BPF_REG_2, MAX_ENTRIES + 1),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_2, BPF_REG_1, 1),
+			BPF_MOV32_IMM(BPF_REG_1, 0),
+			BPF_ALU32_IMM(BPF_LSH, BPF_REG_1, 2),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+			BPF_ST_MEM(BPF_DW, BPF_REG_0, 0,
+				   offsetof(struct test_val, foo)),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map2 = { 3 },
+		.errstr_unpriv = "R0 leaks addr",
+		.errstr = "invalid access to map value, value_size=48 off=44 size=8",
+		.result_unpriv = REJECT,
+		.result = REJECT,
+		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+	},
+	{
+		"invalid map access into an array with a invalid max check",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 10),
+			BPF_MOV64_REG(BPF_REG_8, BPF_REG_0),
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_8),
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0,
+				    offsetof(struct test_val, foo)),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map2 = { 3, 11 },
+		.errstr = "R0 pointer += pointer",
+		.result = REJECT,
+		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+	},
+	{
+		"valid cgroup storage access",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_2, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_get_local_storage),
+			BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
+			BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_cgroup_storage = { 1 },
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+	},
+	{
+		"invalid cgroup storage access 1",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_2, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_get_local_storage),
+			BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
+			BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map1 = { 1 },
+		.result = REJECT,
+		.errstr = "cannot pass map_type 1 into func bpf_get_local_storage",
+		.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+	},
+	{
+		"invalid cgroup storage access 2",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_2, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 1),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_get_local_storage),
+			BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.result = REJECT,
+		.errstr = "fd 1 is not pointing to valid bpf_map",
+		.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+	},
+	{
+		"invalid per-cgroup storage access 3",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_2, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_get_local_storage),
+			BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 256),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 1),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_cgroup_storage = { 1 },
+		.result = REJECT,
+		.errstr = "invalid access to map value, value_size=64 off=256 size=4",
+		.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+	},
+	{
+		"invalid cgroup storage access 4",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_2, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_get_local_storage),
+			BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, -2),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 1),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_cgroup_storage = { 1 },
+		.result = REJECT,
+		.errstr = "invalid access to map value, value_size=64 off=-2 size=4",
+		.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+	},
+	{
+		"invalid cgroup storage access 5",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_2, 7),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_get_local_storage),
+			BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
+			BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_cgroup_storage = { 1 },
+		.result = REJECT,
+		.errstr = "get_local_storage() doesn't support non-zero flags",
+		.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+	},
+	{
+		"invalid cgroup storage access 6",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_1),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_get_local_storage),
+			BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
+			BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_cgroup_storage = { 1 },
+		.result = REJECT,
+		.errstr = "get_local_storage() doesn't support non-zero flags",
+		.errstr_unpriv = "R2 leaks addr into helper function",
+		.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+	},
+	{
+		"multiple registers share map_lookup_elem result",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_1, 10),
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_0),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+			BPF_ST_MEM(BPF_DW, BPF_REG_4, 0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map1 = { 4 },
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS
+	},
+	{
+		"alu ops on ptr_to_map_value_or_null, 1",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_1, 10),
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_0),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 2),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+			BPF_ST_MEM(BPF_DW, BPF_REG_4, 0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map1 = { 4 },
+		.errstr = "R4 pointer arithmetic on PTR_TO_MAP_VALUE_OR_NULL",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS
+	},
+	{
+		"alu ops on ptr_to_map_value_or_null, 2",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_1, 10),
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_0),
+			BPF_ALU64_IMM(BPF_AND, BPF_REG_4, -1),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+			BPF_ST_MEM(BPF_DW, BPF_REG_4, 0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map1 = { 4 },
+		.errstr = "R4 pointer arithmetic on PTR_TO_MAP_VALUE_OR_NULL",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS
+	},
+	{
+		"alu ops on ptr_to_map_value_or_null, 3",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_1, 10),
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_0),
+			BPF_ALU64_IMM(BPF_LSH, BPF_REG_4, 1),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+			BPF_ST_MEM(BPF_DW, BPF_REG_4, 0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map1 = { 4 },
+		.errstr = "R4 pointer arithmetic on PTR_TO_MAP_VALUE_OR_NULL",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS
+	},
+	{
+		"invalid memory access with multiple map_lookup_elem calls",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_1, 10),
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_MOV64_REG(BPF_REG_8, BPF_REG_1),
+			BPF_MOV64_REG(BPF_REG_7, BPF_REG_2),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_0),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_8),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_7),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+			BPF_ST_MEM(BPF_DW, BPF_REG_4, 0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map1 = { 4 },
+		.result = REJECT,
+		.errstr = "R4 !read_ok",
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS
+	},
+	{
+		"valid indirect map_lookup_elem access with 2nd lookup in branch",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_1, 10),
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_MOV64_REG(BPF_REG_8, BPF_REG_1),
+			BPF_MOV64_REG(BPF_REG_7, BPF_REG_2),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_MOV64_IMM(BPF_REG_2, 10),
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_2, 0, 3),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_8),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_7),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_0),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+			BPF_ST_MEM(BPF_DW, BPF_REG_4, 0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map1 = { 4 },
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS
+	},
+	{
+		"invalid map access from else condition",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
+			BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
+			BPF_JMP_IMM(BPF_JGE, BPF_REG_1, MAX_ENTRIES-1, 1),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 1),
+			BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 2),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+			BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, offsetof(struct test_val, foo)),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map2 = { 3 },
+		.errstr = "R0 unbounded memory access",
+		.result = REJECT,
+		.errstr_unpriv = "R0 leaks addr",
+		.result_unpriv = REJECT,
+		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+	},
+	{
+		"constant register |= constant should keep constant type",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -48),
+			BPF_MOV64_IMM(BPF_REG_2, 34),
+			BPF_ALU64_IMM(BPF_OR, BPF_REG_2, 13),
+			BPF_MOV64_IMM(BPF_REG_3, 0),
+			BPF_EMIT_CALL(BPF_FUNC_probe_read),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
+	{
+		"constant register |= constant should not bypass stack boundary checks",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -48),
+			BPF_MOV64_IMM(BPF_REG_2, 34),
+			BPF_ALU64_IMM(BPF_OR, BPF_REG_2, 24),
+			BPF_MOV64_IMM(BPF_REG_3, 0),
+			BPF_EMIT_CALL(BPF_FUNC_probe_read),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "invalid stack type R1 off=-48 access_size=58",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
+	{
+		"constant register |= constant register should keep constant type",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -48),
+			BPF_MOV64_IMM(BPF_REG_2, 34),
+			BPF_MOV64_IMM(BPF_REG_4, 13),
+			BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_4),
+			BPF_MOV64_IMM(BPF_REG_3, 0),
+			BPF_EMIT_CALL(BPF_FUNC_probe_read),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
+	{
+		"constant register |= constant register should not bypass stack boundary checks",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -48),
+			BPF_MOV64_IMM(BPF_REG_2, 34),
+			BPF_MOV64_IMM(BPF_REG_4, 24),
+			BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_4),
+			BPF_MOV64_IMM(BPF_REG_3, 0),
+			BPF_EMIT_CALL(BPF_FUNC_probe_read),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "invalid stack type R1 off=-48 access_size=58",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
+	{
+		"invalid direct packet write for LWT_IN",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct __sk_buff, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct __sk_buff, data_end)),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
+			BPF_STX_MEM(BPF_B, BPF_REG_2, BPF_REG_2, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "cannot write into packet",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_LWT_IN,
+	},
+	{
+		"invalid direct packet write for LWT_OUT",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct __sk_buff, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct __sk_buff, data_end)),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
+			BPF_STX_MEM(BPF_B, BPF_REG_2, BPF_REG_2, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "cannot write into packet",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_LWT_OUT,
+	},
+	{
+		"direct packet write for LWT_XMIT",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct __sk_buff, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct __sk_buff, data_end)),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
+			BPF_STX_MEM(BPF_B, BPF_REG_2, BPF_REG_2, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_LWT_XMIT,
+	},
+	{
+		"direct packet read for LWT_IN",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct __sk_buff, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct __sk_buff, data_end)),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
+			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_LWT_IN,
+	},
+	{
+		"direct packet read for LWT_OUT",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct __sk_buff, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct __sk_buff, data_end)),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
+			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_LWT_OUT,
+	},
+	{
+		"direct packet read for LWT_XMIT",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct __sk_buff, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct __sk_buff, data_end)),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
+			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_LWT_XMIT,
+	},
+	{
+		"overlapping checks for direct packet access",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct __sk_buff, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct __sk_buff, data_end)),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 4),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 6),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1),
+			BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_2, 6),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_LWT_XMIT,
+	},
+	{
+		"make headroom for LWT_XMIT",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_MOV64_IMM(BPF_REG_2, 34),
+			BPF_MOV64_IMM(BPF_REG_3, 0),
+			BPF_EMIT_CALL(BPF_FUNC_skb_change_head),
+			/* split for s390 to succeed */
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+			BPF_MOV64_IMM(BPF_REG_2, 42),
+			BPF_MOV64_IMM(BPF_REG_3, 0),
+			BPF_EMIT_CALL(BPF_FUNC_skb_change_head),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_LWT_XMIT,
+	},
+	{
+		"invalid access of tc_classid for LWT_IN",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, tc_classid)),
+			BPF_EXIT_INSN(),
+		},
+		.result = REJECT,
+		.errstr = "invalid bpf_context access",
+	},
+	{
+		"invalid access of tc_classid for LWT_OUT",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, tc_classid)),
+			BPF_EXIT_INSN(),
+		},
+		.result = REJECT,
+		.errstr = "invalid bpf_context access",
+	},
+	{
+		"invalid access of tc_classid for LWT_XMIT",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, tc_classid)),
+			BPF_EXIT_INSN(),
+		},
+		.result = REJECT,
+		.errstr = "invalid bpf_context access",
+	},
+	{
+		"leak pointer into ctx 1",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct __sk_buff, cb[0])),
+			BPF_LD_MAP_FD(BPF_REG_2, 0),
+			BPF_STX_XADD(BPF_DW, BPF_REG_1, BPF_REG_2,
+				      offsetof(struct __sk_buff, cb[0])),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map1 = { 2 },
+		.errstr_unpriv = "R2 leaks addr into mem",
+		.result_unpriv = REJECT,
+		.result = REJECT,
+		.errstr = "BPF_XADD stores into R1 context is not allowed",
+	},
+	{
+		"leak pointer into ctx 2",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct __sk_buff, cb[0])),
+			BPF_STX_XADD(BPF_DW, BPF_REG_1, BPF_REG_10,
+				      offsetof(struct __sk_buff, cb[0])),
+			BPF_EXIT_INSN(),
+		},
+		.errstr_unpriv = "R10 leaks addr into mem",
+		.result_unpriv = REJECT,
+		.result = REJECT,
+		.errstr = "BPF_XADD stores into R1 context is not allowed",
+	},
+	{
+		"leak pointer into ctx 3",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_LD_MAP_FD(BPF_REG_2, 0),
+			BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2,
+				      offsetof(struct __sk_buff, cb[0])),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map1 = { 1 },
+		.errstr_unpriv = "R2 leaks addr into ctx",
+		.result_unpriv = REJECT,
+		.result = ACCEPT,
+	},
+	{
+		"leak pointer into map val",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3),
+			BPF_MOV64_IMM(BPF_REG_3, 0),
+			BPF_STX_MEM(BPF_DW, BPF_REG_0, BPF_REG_3, 0),
+			BPF_STX_XADD(BPF_DW, BPF_REG_0, BPF_REG_6, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map1 = { 4 },
+		.errstr_unpriv = "R6 leaks addr into mem",
+		.result_unpriv = REJECT,
+		.result = ACCEPT,
+	},
+	{
+		"helper access to map: full range",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+			BPF_MOV64_IMM(BPF_REG_2, sizeof(struct test_val)),
+			BPF_MOV64_IMM(BPF_REG_3, 0),
+			BPF_EMIT_CALL(BPF_FUNC_probe_read),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map2 = { 3 },
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
+	{
+		"helper access to map: partial range",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+			BPF_MOV64_IMM(BPF_REG_2, 8),
+			BPF_MOV64_IMM(BPF_REG_3, 0),
+			BPF_EMIT_CALL(BPF_FUNC_probe_read),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map2 = { 3 },
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
+	{
+		"helper access to map: empty range",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+			BPF_MOV64_IMM(BPF_REG_2, 0),
+			BPF_EMIT_CALL(BPF_FUNC_trace_printk),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map2 = { 3 },
+		.errstr = "invalid access to map value, value_size=48 off=0 size=0",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
+	{
+		"helper access to map: out-of-bound range",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+			BPF_MOV64_IMM(BPF_REG_2, sizeof(struct test_val) + 8),
+			BPF_MOV64_IMM(BPF_REG_3, 0),
+			BPF_EMIT_CALL(BPF_FUNC_probe_read),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map2 = { 3 },
+		.errstr = "invalid access to map value, value_size=48 off=0 size=56",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
+	{
+		"helper access to map: negative range",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+			BPF_MOV64_IMM(BPF_REG_2, -8),
+			BPF_MOV64_IMM(BPF_REG_3, 0),
+			BPF_EMIT_CALL(BPF_FUNC_probe_read),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map2 = { 3 },
+		.errstr = "R2 min value is negative",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
+	{
+		"helper access to adjusted map (via const imm): full range",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1,
+				offsetof(struct test_val, foo)),
+			BPF_MOV64_IMM(BPF_REG_2,
+				sizeof(struct test_val) -
+				offsetof(struct test_val, foo)),
+			BPF_MOV64_IMM(BPF_REG_3, 0),
+			BPF_EMIT_CALL(BPF_FUNC_probe_read),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map2 = { 3 },
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
+	{
+		"helper access to adjusted map (via const imm): partial range",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1,
+				offsetof(struct test_val, foo)),
+			BPF_MOV64_IMM(BPF_REG_2, 8),
+			BPF_MOV64_IMM(BPF_REG_3, 0),
+			BPF_EMIT_CALL(BPF_FUNC_probe_read),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map2 = { 3 },
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
+	{
+		"helper access to adjusted map (via const imm): empty range",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1,
+				offsetof(struct test_val, foo)),
+			BPF_MOV64_IMM(BPF_REG_2, 0),
+			BPF_EMIT_CALL(BPF_FUNC_trace_printk),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map2 = { 3 },
+		.errstr = "invalid access to map value, value_size=48 off=4 size=0",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
+	{
+		"helper access to adjusted map (via const imm): out-of-bound range",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1,
+				offsetof(struct test_val, foo)),
+			BPF_MOV64_IMM(BPF_REG_2,
+				sizeof(struct test_val) -
+				offsetof(struct test_val, foo) + 8),
+			BPF_MOV64_IMM(BPF_REG_3, 0),
+			BPF_EMIT_CALL(BPF_FUNC_probe_read),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map2 = { 3 },
+		.errstr = "invalid access to map value, value_size=48 off=4 size=52",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
+	{
+		"helper access to adjusted map (via const imm): negative range (> adjustment)",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1,
+				offsetof(struct test_val, foo)),
+			BPF_MOV64_IMM(BPF_REG_2, -8),
+			BPF_MOV64_IMM(BPF_REG_3, 0),
+			BPF_EMIT_CALL(BPF_FUNC_probe_read),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map2 = { 3 },
+		.errstr = "R2 min value is negative",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
+	{
+		"helper access to adjusted map (via const imm): negative range (< adjustment)",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1,
+				offsetof(struct test_val, foo)),
+			BPF_MOV64_IMM(BPF_REG_2, -1),
+			BPF_MOV64_IMM(BPF_REG_3, 0),
+			BPF_EMIT_CALL(BPF_FUNC_probe_read),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map2 = { 3 },
+		.errstr = "R2 min value is negative",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
+	{
+		"helper access to adjusted map (via const reg): full range",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+			BPF_MOV64_IMM(BPF_REG_3,
+				offsetof(struct test_val, foo)),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
+			BPF_MOV64_IMM(BPF_REG_2,
+				sizeof(struct test_val) -
+				offsetof(struct test_val, foo)),
+			BPF_MOV64_IMM(BPF_REG_3, 0),
+			BPF_EMIT_CALL(BPF_FUNC_probe_read),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map2 = { 3 },
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
+	{
+		"helper access to adjusted map (via const reg): partial range",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+			BPF_MOV64_IMM(BPF_REG_3,
+				offsetof(struct test_val, foo)),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
+			BPF_MOV64_IMM(BPF_REG_2, 8),
+			BPF_MOV64_IMM(BPF_REG_3, 0),
+			BPF_EMIT_CALL(BPF_FUNC_probe_read),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map2 = { 3 },
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
+	{
+		"helper access to adjusted map (via const reg): empty range",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+			BPF_MOV64_IMM(BPF_REG_3, 0),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
+			BPF_MOV64_IMM(BPF_REG_2, 0),
+			BPF_EMIT_CALL(BPF_FUNC_trace_printk),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map2 = { 3 },
+		.errstr = "R1 min value is outside of the array range",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
+	{
+		"helper access to adjusted map (via const reg): out-of-bound range",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+			BPF_MOV64_IMM(BPF_REG_3,
+				offsetof(struct test_val, foo)),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
+			BPF_MOV64_IMM(BPF_REG_2,
+				sizeof(struct test_val) -
+				offsetof(struct test_val, foo) + 8),
+			BPF_MOV64_IMM(BPF_REG_3, 0),
+			BPF_EMIT_CALL(BPF_FUNC_probe_read),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map2 = { 3 },
+		.errstr = "invalid access to map value, value_size=48 off=4 size=52",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
+	{
+		"helper access to adjusted map (via const reg): negative range (> adjustment)",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+			BPF_MOV64_IMM(BPF_REG_3,
+				offsetof(struct test_val, foo)),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
+			BPF_MOV64_IMM(BPF_REG_2, -8),
+			BPF_MOV64_IMM(BPF_REG_3, 0),
+			BPF_EMIT_CALL(BPF_FUNC_probe_read),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map2 = { 3 },
+		.errstr = "R2 min value is negative",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
+	{
+		"helper access to adjusted map (via const reg): negative range (< adjustment)",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+			BPF_MOV64_IMM(BPF_REG_3,
+				offsetof(struct test_val, foo)),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
+			BPF_MOV64_IMM(BPF_REG_2, -1),
+			BPF_MOV64_IMM(BPF_REG_3, 0),
+			BPF_EMIT_CALL(BPF_FUNC_probe_read),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map2 = { 3 },
+		.errstr = "R2 min value is negative",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
+	{
+		"helper access to adjusted map (via variable): full range",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
+			BPF_JMP_IMM(BPF_JGT, BPF_REG_3,
+				offsetof(struct test_val, foo), 4),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
+			BPF_MOV64_IMM(BPF_REG_2,
+				sizeof(struct test_val) -
+				offsetof(struct test_val, foo)),
+			BPF_MOV64_IMM(BPF_REG_3, 0),
+			BPF_EMIT_CALL(BPF_FUNC_probe_read),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map2 = { 3 },
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
+	{
+		"helper access to adjusted map (via variable): partial range",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
+			BPF_JMP_IMM(BPF_JGT, BPF_REG_3,
+				offsetof(struct test_val, foo), 4),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
+			BPF_MOV64_IMM(BPF_REG_2, 8),
+			BPF_MOV64_IMM(BPF_REG_3, 0),
+			BPF_EMIT_CALL(BPF_FUNC_probe_read),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map2 = { 3 },
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
+	{
+		"helper access to adjusted map (via variable): empty range",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
+			BPF_JMP_IMM(BPF_JGT, BPF_REG_3,
+				offsetof(struct test_val, foo), 3),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
+			BPF_MOV64_IMM(BPF_REG_2, 0),
+			BPF_EMIT_CALL(BPF_FUNC_trace_printk),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map2 = { 3 },
+		.errstr = "R1 min value is outside of the array range",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
+	{
+		"helper access to adjusted map (via variable): no max check",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
+			BPF_MOV64_IMM(BPF_REG_2, 1),
+			BPF_MOV64_IMM(BPF_REG_3, 0),
+			BPF_EMIT_CALL(BPF_FUNC_probe_read),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map2 = { 3 },
+		.errstr = "R1 unbounded memory access",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
+	{
+		"helper access to adjusted map (via variable): wrong max check",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
+			BPF_JMP_IMM(BPF_JGT, BPF_REG_3,
+				offsetof(struct test_val, foo), 4),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
+			BPF_MOV64_IMM(BPF_REG_2,
+				sizeof(struct test_val) -
+				offsetof(struct test_val, foo) + 1),
+			BPF_MOV64_IMM(BPF_REG_3, 0),
+			BPF_EMIT_CALL(BPF_FUNC_probe_read),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map2 = { 3 },
+		.errstr = "invalid access to map value, value_size=48 off=4 size=45",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
+	{
+		"helper access to map: bounds check using <, good access",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
+			BPF_JMP_IMM(BPF_JLT, BPF_REG_3, 32, 2),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
+			BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map2 = { 3 },
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
+	{
+		"helper access to map: bounds check using <, bad access",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
+			BPF_JMP_IMM(BPF_JLT, BPF_REG_3, 32, 4),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
+			BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map2 = { 3 },
+		.result = REJECT,
+		.errstr = "R1 unbounded memory access",
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
+	{
+		"helper access to map: bounds check using <=, good access",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
+			BPF_JMP_IMM(BPF_JLE, BPF_REG_3, 32, 2),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
+			BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map2 = { 3 },
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
+	{
+		"helper access to map: bounds check using <=, bad access",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
+			BPF_JMP_IMM(BPF_JLE, BPF_REG_3, 32, 4),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
+			BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map2 = { 3 },
+		.result = REJECT,
+		.errstr = "R1 unbounded memory access",
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
+	{
+		"helper access to map: bounds check using s<, good access",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
+			BPF_JMP_IMM(BPF_JSLT, BPF_REG_3, 32, 2),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+			BPF_JMP_IMM(BPF_JSLT, BPF_REG_3, 0, -3),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
+			BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map2 = { 3 },
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
+	{
+		"helper access to map: bounds check using s<, good access 2",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
+			BPF_JMP_IMM(BPF_JSLT, BPF_REG_3, 32, 2),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+			BPF_JMP_IMM(BPF_JSLT, BPF_REG_3, -3, -3),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
+			BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map2 = { 3 },
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
+	{
+		"helper access to map: bounds check using s<, bad access",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 0),
+			BPF_JMP_IMM(BPF_JSLT, BPF_REG_3, 32, 2),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+			BPF_JMP_IMM(BPF_JSLT, BPF_REG_3, -3, -3),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
+			BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map2 = { 3 },
+		.result = REJECT,
+		.errstr = "R1 min value is negative",
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
+	{
+		"helper access to map: bounds check using s<=, good access",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
+			BPF_JMP_IMM(BPF_JSLE, BPF_REG_3, 32, 2),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+			BPF_JMP_IMM(BPF_JSLE, BPF_REG_3, 0, -3),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
+			BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map2 = { 3 },
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
+	{
+		"helper access to map: bounds check using s<=, good access 2",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
+			BPF_JMP_IMM(BPF_JSLE, BPF_REG_3, 32, 2),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+			BPF_JMP_IMM(BPF_JSLE, BPF_REG_3, -3, -3),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
+			BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map2 = { 3 },
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
+	{
+		"helper access to map: bounds check using s<=, bad access",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 0),
+			BPF_JMP_IMM(BPF_JSLE, BPF_REG_3, 32, 2),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+			BPF_JMP_IMM(BPF_JSLE, BPF_REG_3, -3, -3),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
+			BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map2 = { 3 },
+		.result = REJECT,
+		.errstr = "R1 min value is negative",
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
+	{
+		"map lookup helper access to map",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map3 = { 3, 8 },
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
+	{
+		"map update helper access to map",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
+			BPF_MOV64_IMM(BPF_REG_4, 0),
+			BPF_MOV64_REG(BPF_REG_3, BPF_REG_0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_update_elem),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map3 = { 3, 10 },
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
+	{
+		"map update helper access to map: wrong size",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
+			BPF_MOV64_IMM(BPF_REG_4, 0),
+			BPF_MOV64_REG(BPF_REG_3, BPF_REG_0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_update_elem),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map1 = { 3 },
+		.fixup_map3 = { 10 },
+		.result = REJECT,
+		.errstr = "invalid access to map value, value_size=8 off=0 size=16",
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
+	{
+		"map helper access to adjusted map (via const imm)",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2,
+				      offsetof(struct other_val, bar)),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map3 = { 3, 9 },
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
+	{
+		"map helper access to adjusted map (via const imm): out-of-bound 1",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2,
+				      sizeof(struct other_val) - 4),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map3 = { 3, 9 },
+		.result = REJECT,
+		.errstr = "invalid access to map value, value_size=16 off=12 size=8",
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
+	{
+		"map helper access to adjusted map (via const imm): out-of-bound 2",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map3 = { 3, 9 },
+		.result = REJECT,
+		.errstr = "invalid access to map value, value_size=16 off=-4 size=8",
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
+	{
+		"map helper access to adjusted map (via const reg)",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+			BPF_MOV64_IMM(BPF_REG_3,
+				      offsetof(struct other_val, bar)),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_3),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map3 = { 3, 10 },
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
+	{
+		"map helper access to adjusted map (via const reg): out-of-bound 1",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+			BPF_MOV64_IMM(BPF_REG_3,
+				      sizeof(struct other_val) - 4),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_3),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map3 = { 3, 10 },
+		.result = REJECT,
+		.errstr = "invalid access to map value, value_size=16 off=12 size=8",
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
+	{
+		"map helper access to adjusted map (via const reg): out-of-bound 2",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+			BPF_MOV64_IMM(BPF_REG_3, -4),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_3),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map3 = { 3, 10 },
+		.result = REJECT,
+		.errstr = "invalid access to map value, value_size=16 off=-4 size=8",
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
+	{
+		"map helper access to adjusted map (via variable)",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
+			BPF_JMP_IMM(BPF_JGT, BPF_REG_3,
+				    offsetof(struct other_val, bar), 4),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_3),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map3 = { 3, 11 },
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
+	{
+		"map helper access to adjusted map (via variable): no max check",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_3),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map3 = { 3, 10 },
+		.result = REJECT,
+		.errstr = "R2 unbounded memory access, make sure to bounds check any array access into a map",
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
+	{
+		"map helper access to adjusted map (via variable): wrong max check",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
+			BPF_JMP_IMM(BPF_JGT, BPF_REG_3,
+				    offsetof(struct other_val, bar) + 1, 4),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_3),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map3 = { 3, 11 },
+		.result = REJECT,
+		.errstr = "invalid access to map value, value_size=16 off=9 size=8",
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
+	{
+		"map element value is preserved across register spilling",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
+			BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 42),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -184),
+			BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_1, 0),
+			BPF_ST_MEM(BPF_DW, BPF_REG_3, 0, 42),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map2 = { 3 },
+		.errstr_unpriv = "R0 leaks addr",
+		.result = ACCEPT,
+		.result_unpriv = REJECT,
+	},
+	{
+		"map element value or null is marked on register spilling",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -152),
+			BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_1, 0),
+			BPF_ST_MEM(BPF_DW, BPF_REG_3, 0, 42),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map2 = { 3 },
+		.errstr_unpriv = "R0 leaks addr",
+		.result = ACCEPT,
+		.result_unpriv = REJECT,
+	},
+	{
+		"map element value store of cleared call register",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+			BPF_STX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map2 = { 3 },
+		.errstr_unpriv = "R1 !read_ok",
+		.errstr = "R1 !read_ok",
+		.result = REJECT,
+		.result_unpriv = REJECT,
+	},
+	{
+		"map element value with unaligned store",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 17),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 3),
+			BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 42),
+			BPF_ST_MEM(BPF_DW, BPF_REG_0, 2, 43),
+			BPF_ST_MEM(BPF_DW, BPF_REG_0, -2, 44),
+			BPF_MOV64_REG(BPF_REG_8, BPF_REG_0),
+			BPF_ST_MEM(BPF_DW, BPF_REG_8, 0, 32),
+			BPF_ST_MEM(BPF_DW, BPF_REG_8, 2, 33),
+			BPF_ST_MEM(BPF_DW, BPF_REG_8, -2, 34),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_8, 5),
+			BPF_ST_MEM(BPF_DW, BPF_REG_8, 0, 22),
+			BPF_ST_MEM(BPF_DW, BPF_REG_8, 4, 23),
+			BPF_ST_MEM(BPF_DW, BPF_REG_8, -7, 24),
+			BPF_MOV64_REG(BPF_REG_7, BPF_REG_8),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, 3),
+			BPF_ST_MEM(BPF_DW, BPF_REG_7, 0, 22),
+			BPF_ST_MEM(BPF_DW, BPF_REG_7, 4, 23),
+			BPF_ST_MEM(BPF_DW, BPF_REG_7, -4, 24),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map2 = { 3 },
+		.errstr_unpriv = "R0 leaks addr",
+		.result = ACCEPT,
+		.result_unpriv = REJECT,
+		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+	},
+	{
+		"map element value with unaligned load",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 11),
+			BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
+			BPF_JMP_IMM(BPF_JGE, BPF_REG_1, MAX_ENTRIES, 9),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 3),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 2),
+			BPF_MOV64_REG(BPF_REG_8, BPF_REG_0),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_8, 0),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_8, 2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 5),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 4),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map2 = { 3 },
+		.errstr_unpriv = "R0 leaks addr",
+		.result = ACCEPT,
+		.result_unpriv = REJECT,
+		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+	},
+	{
+		"map element value illegal alu op, 1",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
+			BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 8),
+			BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 22),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map2 = { 3 },
+		.errstr = "R0 bitwise operator &= on pointer",
+		.result = REJECT,
+	},
+	{
+		"map element value illegal alu op, 2",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
+			BPF_ALU32_IMM(BPF_ADD, BPF_REG_0, 0),
+			BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 22),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map2 = { 3 },
+		.errstr = "R0 32-bit pointer arithmetic prohibited",
+		.result = REJECT,
+	},
+	{
+		"map element value illegal alu op, 3",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
+			BPF_ALU64_IMM(BPF_DIV, BPF_REG_0, 42),
+			BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 22),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map2 = { 3 },
+		.errstr = "R0 pointer arithmetic with /= operator",
+		.result = REJECT,
+	},
+	{
+		"map element value illegal alu op, 4",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
+			BPF_ENDIAN(BPF_FROM_BE, BPF_REG_0, 64),
+			BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 22),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map2 = { 3 },
+		.errstr_unpriv = "R0 pointer arithmetic prohibited",
+		.errstr = "invalid mem access 'inv'",
+		.result = REJECT,
+		.result_unpriv = REJECT,
+		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+	},
+	{
+		"map element value illegal alu op, 5",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
+			BPF_MOV64_IMM(BPF_REG_3, 4096),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 0),
+			BPF_STX_XADD(BPF_DW, BPF_REG_2, BPF_REG_3, 0),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_2, 0),
+			BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 22),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map2 = { 3 },
+		.errstr = "R0 invalid mem access 'inv'",
+		.result = REJECT,
+		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+	},
+	{
+		"map element value is preserved across register spilling",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0,
+				offsetof(struct test_val, foo)),
+			BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 42),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -184),
+			BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_1, 0),
+			BPF_ST_MEM(BPF_DW, BPF_REG_3, 0, 42),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map2 = { 3 },
+		.errstr_unpriv = "R0 leaks addr",
+		.result = ACCEPT,
+		.result_unpriv = REJECT,
+		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+	},
+	{
+		"helper access to variable memory: stack, bitwise AND + JMP, correct bounds",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -64),
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -56),
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -48),
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -40),
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -32),
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -24),
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16),
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8),
+			BPF_MOV64_IMM(BPF_REG_2, 16),
+			BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128),
+			BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 64),
+			BPF_MOV64_IMM(BPF_REG_4, 0),
+			BPF_JMP_REG(BPF_JGE, BPF_REG_4, BPF_REG_2, 2),
+			BPF_MOV64_IMM(BPF_REG_3, 0),
+			BPF_EMIT_CALL(BPF_FUNC_probe_read),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
+	{
+		"helper access to variable memory: stack, bitwise AND, zero included",
+		.insns = {
+			BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, 8),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64),
+			BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128),
+			BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 64),
+			BPF_MOV64_IMM(BPF_REG_3, 0),
+			BPF_EMIT_CALL(BPF_FUNC_probe_read),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "invalid indirect read from stack off -64+0 size 64",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
+	{
+		"helper access to variable memory: stack, bitwise AND + JMP, wrong max",
+		.insns = {
+			BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, 8),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64),
+			BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128),
+			BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 65),
+			BPF_MOV64_IMM(BPF_REG_4, 0),
+			BPF_JMP_REG(BPF_JGE, BPF_REG_4, BPF_REG_2, 2),
+			BPF_MOV64_IMM(BPF_REG_3, 0),
+			BPF_EMIT_CALL(BPF_FUNC_probe_read),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "invalid stack type R1 off=-64 access_size=65",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
+	{
+		"helper access to variable memory: stack, JMP, correct bounds",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -64),
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -56),
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -48),
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -40),
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -32),
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -24),
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16),
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8),
+			BPF_MOV64_IMM(BPF_REG_2, 16),
+			BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128),
+			BPF_JMP_IMM(BPF_JGT, BPF_REG_2, 64, 4),
+			BPF_MOV64_IMM(BPF_REG_4, 0),
+			BPF_JMP_REG(BPF_JGE, BPF_REG_4, BPF_REG_2, 2),
+			BPF_MOV64_IMM(BPF_REG_3, 0),
+			BPF_EMIT_CALL(BPF_FUNC_probe_read),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
+	{
+		"helper access to variable memory: stack, JMP (signed), correct bounds",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -64),
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -56),
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -48),
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -40),
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -32),
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -24),
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16),
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8),
+			BPF_MOV64_IMM(BPF_REG_2, 16),
+			BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128),
+			BPF_JMP_IMM(BPF_JSGT, BPF_REG_2, 64, 4),
+			BPF_MOV64_IMM(BPF_REG_4, 0),
+			BPF_JMP_REG(BPF_JSGE, BPF_REG_4, BPF_REG_2, 2),
+			BPF_MOV64_IMM(BPF_REG_3, 0),
+			BPF_EMIT_CALL(BPF_FUNC_probe_read),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
+	{
+		"helper access to variable memory: stack, JMP, bounds + offset",
+		.insns = {
+			BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, 8),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64),
+			BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128),
+			BPF_JMP_IMM(BPF_JGT, BPF_REG_2, 64, 5),
+			BPF_MOV64_IMM(BPF_REG_4, 0),
+			BPF_JMP_REG(BPF_JGE, BPF_REG_4, BPF_REG_2, 3),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 1),
+			BPF_MOV64_IMM(BPF_REG_3, 0),
+			BPF_EMIT_CALL(BPF_FUNC_probe_read),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "invalid stack type R1 off=-64 access_size=65",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
+	{
+		"helper access to variable memory: stack, JMP, wrong max",
+		.insns = {
+			BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, 8),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64),
+			BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128),
+			BPF_JMP_IMM(BPF_JGT, BPF_REG_2, 65, 4),
+			BPF_MOV64_IMM(BPF_REG_4, 0),
+			BPF_JMP_REG(BPF_JGE, BPF_REG_4, BPF_REG_2, 2),
+			BPF_MOV64_IMM(BPF_REG_3, 0),
+			BPF_EMIT_CALL(BPF_FUNC_probe_read),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "invalid stack type R1 off=-64 access_size=65",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
+	{
+		"helper access to variable memory: stack, JMP, no max check",
+		.insns = {
+			BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, 8),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64),
+			BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128),
+			BPF_MOV64_IMM(BPF_REG_4, 0),
+			BPF_JMP_REG(BPF_JGE, BPF_REG_4, BPF_REG_2, 2),
+			BPF_MOV64_IMM(BPF_REG_3, 0),
+			BPF_EMIT_CALL(BPF_FUNC_probe_read),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		/* because max wasn't checked, signed min is negative */
+		.errstr = "R2 min value is negative, either use unsigned or 'var &= const'",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
+	{
+		"helper access to variable memory: stack, JMP, no min check",
+		.insns = {
+			BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, 8),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64),
+			BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128),
+			BPF_JMP_IMM(BPF_JGT, BPF_REG_2, 64, 3),
+			BPF_MOV64_IMM(BPF_REG_3, 0),
+			BPF_EMIT_CALL(BPF_FUNC_probe_read),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "invalid indirect read from stack off -64+0 size 64",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
+	{
+		"helper access to variable memory: stack, JMP (signed), no min check",
+		.insns = {
+			BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, 8),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64),
+			BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128),
+			BPF_JMP_IMM(BPF_JSGT, BPF_REG_2, 64, 3),
+			BPF_MOV64_IMM(BPF_REG_3, 0),
+			BPF_EMIT_CALL(BPF_FUNC_probe_read),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "R2 min value is negative",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
+	{
+		"helper access to variable memory: map, JMP, correct bounds",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 10),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+			BPF_MOV64_IMM(BPF_REG_2, sizeof(struct test_val)),
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -128),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -128),
+			BPF_JMP_IMM(BPF_JSGT, BPF_REG_2,
+				sizeof(struct test_val), 4),
+			BPF_MOV64_IMM(BPF_REG_4, 0),
+			BPF_JMP_REG(BPF_JSGE, BPF_REG_4, BPF_REG_2, 2),
+			BPF_MOV64_IMM(BPF_REG_3, 0),
+			BPF_EMIT_CALL(BPF_FUNC_probe_read),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map2 = { 3 },
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
+	{
+		"helper access to variable memory: map, JMP, wrong max",
+		.insns = {
+			BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 8),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 10),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_6),
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -128),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -128),
+			BPF_JMP_IMM(BPF_JSGT, BPF_REG_2,
+				sizeof(struct test_val) + 1, 4),
+			BPF_MOV64_IMM(BPF_REG_4, 0),
+			BPF_JMP_REG(BPF_JSGE, BPF_REG_4, BPF_REG_2, 2),
+			BPF_MOV64_IMM(BPF_REG_3, 0),
+			BPF_EMIT_CALL(BPF_FUNC_probe_read),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map2 = { 4 },
+		.errstr = "invalid access to map value, value_size=48 off=0 size=49",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
+	{
+		"helper access to variable memory: map adjusted, JMP, correct bounds",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 11),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 20),
+			BPF_MOV64_IMM(BPF_REG_2, sizeof(struct test_val)),
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -128),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -128),
+			BPF_JMP_IMM(BPF_JSGT, BPF_REG_2,
+				sizeof(struct test_val) - 20, 4),
+			BPF_MOV64_IMM(BPF_REG_4, 0),
+			BPF_JMP_REG(BPF_JSGE, BPF_REG_4, BPF_REG_2, 2),
+			BPF_MOV64_IMM(BPF_REG_3, 0),
+			BPF_EMIT_CALL(BPF_FUNC_probe_read),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map2 = { 3 },
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
+	{
+		"helper access to variable memory: map adjusted, JMP, wrong max",
+		.insns = {
+			BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 8),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 11),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 20),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_6),
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -128),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -128),
+			BPF_JMP_IMM(BPF_JSGT, BPF_REG_2,
+				sizeof(struct test_val) - 19, 4),
+			BPF_MOV64_IMM(BPF_REG_4, 0),
+			BPF_JMP_REG(BPF_JSGE, BPF_REG_4, BPF_REG_2, 2),
+			BPF_MOV64_IMM(BPF_REG_3, 0),
+			BPF_EMIT_CALL(BPF_FUNC_probe_read),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map2 = { 4 },
+		.errstr = "R1 min value is outside of the array range",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
+	{
+		"helper access to variable memory: size = 0 allowed on NULL (ARG_PTR_TO_MEM_OR_NULL)",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_1, 0),
+			BPF_MOV64_IMM(BPF_REG_2, 0),
+			BPF_MOV64_IMM(BPF_REG_3, 0),
+			BPF_MOV64_IMM(BPF_REG_4, 0),
+			BPF_MOV64_IMM(BPF_REG_5, 0),
+			BPF_EMIT_CALL(BPF_FUNC_csum_diff),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	},
+	{
+		"helper access to variable memory: size > 0 not allowed on NULL (ARG_PTR_TO_MEM_OR_NULL)",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0),
+			BPF_MOV64_IMM(BPF_REG_1, 0),
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -128),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -128),
+			BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 64),
+			BPF_MOV64_IMM(BPF_REG_3, 0),
+			BPF_MOV64_IMM(BPF_REG_4, 0),
+			BPF_MOV64_IMM(BPF_REG_5, 0),
+			BPF_EMIT_CALL(BPF_FUNC_csum_diff),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "R1 type=inv expected=fp",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	},
+	{
+		"helper access to variable memory: size = 0 allowed on != NULL stack pointer (ARG_PTR_TO_MEM_OR_NULL)",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+			BPF_MOV64_IMM(BPF_REG_2, 0),
+			BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, 0),
+			BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 8),
+			BPF_MOV64_IMM(BPF_REG_3, 0),
+			BPF_MOV64_IMM(BPF_REG_4, 0),
+			BPF_MOV64_IMM(BPF_REG_5, 0),
+			BPF_EMIT_CALL(BPF_FUNC_csum_diff),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	},
+	{
+		"helper access to variable memory: size = 0 allowed on != NULL map pointer (ARG_PTR_TO_MEM_OR_NULL)",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+			BPF_MOV64_IMM(BPF_REG_2, 0),
+			BPF_MOV64_IMM(BPF_REG_3, 0),
+			BPF_MOV64_IMM(BPF_REG_4, 0),
+			BPF_MOV64_IMM(BPF_REG_5, 0),
+			BPF_EMIT_CALL(BPF_FUNC_csum_diff),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map1 = { 3 },
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	},
+	{
+		"helper access to variable memory: size possible = 0 allowed on != NULL stack pointer (ARG_PTR_TO_MEM_OR_NULL)",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 0),
+			BPF_JMP_IMM(BPF_JGT, BPF_REG_2, 8, 7),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+			BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, 0),
+			BPF_MOV64_IMM(BPF_REG_3, 0),
+			BPF_MOV64_IMM(BPF_REG_4, 0),
+			BPF_MOV64_IMM(BPF_REG_5, 0),
+			BPF_EMIT_CALL(BPF_FUNC_csum_diff),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map1 = { 3 },
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	},
+	{
+		"helper access to variable memory: size possible = 0 allowed on != NULL map pointer (ARG_PTR_TO_MEM_OR_NULL)",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 0),
+			BPF_JMP_IMM(BPF_JGT, BPF_REG_2, 8, 4),
+			BPF_MOV64_IMM(BPF_REG_3, 0),
+			BPF_MOV64_IMM(BPF_REG_4, 0),
+			BPF_MOV64_IMM(BPF_REG_5, 0),
+			BPF_EMIT_CALL(BPF_FUNC_csum_diff),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map1 = { 3 },
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	},
+	{
+		"helper access to variable memory: size possible = 0 allowed on != NULL packet pointer (ARG_PTR_TO_MEM_OR_NULL)",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+				    offsetof(struct __sk_buff, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct __sk_buff, data_end)),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_6),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 7),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_6, 0),
+			BPF_JMP_IMM(BPF_JGT, BPF_REG_2, 8, 4),
+			BPF_MOV64_IMM(BPF_REG_3, 0),
+			BPF_MOV64_IMM(BPF_REG_4, 0),
+			BPF_MOV64_IMM(BPF_REG_5, 0),
+			BPF_EMIT_CALL(BPF_FUNC_csum_diff),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.retval = 0 /* csum_diff of 64-byte packet */,
+		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+	},
+	{
+		"helper access to variable memory: size = 0 not allowed on NULL (!ARG_PTR_TO_MEM_OR_NULL)",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_1, 0),
+			BPF_MOV64_IMM(BPF_REG_2, 0),
+			BPF_MOV64_IMM(BPF_REG_3, 0),
+			BPF_EMIT_CALL(BPF_FUNC_probe_read),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "R1 type=inv expected=fp",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
+	{
+		"helper access to variable memory: size > 0 not allowed on NULL (!ARG_PTR_TO_MEM_OR_NULL)",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_1, 0),
+			BPF_MOV64_IMM(BPF_REG_2, 1),
+			BPF_MOV64_IMM(BPF_REG_3, 0),
+			BPF_EMIT_CALL(BPF_FUNC_probe_read),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "R1 type=inv expected=fp",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
+	{
+		"helper access to variable memory: size = 0 allowed on != NULL stack pointer (!ARG_PTR_TO_MEM_OR_NULL)",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+			BPF_MOV64_IMM(BPF_REG_2, 0),
+			BPF_MOV64_IMM(BPF_REG_3, 0),
+			BPF_EMIT_CALL(BPF_FUNC_probe_read),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
+	{
+		"helper access to variable memory: size = 0 allowed on != NULL map pointer (!ARG_PTR_TO_MEM_OR_NULL)",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+			BPF_MOV64_IMM(BPF_REG_2, 0),
+			BPF_MOV64_IMM(BPF_REG_3, 0),
+			BPF_EMIT_CALL(BPF_FUNC_probe_read),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map1 = { 3 },
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
+	{
+		"helper access to variable memory: size possible = 0 allowed on != NULL stack pointer (!ARG_PTR_TO_MEM_OR_NULL)",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 0),
+			BPF_JMP_IMM(BPF_JGT, BPF_REG_2, 8, 4),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+			BPF_MOV64_IMM(BPF_REG_3, 0),
+			BPF_EMIT_CALL(BPF_FUNC_probe_read),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map1 = { 3 },
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
+	{
+		"helper access to variable memory: size possible = 0 allowed on != NULL map pointer (!ARG_PTR_TO_MEM_OR_NULL)",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 0),
+			BPF_JMP_IMM(BPF_JGT, BPF_REG_2, 8, 2),
+			BPF_MOV64_IMM(BPF_REG_3, 0),
+			BPF_EMIT_CALL(BPF_FUNC_probe_read),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map1 = { 3 },
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
+	{
+		"helper access to variable memory: 8 bytes leak",
+		.insns = {
+			BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, 8),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -64),
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -56),
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -48),
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -40),
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -24),
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16),
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8),
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -128),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -128),
+			BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 63),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 1),
+			BPF_MOV64_IMM(BPF_REG_3, 0),
+			BPF_EMIT_CALL(BPF_FUNC_probe_read),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "invalid indirect read from stack off -64+32 size 64",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
+	{
+		"helper access to variable memory: 8 bytes no leak (init memory)",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -64),
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -56),
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -48),
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -40),
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -32),
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -24),
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16),
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64),
+			BPF_MOV64_IMM(BPF_REG_2, 0),
+			BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 32),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 32),
+			BPF_MOV64_IMM(BPF_REG_3, 0),
+			BPF_EMIT_CALL(BPF_FUNC_probe_read),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
+	{
+		"invalid and of negative number",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+			BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+			BPF_ALU64_IMM(BPF_AND, BPF_REG_1, -4),
+			BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 2),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+			BPF_ST_MEM(BPF_DW, BPF_REG_0, 0,
+				   offsetof(struct test_val, foo)),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map2 = { 3 },
+		.errstr = "R0 max value is outside of the array range",
+		.result = REJECT,
+		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+	},
+	{
+		"invalid range check",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 12),
+			BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
+			BPF_MOV64_IMM(BPF_REG_9, 1),
+			BPF_ALU32_IMM(BPF_MOD, BPF_REG_1, 2),
+			BPF_ALU32_IMM(BPF_ADD, BPF_REG_1, 1),
+			BPF_ALU32_REG(BPF_AND, BPF_REG_9, BPF_REG_1),
+			BPF_ALU32_IMM(BPF_ADD, BPF_REG_9, 1),
+			BPF_ALU32_IMM(BPF_RSH, BPF_REG_9, 1),
+			BPF_MOV32_IMM(BPF_REG_3, 1),
+			BPF_ALU32_REG(BPF_SUB, BPF_REG_3, BPF_REG_9),
+			BPF_ALU32_IMM(BPF_MUL, BPF_REG_3, 0x10000000),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_3),
+			BPF_STX_MEM(BPF_W, BPF_REG_0, BPF_REG_3, 0),
+			BPF_MOV64_REG(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map2 = { 3 },
+		.errstr = "R0 max value is outside of the array range",
+		.result = REJECT,
+		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+	},
+	{
+		"map in map access",
+		.insns = {
+			BPF_ST_MEM(0, BPF_REG_10, -4, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
+			BPF_ST_MEM(0, BPF_REG_10, -4, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map_in_map = { 3 },
+		.result = ACCEPT,
+	},
+	{
+		"invalid inner map pointer",
+		.insns = {
+			BPF_ST_MEM(0, BPF_REG_10, -4, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
+			BPF_ST_MEM(0, BPF_REG_10, -4, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map_in_map = { 3 },
+		.errstr = "R1 pointer arithmetic on CONST_PTR_TO_MAP prohibited",
+		.result = REJECT,
+	},
+	{
+		"forgot null checking on the inner map pointer",
+		.insns = {
+			BPF_ST_MEM(0, BPF_REG_10, -4, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_ST_MEM(0, BPF_REG_10, -4, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map_in_map = { 3 },
+		.errstr = "R1 type=map_value_or_null expected=map_ptr",
+		.result = REJECT,
+	},
+	{
+		"ld_abs: check calling conv, r1",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_MOV64_IMM(BPF_REG_1, 0),
+			BPF_LD_ABS(BPF_W, -0x200000),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "R1 !read_ok",
+		.result = REJECT,
+	},
+	{
+		"ld_abs: check calling conv, r2",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_MOV64_IMM(BPF_REG_2, 0),
+			BPF_LD_ABS(BPF_W, -0x200000),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "R2 !read_ok",
+		.result = REJECT,
+	},
+	{
+		"ld_abs: check calling conv, r3",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_MOV64_IMM(BPF_REG_3, 0),
+			BPF_LD_ABS(BPF_W, -0x200000),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_3),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "R3 !read_ok",
+		.result = REJECT,
+	},
+	{
+		"ld_abs: check calling conv, r4",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_MOV64_IMM(BPF_REG_4, 0),
+			BPF_LD_ABS(BPF_W, -0x200000),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_4),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "R4 !read_ok",
+		.result = REJECT,
+	},
+	{
+		"ld_abs: check calling conv, r5",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_MOV64_IMM(BPF_REG_5, 0),
+			BPF_LD_ABS(BPF_W, -0x200000),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_5),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "R5 !read_ok",
+		.result = REJECT,
+	},
+	{
+		"ld_abs: check calling conv, r7",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_MOV64_IMM(BPF_REG_7, 0),
+			BPF_LD_ABS(BPF_W, -0x200000),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_7),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+	},
+	{
+		"ld_abs: tests on r6 and skb data reload helper",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_LD_ABS(BPF_B, 0),
+			BPF_LD_ABS(BPF_H, 0),
+			BPF_LD_ABS(BPF_W, 0),
+			BPF_MOV64_REG(BPF_REG_7, BPF_REG_6),
+			BPF_MOV64_IMM(BPF_REG_6, 0),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
+			BPF_MOV64_IMM(BPF_REG_2, 1),
+			BPF_MOV64_IMM(BPF_REG_3, 2),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_skb_vlan_push),
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_7),
+			BPF_LD_ABS(BPF_B, 0),
+			BPF_LD_ABS(BPF_H, 0),
+			BPF_LD_ABS(BPF_W, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 42),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = ACCEPT,
+		.retval = 42 /* ultimate return value */,
+	},
+	{
+		"ld_ind: check calling conv, r1",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_MOV64_IMM(BPF_REG_1, 1),
+			BPF_LD_IND(BPF_W, BPF_REG_1, -0x200000),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "R1 !read_ok",
+		.result = REJECT,
+	},
+	{
+		"ld_ind: check calling conv, r2",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_MOV64_IMM(BPF_REG_2, 1),
+			BPF_LD_IND(BPF_W, BPF_REG_2, -0x200000),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "R2 !read_ok",
+		.result = REJECT,
+	},
+	{
+		"ld_ind: check calling conv, r3",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_MOV64_IMM(BPF_REG_3, 1),
+			BPF_LD_IND(BPF_W, BPF_REG_3, -0x200000),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_3),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "R3 !read_ok",
+		.result = REJECT,
+	},
+	{
+		"ld_ind: check calling conv, r4",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_MOV64_IMM(BPF_REG_4, 1),
+			BPF_LD_IND(BPF_W, BPF_REG_4, -0x200000),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_4),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "R4 !read_ok",
+		.result = REJECT,
+	},
+	{
+		"ld_ind: check calling conv, r5",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_MOV64_IMM(BPF_REG_5, 1),
+			BPF_LD_IND(BPF_W, BPF_REG_5, -0x200000),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_5),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "R5 !read_ok",
+		.result = REJECT,
+	},
+	{
+		"ld_ind: check calling conv, r7",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_MOV64_IMM(BPF_REG_7, 1),
+			BPF_LD_IND(BPF_W, BPF_REG_7, -0x200000),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_7),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.retval = 1,
+	},
+	{
+		"check bpf_perf_event_data->sample_period byte load permitted",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct bpf_perf_event_data, sample_period)),
+#else
+			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct bpf_perf_event_data, sample_period) + 7),
+#endif
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_PERF_EVENT,
+	},
+	{
+		"check bpf_perf_event_data->sample_period half load permitted",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+			BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct bpf_perf_event_data, sample_period)),
+#else
+			BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct bpf_perf_event_data, sample_period) + 6),
+#endif
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_PERF_EVENT,
+	},
+	{
+		"check bpf_perf_event_data->sample_period word load permitted",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct bpf_perf_event_data, sample_period)),
+#else
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct bpf_perf_event_data, sample_period) + 4),
+#endif
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_PERF_EVENT,
+	},
+	{
+		"check bpf_perf_event_data->sample_period dword load permitted",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct bpf_perf_event_data, sample_period)),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_PERF_EVENT,
+	},
+	{
+		"check skb->data half load not permitted",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+			BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, data)),
+#else
+			BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, data) + 2),
+#endif
+			BPF_EXIT_INSN(),
+		},
+		.result = REJECT,
+		.errstr = "invalid bpf_context access",
+	},
+	{
+		"check skb->tc_classid half load not permitted for lwt prog",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+			BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, tc_classid)),
+#else
+			BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, tc_classid) + 2),
+#endif
+			BPF_EXIT_INSN(),
+		},
+		.result = REJECT,
+		.errstr = "invalid bpf_context access",
+		.prog_type = BPF_PROG_TYPE_LWT_IN,
+	},
+	{
+		"bounds checks mixing signed and unsigned, positive bounds",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
+			BPF_MOV64_IMM(BPF_REG_2, 2),
+			BPF_JMP_REG(BPF_JGE, BPF_REG_2, BPF_REG_1, 3),
+			BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 4, 2),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+			BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map1 = { 3 },
+		.errstr = "unbounded min value",
+		.result = REJECT,
+	},
+	{
+		"bounds checks mixing signed and unsigned",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
+			BPF_MOV64_IMM(BPF_REG_2, -1),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_2, 3),
+			BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 1, 2),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+			BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map1 = { 3 },
+		.errstr = "unbounded min value",
+		.result = REJECT,
+	},
+	{
+		"bounds checks mixing signed and unsigned, variant 2",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
+			BPF_MOV64_IMM(BPF_REG_2, -1),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_2, 5),
+			BPF_MOV64_IMM(BPF_REG_8, 0),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_8, BPF_REG_1),
+			BPF_JMP_IMM(BPF_JSGT, BPF_REG_8, 1, 2),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_8),
+			BPF_ST_MEM(BPF_B, BPF_REG_8, 0, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map1 = { 3 },
+		.errstr = "unbounded min value",
+		.result = REJECT,
+	},
+	{
+		"bounds checks mixing signed and unsigned, variant 3",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 8),
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
+			BPF_MOV64_IMM(BPF_REG_2, -1),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_2, 4),
+			BPF_MOV64_REG(BPF_REG_8, BPF_REG_1),
+			BPF_JMP_IMM(BPF_JSGT, BPF_REG_8, 1, 2),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_8),
+			BPF_ST_MEM(BPF_B, BPF_REG_8, 0, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map1 = { 3 },
+		.errstr = "unbounded min value",
+		.result = REJECT,
+	},
+	{
+		"bounds checks mixing signed and unsigned, variant 4",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
+			BPF_MOV64_IMM(BPF_REG_2, 1),
+			BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2),
+			BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 1, 2),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+			BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map1 = { 3 },
+		.result = ACCEPT,
+	},
+	{
+		"bounds checks mixing signed and unsigned, variant 5",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
+			BPF_MOV64_IMM(BPF_REG_2, -1),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_2, 5),
+			BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 1, 4),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 4),
+			BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1),
+			BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map1 = { 3 },
+		.errstr = "unbounded min value",
+		.result = REJECT,
+	},
+	{
+		"bounds checks mixing signed and unsigned, variant 6",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_2, 0),
+			BPF_MOV64_REG(BPF_REG_3, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, -512),
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -16),
+			BPF_MOV64_IMM(BPF_REG_6, -1),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_6, 5),
+			BPF_JMP_IMM(BPF_JSGT, BPF_REG_4, 1, 4),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 1),
+			BPF_MOV64_IMM(BPF_REG_5, 0),
+			BPF_ST_MEM(BPF_H, BPF_REG_10, -512, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_skb_load_bytes),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "R4 min value is negative, either use unsigned",
+		.result = REJECT,
+	},
+	{
+		"bounds checks mixing signed and unsigned, variant 7",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
+			BPF_MOV64_IMM(BPF_REG_2, 1024 * 1024 * 1024),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_2, 3),
+			BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 1, 2),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+			BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map1 = { 3 },
+		.result = ACCEPT,
+	},
+	{
+		"bounds checks mixing signed and unsigned, variant 8",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
+			BPF_MOV64_IMM(BPF_REG_2, -1),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_2, BPF_REG_1, 2),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+			BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 1, 2),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+			BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map1 = { 3 },
+		.errstr = "unbounded min value",
+		.result = REJECT,
+	},
+	{
+		"bounds checks mixing signed and unsigned, variant 9",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 10),
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
+			BPF_LD_IMM64(BPF_REG_2, -9223372036854775808ULL),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_2, BPF_REG_1, 2),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+			BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 1, 2),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+			BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map1 = { 3 },
+		.result = ACCEPT,
+	},
+	{
+		"bounds checks mixing signed and unsigned, variant 10",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
+			BPF_MOV64_IMM(BPF_REG_2, 0),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_2, BPF_REG_1, 2),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+			BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 1, 2),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+			BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map1 = { 3 },
+		.errstr = "unbounded min value",
+		.result = REJECT,
+	},
+	{
+		"bounds checks mixing signed and unsigned, variant 11",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
+			BPF_MOV64_IMM(BPF_REG_2, -1),
+			BPF_JMP_REG(BPF_JGE, BPF_REG_2, BPF_REG_1, 2),
+			/* Dead branch. */
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+			BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 1, 2),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+			BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map1 = { 3 },
+		.errstr = "unbounded min value",
+		.result = REJECT,
+	},
+	{
+		"bounds checks mixing signed and unsigned, variant 12",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
+			BPF_MOV64_IMM(BPF_REG_2, -6),
+			BPF_JMP_REG(BPF_JGE, BPF_REG_2, BPF_REG_1, 2),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+			BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 1, 2),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+			BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map1 = { 3 },
+		.errstr = "unbounded min value",
+		.result = REJECT,
+	},
+	{
+		"bounds checks mixing signed and unsigned, variant 13",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
+			BPF_MOV64_IMM(BPF_REG_2, 2),
+			BPF_JMP_REG(BPF_JGE, BPF_REG_2, BPF_REG_1, 2),
+			BPF_MOV64_IMM(BPF_REG_7, 1),
+			BPF_JMP_IMM(BPF_JSGT, BPF_REG_7, 0, 2),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_1),
+			BPF_JMP_IMM(BPF_JSGT, BPF_REG_7, 4, 2),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_7),
+			BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map1 = { 3 },
+		.errstr = "unbounded min value",
+		.result = REJECT,
+	},
+	{
+		"bounds checks mixing signed and unsigned, variant 14",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_9, BPF_REG_1,
+				    offsetof(struct __sk_buff, mark)),
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 8),
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
+			BPF_MOV64_IMM(BPF_REG_2, -1),
+			BPF_MOV64_IMM(BPF_REG_8, 2),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_9, 42, 6),
+			BPF_JMP_REG(BPF_JSGT, BPF_REG_8, BPF_REG_1, 3),
+			BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 1, 2),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+			BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_2, -3),
+			BPF_JMP_IMM(BPF_JA, 0, 0, -7),
+		},
+		.fixup_map1 = { 4 },
+		.errstr = "unbounded min value",
+		.result = REJECT,
+	},
+	{
+		"bounds checks mixing signed and unsigned, variant 15",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
+			BPF_MOV64_IMM(BPF_REG_2, -6),
+			BPF_JMP_REG(BPF_JGE, BPF_REG_2, BPF_REG_1, 2),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+			BPF_JMP_IMM(BPF_JGT, BPF_REG_0, 1, 2),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+			BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map1 = { 3 },
+		.errstr = "unbounded min value",
+		.result = REJECT,
+		.result_unpriv = REJECT,
+	},
+	{
+		"subtraction bounds (map value) variant 1",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
+			BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+			BPF_JMP_IMM(BPF_JGT, BPF_REG_1, 0xff, 7),
+			BPF_LDX_MEM(BPF_B, BPF_REG_3, BPF_REG_0, 1),
+			BPF_JMP_IMM(BPF_JGT, BPF_REG_3, 0xff, 5),
+			BPF_ALU64_REG(BPF_SUB, BPF_REG_1, BPF_REG_3),
+			BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 56),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map1 = { 3 },
+		.errstr = "R0 max value is outside of the array range",
+		.result = REJECT,
+	},
+	{
+		"subtraction bounds (map value) variant 2",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 8),
+			BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+			BPF_JMP_IMM(BPF_JGT, BPF_REG_1, 0xff, 6),
+			BPF_LDX_MEM(BPF_B, BPF_REG_3, BPF_REG_0, 1),
+			BPF_JMP_IMM(BPF_JGT, BPF_REG_3, 0xff, 4),
+			BPF_ALU64_REG(BPF_SUB, BPF_REG_1, BPF_REG_3),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map1 = { 3 },
+		.errstr = "R0 min value is negative, either use unsigned index or do a if (index >=0) check.",
+		.errstr_unpriv = "R1 has unknown scalar with mixed signed bounds",
+		.result = REJECT,
+	},
+	{
+		"bounds check based on zero-extended MOV",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+			/* r2 = 0x0000'0000'ffff'ffff */
+			BPF_MOV32_IMM(BPF_REG_2, 0xffffffff),
+			/* r2 = 0 */
+			BPF_ALU64_IMM(BPF_RSH, BPF_REG_2, 32),
+			/* no-op */
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2),
+			/* access at offset 0 */
+			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
+			/* exit */
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map1 = { 3 },
+		.result = ACCEPT
+	},
+	{
+		"bounds check based on sign-extended MOV. test1",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+			/* r2 = 0xffff'ffff'ffff'ffff */
+			BPF_MOV64_IMM(BPF_REG_2, 0xffffffff),
+			/* r2 = 0xffff'ffff */
+			BPF_ALU64_IMM(BPF_RSH, BPF_REG_2, 32),
+			/* r0 = <oob pointer> */
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2),
+			/* access to OOB pointer */
+			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
+			/* exit */
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map1 = { 3 },
+		.errstr = "map_value pointer and 4294967295",
+		.result = REJECT
+	},
+	{
+		"bounds check based on sign-extended MOV. test2",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+			/* r2 = 0xffff'ffff'ffff'ffff */
+			BPF_MOV64_IMM(BPF_REG_2, 0xffffffff),
+			/* r2 = 0xfff'ffff */
+			BPF_ALU64_IMM(BPF_RSH, BPF_REG_2, 36),
+			/* r0 = <oob pointer> */
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2),
+			/* access to OOB pointer */
+			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
+			/* exit */
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map1 = { 3 },
+		.errstr = "R0 min value is outside of the array range",
+		.result = REJECT
+	},
+	{
+		"bounds check based on reg_off + var_off + insn_off. test1",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+				    offsetof(struct __sk_buff, mark)),
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+			BPF_ALU64_IMM(BPF_AND, BPF_REG_6, 1),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, (1 << 29) - 1),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_6),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, (1 << 29) - 1),
+			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 3),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map1 = { 4 },
+		.errstr = "value_size=8 off=1073741825",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	},
+	{
+		"bounds check based on reg_off + var_off + insn_off. test2",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+				    offsetof(struct __sk_buff, mark)),
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+			BPF_ALU64_IMM(BPF_AND, BPF_REG_6, 1),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, (1 << 30) - 1),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_6),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, (1 << 29) - 1),
+			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 3),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map1 = { 4 },
+		.errstr = "value 1073741823",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	},
+	{
+		"bounds check after truncation of non-boundary-crossing range",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
+			/* r1 = [0x00, 0xff] */
+			BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+			BPF_MOV64_IMM(BPF_REG_2, 1),
+			/* r2 = 0x10'0000'0000 */
+			BPF_ALU64_IMM(BPF_LSH, BPF_REG_2, 36),
+			/* r1 = [0x10'0000'0000, 0x10'0000'00ff] */
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_2),
+			/* r1 = [0x10'7fff'ffff, 0x10'8000'00fe] */
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x7fffffff),
+			/* r1 = [0x00, 0xff] */
+			BPF_ALU32_IMM(BPF_SUB, BPF_REG_1, 0x7fffffff),
+			/* r1 = 0 */
+			BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 8),
+			/* no-op */
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+			/* access at offset 0 */
+			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
+			/* exit */
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map1 = { 3 },
+		.result = ACCEPT
+	},
+	{
+		"bounds check after truncation of boundary-crossing range (1)",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
+			/* r1 = [0x00, 0xff] */
+			BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0xffffff80 >> 1),
+			/* r1 = [0xffff'ff80, 0x1'0000'007f] */
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0xffffff80 >> 1),
+			/* r1 = [0xffff'ff80, 0xffff'ffff] or
+			 *      [0x0000'0000, 0x0000'007f]
+			 */
+			BPF_ALU32_IMM(BPF_ADD, BPF_REG_1, 0),
+			BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 0xffffff80 >> 1),
+			/* r1 = [0x00, 0xff] or
+			 *      [0xffff'ffff'0000'0080, 0xffff'ffff'ffff'ffff]
+			 */
+			BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 0xffffff80 >> 1),
+			/* r1 = 0 or
+			 *      [0x00ff'ffff'ff00'0000, 0x00ff'ffff'ffff'ffff]
+			 */
+			BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 8),
+			/* no-op or OOB pointer computation */
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+			/* potentially OOB access */
+			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
+			/* exit */
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map1 = { 3 },
+		/* not actually fully unbounded, but the bound is very high */
+		.errstr = "R0 unbounded memory access",
+		.result = REJECT
+	},
+	{
+		"bounds check after truncation of boundary-crossing range (2)",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
+			/* r1 = [0x00, 0xff] */
+			BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0xffffff80 >> 1),
+			/* r1 = [0xffff'ff80, 0x1'0000'007f] */
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0xffffff80 >> 1),
+			/* r1 = [0xffff'ff80, 0xffff'ffff] or
+			 *      [0x0000'0000, 0x0000'007f]
+			 * difference to previous test: truncation via MOV32
+			 * instead of ALU32.
+			 */
+			BPF_MOV32_REG(BPF_REG_1, BPF_REG_1),
+			BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 0xffffff80 >> 1),
+			/* r1 = [0x00, 0xff] or
+			 *      [0xffff'ffff'0000'0080, 0xffff'ffff'ffff'ffff]
+			 */
+			BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 0xffffff80 >> 1),
+			/* r1 = 0 or
+			 *      [0x00ff'ffff'ff00'0000, 0x00ff'ffff'ffff'ffff]
+			 */
+			BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 8),
+			/* no-op or OOB pointer computation */
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+			/* potentially OOB access */
+			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
+			/* exit */
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map1 = { 3 },
+		/* not actually fully unbounded, but the bound is very high */
+		.errstr = "R0 unbounded memory access",
+		.result = REJECT
+	},
+	{
+		"bounds check after wrapping 32-bit addition",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
+			/* r1 = 0x7fff'ffff */
+			BPF_MOV64_IMM(BPF_REG_1, 0x7fffffff),
+			/* r1 = 0xffff'fffe */
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x7fffffff),
+			/* r1 = 0 */
+			BPF_ALU32_IMM(BPF_ADD, BPF_REG_1, 2),
+			/* no-op */
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+			/* access at offset 0 */
+			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
+			/* exit */
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map1 = { 3 },
+		.result = ACCEPT
+	},
+	{
+		"bounds check after shift with oversized count operand",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
+			BPF_MOV64_IMM(BPF_REG_2, 32),
+			BPF_MOV64_IMM(BPF_REG_1, 1),
+			/* r1 = (u32)1 << (u32)32 = ? */
+			BPF_ALU32_REG(BPF_LSH, BPF_REG_1, BPF_REG_2),
+			/* r1 = [0x0000, 0xffff] */
+			BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0xffff),
+			/* computes unknown pointer, potentially OOB */
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+			/* potentially OOB access */
+			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
+			/* exit */
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map1 = { 3 },
+		.errstr = "R0 max value is outside of the array range",
+		.result = REJECT
+	},
+	{
+		"bounds check after right shift of maybe-negative number",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
+			/* r1 = [0x00, 0xff] */
+			BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+			/* r1 = [-0x01, 0xfe] */
+			BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 1),
+			/* r1 = 0 or 0xff'ffff'ffff'ffff */
+			BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 8),
+			/* r1 = 0 or 0xffff'ffff'ffff */
+			BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 8),
+			/* computes unknown pointer, potentially OOB */
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+			/* potentially OOB access */
+			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
+			/* exit */
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map1 = { 3 },
+		.errstr = "R0 unbounded memory access",
+		.result = REJECT
+	},
+	{
+		"bounds check map access with off+size signed 32bit overflow. test1",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+			BPF_EXIT_INSN(),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 0x7ffffffe),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
+			BPF_JMP_A(0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map1 = { 3 },
+		.errstr = "map_value pointer and 2147483646",
+		.result = REJECT
+	},
+	{
+		"bounds check map access with off+size signed 32bit overflow. test2",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+			BPF_EXIT_INSN(),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 0x1fffffff),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 0x1fffffff),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 0x1fffffff),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
+			BPF_JMP_A(0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map1 = { 3 },
+		.errstr = "pointer offset 1073741822",
+		.errstr_unpriv = "R0 pointer arithmetic of map value goes out of range",
+		.result = REJECT
+	},
+	{
+		"bounds check map access with off+size signed 32bit overflow. test3",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+			BPF_EXIT_INSN(),
+			BPF_ALU64_IMM(BPF_SUB, BPF_REG_0, 0x1fffffff),
+			BPF_ALU64_IMM(BPF_SUB, BPF_REG_0, 0x1fffffff),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 2),
+			BPF_JMP_A(0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map1 = { 3 },
+		.errstr = "pointer offset -1073741822",
+		.errstr_unpriv = "R0 pointer arithmetic of map value goes out of range",
+		.result = REJECT
+	},
+	{
+		"bounds check map access with off+size signed 32bit overflow. test4",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+			BPF_EXIT_INSN(),
+			BPF_MOV64_IMM(BPF_REG_1, 1000000),
+			BPF_ALU64_IMM(BPF_MUL, BPF_REG_1, 1000000),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 2),
+			BPF_JMP_A(0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map1 = { 3 },
+		.errstr = "map_value pointer and 1000000000000",
+		.result = REJECT
+	},
+	{
+		"pointer/scalar confusion in state equality check (way 1)",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
+			BPF_JMP_A(1),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_10),
+			BPF_JMP_A(0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map1 = { 3 },
+		.result = ACCEPT,
+		.retval = POINTER_VALUE,
+		.result_unpriv = REJECT,
+		.errstr_unpriv = "R0 leaks addr as return value"
+	},
+	{
+		"pointer/scalar confusion in state equality check (way 2)",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_10),
+			BPF_JMP_A(1),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map1 = { 3 },
+		.result = ACCEPT,
+		.retval = POINTER_VALUE,
+		.result_unpriv = REJECT,
+		.errstr_unpriv = "R0 leaks addr as return value"
+	},
+	{
+		"variable-offset ctx access",
+		.insns = {
+			/* Get an unknown value */
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0),
+			/* Make it small and 4-byte aligned */
+			BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 4),
+			/* add it to skb.  We now have either &skb->len or
+			 * &skb->pkt_type, but we don't know which
+			 */
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_2),
+			/* dereference it */
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "variable ctx access var_off=(0x0; 0x4)",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_LWT_IN,
+	},
+	{
+		"variable-offset stack access",
+		.insns = {
+			/* Fill the top 8 bytes of the stack */
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			/* Get an unknown value */
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0),
+			/* Make it small and 4-byte aligned */
+			BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 4),
+			BPF_ALU64_IMM(BPF_SUB, BPF_REG_2, 8),
+			/* add it to fp.  We now have either fp-4 or fp-8, but
+			 * we don't know which
+			 */
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_10),
+			/* dereference it */
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_2, 0),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "variable stack access var_off=(0xfffffffffffffff8; 0x4)",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_LWT_IN,
+	},
+	{
+		"indirect variable-offset stack access, out of bound",
+		.insns = {
+			/* Fill the top 8 bytes of the stack */
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			/* Get an unknown value */
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0),
+			/* Make it small and 4-byte aligned */
+			BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 4),
+			BPF_ALU64_IMM(BPF_SUB, BPF_REG_2, 8),
+			/* add it to fp.  We now have either fp-4 or fp-8, but
+			 * we don't know which
+			 */
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_10),
+			/* dereference it indirectly */
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map1 = { 5 },
+		.errstr = "invalid stack type R2 var_off",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_LWT_IN,
+	},
+	{
+		"indirect variable-offset stack access, max_off+size > max_initialized",
+		.insns = {
+		/* Fill only the second from top 8 bytes of the stack. */
+		BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, 0),
+		/* Get an unknown value. */
+		BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0),
+		/* Make it small and 4-byte aligned. */
+		BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 4),
+		BPF_ALU64_IMM(BPF_SUB, BPF_REG_2, 16),
+		/* Add it to fp.  We now have either fp-12 or fp-16, but we don't know
+		 * which. fp-12 size 8 is partially uninitialized stack.
+		 */
+		BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_10),
+		/* Dereference it indirectly. */
+		BPF_LD_MAP_FD(BPF_REG_1, 0),
+		BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+		BPF_MOV64_IMM(BPF_REG_0, 0),
+		BPF_EXIT_INSN(),
+		},
+		.fixup_map1 = { 5 },
+		.errstr = "invalid indirect read from stack var_off",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_LWT_IN,
+	},
+	{
+		"indirect variable-offset stack access, min_off < min_initialized",
+		.insns = {
+		/* Fill only the top 8 bytes of the stack. */
+		BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+		/* Get an unknown value */
+		BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0),
+		/* Make it small and 4-byte aligned. */
+		BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 4),
+		BPF_ALU64_IMM(BPF_SUB, BPF_REG_2, 16),
+		/* Add it to fp.  We now have either fp-12 or fp-16, but we don't know
+		 * which. fp-16 size 8 is partially uninitialized stack.
+		 */
+		BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_10),
+		/* Dereference it indirectly. */
+		BPF_LD_MAP_FD(BPF_REG_1, 0),
+		BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+		BPF_MOV64_IMM(BPF_REG_0, 0),
+		BPF_EXIT_INSN(),
+		},
+		.fixup_map1 = { 5 },
+		.errstr = "invalid indirect read from stack var_off",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_LWT_IN,
+	},
+	{
+		"indirect variable-offset stack access, ok",
+		.insns = {
+		/* Fill the top 16 bytes of the stack. */
+		BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, 0),
+		BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+		/* Get an unknown value. */
+		BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0),
+		/* Make it small and 4-byte aligned. */
+		BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 4),
+		BPF_ALU64_IMM(BPF_SUB, BPF_REG_2, 16),
+		/* Add it to fp.  We now have either fp-12 or fp-16, we don't know
+		 * which, but either way it points to initialized stack.
+		 */
+		BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_10),
+		/* Dereference it indirectly. */
+		BPF_LD_MAP_FD(BPF_REG_1, 0),
+		BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+		BPF_MOV64_IMM(BPF_REG_0, 0),
+		BPF_EXIT_INSN(),
+		},
+		.fixup_map1 = { 6 },
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_LWT_IN,
+	},
+	{
+		"direct stack access with 32-bit wraparound. test1",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x7fffffff),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x7fffffff),
+			BPF_MOV32_IMM(BPF_REG_0, 0),
+			BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+			BPF_EXIT_INSN()
+		},
+		.errstr = "fp pointer and 2147483647",
+		.result = REJECT
+	},
+	{
+		"direct stack access with 32-bit wraparound. test2",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x3fffffff),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x3fffffff),
+			BPF_MOV32_IMM(BPF_REG_0, 0),
+			BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+			BPF_EXIT_INSN()
+		},
+		.errstr = "fp pointer and 1073741823",
+		.result = REJECT
+	},
+	{
+		"direct stack access with 32-bit wraparound. test3",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x1fffffff),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x1fffffff),
+			BPF_MOV32_IMM(BPF_REG_0, 0),
+			BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+			BPF_EXIT_INSN()
+		},
+		.errstr = "fp pointer offset 1073741822",
+		.errstr_unpriv = "R1 stack pointer arithmetic goes out of range",
+		.result = REJECT
+	},
+	{
+		"liveness pruning and write screening",
+		.insns = {
+			/* Get an unknown value */
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0),
+			/* branch conditions teach us nothing about R2 */
+			BPF_JMP_IMM(BPF_JGE, BPF_REG_2, 0, 1),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_JMP_IMM(BPF_JGE, BPF_REG_2, 0, 1),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "R0 !read_ok",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_LWT_IN,
+	},
+	{
+		"varlen_map_value_access pruning",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 8),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0),
+			BPF_MOV32_IMM(BPF_REG_2, MAX_ENTRIES),
+			BPF_JMP_REG(BPF_JSGT, BPF_REG_2, BPF_REG_1, 1),
+			BPF_MOV32_IMM(BPF_REG_1, 0),
+			BPF_ALU32_IMM(BPF_LSH, BPF_REG_1, 2),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+			BPF_JMP_IMM(BPF_JA, 0, 0, 0),
+			BPF_ST_MEM(BPF_DW, BPF_REG_0, 0,
+				   offsetof(struct test_val, foo)),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map2 = { 3 },
+		.errstr_unpriv = "R0 leaks addr",
+		.errstr = "R0 unbounded memory access",
+		.result_unpriv = REJECT,
+		.result = REJECT,
+		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+	},
+	{
+		"invalid 64-bit BPF_END",
+		.insns = {
+			BPF_MOV32_IMM(BPF_REG_0, 0),
+			{
+				.code  = BPF_ALU64 | BPF_END | BPF_TO_LE,
+				.dst_reg = BPF_REG_0,
+				.src_reg = 0,
+				.off   = 0,
+				.imm   = 32,
+			},
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "unknown opcode d7",
+		.result = REJECT,
+	},
+	{
+		"XDP, using ifindex from netdev",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct xdp_md, ingress_ifindex)),
+			BPF_JMP_IMM(BPF_JLT, BPF_REG_2, 1, 1),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_XDP,
+		.retval = 1,
+	},
+	{
+		"meta access, test1",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct xdp_md, data_meta)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct xdp_md, data)),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
+			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_XDP,
+	},
+	{
+		"meta access, test2",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct xdp_md, data_meta)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct xdp_md, data)),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_SUB, BPF_REG_0, 8),
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 8),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 1),
+			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = REJECT,
+		.errstr = "invalid access to packet, off=-8",
+		.prog_type = BPF_PROG_TYPE_XDP,
+	},
+	{
+		"meta access, test3",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct xdp_md, data_meta)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct xdp_md, data_end)),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
+			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = REJECT,
+		.errstr = "invalid access to packet",
+		.prog_type = BPF_PROG_TYPE_XDP,
+	},
+	{
+		"meta access, test4",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct xdp_md, data_meta)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct xdp_md, data_end)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1,
+				    offsetof(struct xdp_md, data)),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_4),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
+			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = REJECT,
+		.errstr = "invalid access to packet",
+		.prog_type = BPF_PROG_TYPE_XDP,
+	},
+	{
+		"meta access, test5",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct xdp_md, data_meta)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1,
+				    offsetof(struct xdp_md, data)),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_3),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_4, 3),
+			BPF_MOV64_IMM(BPF_REG_2, -8),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_xdp_adjust_meta),
+			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_3, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = REJECT,
+		.errstr = "R3 !read_ok",
+		.prog_type = BPF_PROG_TYPE_XDP,
+	},
+	{
+		"meta access, test6",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct xdp_md, data_meta)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct xdp_md, data)),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_3),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 8),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_0, 1),
+			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = REJECT,
+		.errstr = "invalid access to packet",
+		.prog_type = BPF_PROG_TYPE_XDP,
+	},
+	{
+		"meta access, test7",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct xdp_md, data_meta)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct xdp_md, data)),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_3),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 8),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 1),
+			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_XDP,
+	},
+	{
+		"meta access, test8",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct xdp_md, data_meta)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct xdp_md, data)),
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 0xFFFF),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 1),
+			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_XDP,
+	},
+	{
+		"meta access, test9",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct xdp_md, data_meta)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct xdp_md, data)),
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 0xFFFF),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 1),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 1),
+			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = REJECT,
+		.errstr = "invalid access to packet",
+		.prog_type = BPF_PROG_TYPE_XDP,
+	},
+	{
+		"meta access, test10",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct xdp_md, data_meta)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct xdp_md, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1,
+				    offsetof(struct xdp_md, data_end)),
+			BPF_MOV64_IMM(BPF_REG_5, 42),
+			BPF_MOV64_IMM(BPF_REG_6, 24),
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_5, -8),
+			BPF_STX_XADD(BPF_DW, BPF_REG_10, BPF_REG_6, -8),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_5, BPF_REG_10, -8),
+			BPF_JMP_IMM(BPF_JGT, BPF_REG_5, 100, 6),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_3, BPF_REG_5),
+			BPF_MOV64_REG(BPF_REG_5, BPF_REG_3),
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 8),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_6, BPF_REG_5, 1),
+			BPF_LDX_MEM(BPF_B, BPF_REG_2, BPF_REG_2, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = REJECT,
+		.errstr = "invalid access to packet",
+		.prog_type = BPF_PROG_TYPE_XDP,
+	},
+	{
+		"meta access, test11",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct xdp_md, data_meta)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct xdp_md, data)),
+			BPF_MOV64_IMM(BPF_REG_5, 42),
+			BPF_MOV64_IMM(BPF_REG_6, 24),
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_5, -8),
+			BPF_STX_XADD(BPF_DW, BPF_REG_10, BPF_REG_6, -8),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_5, BPF_REG_10, -8),
+			BPF_JMP_IMM(BPF_JGT, BPF_REG_5, 100, 6),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_5),
+			BPF_MOV64_REG(BPF_REG_5, BPF_REG_2),
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 8),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_6, BPF_REG_3, 1),
+			BPF_LDX_MEM(BPF_B, BPF_REG_5, BPF_REG_5, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_XDP,
+	},
+	{
+		"meta access, test12",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct xdp_md, data_meta)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct xdp_md, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1,
+				    offsetof(struct xdp_md, data_end)),
+			BPF_MOV64_REG(BPF_REG_5, BPF_REG_3),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 16),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_5, BPF_REG_4, 5),
+			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_3, 0),
+			BPF_MOV64_REG(BPF_REG_5, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 16),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_5, BPF_REG_3, 1),
+			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_XDP,
+	},
+	{
+		"arithmetic ops make PTR_TO_CTX unusable",
+		.insns = {
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1,
+				      offsetof(struct __sk_buff, data) -
+				      offsetof(struct __sk_buff, mark)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, mark)),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "dereference of modified ctx ptr",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	},
+	{
+		"pkt_end - pkt_start is allowed",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, data_end)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct __sk_buff, data)),
+			BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_2),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.retval = TEST_DATA_LEN,
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	},
+	{
+		"XDP pkt read, pkt_end mangling, bad access 1",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct xdp_md, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct xdp_md, data_end)),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, 8),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "R3 pointer arithmetic on PTR_TO_PACKET_END",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_XDP,
+	},
+	{
+		"XDP pkt read, pkt_end mangling, bad access 2",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct xdp_md, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct xdp_md, data_end)),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+			BPF_ALU64_IMM(BPF_SUB, BPF_REG_3, 8),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "R3 pointer arithmetic on PTR_TO_PACKET_END",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_XDP,
+	},
+	{
+		"XDP pkt read, pkt_data' > pkt_end, good access",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct xdp_md, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct xdp_md, data_end)),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_XDP,
+		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+	},
+	{
+		"XDP pkt read, pkt_data' > pkt_end, bad access 1",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct xdp_md, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct xdp_md, data_end)),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -4),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "R1 offset is outside of the packet",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_XDP,
+		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+	},
+	{
+		"XDP pkt read, pkt_data' > pkt_end, bad access 2",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct xdp_md, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct xdp_md, data_end)),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 0),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "R1 offset is outside of the packet",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_XDP,
+		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+	},
+	{
+		"XDP pkt read, pkt_end > pkt_data', good access",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct xdp_md, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct xdp_md, data_end)),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_1, 1),
+			BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -5),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_XDP,
+		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+	},
+	{
+		"XDP pkt read, pkt_end > pkt_data', bad access 1",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct xdp_md, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct xdp_md, data_end)),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_1, 1),
+			BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "R1 offset is outside of the packet",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_XDP,
+		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+	},
+	{
+		"XDP pkt read, pkt_end > pkt_data', bad access 2",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct xdp_md, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct xdp_md, data_end)),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_1, 1),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "R1 offset is outside of the packet",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_XDP,
+		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+	},
+	{
+		"XDP pkt read, pkt_data' < pkt_end, good access",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct xdp_md, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct xdp_md, data_end)),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+			BPF_JMP_REG(BPF_JLT, BPF_REG_1, BPF_REG_3, 1),
+			BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -5),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_XDP,
+		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+	},
+	{
+		"XDP pkt read, pkt_data' < pkt_end, bad access 1",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct xdp_md, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct xdp_md, data_end)),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+			BPF_JMP_REG(BPF_JLT, BPF_REG_1, BPF_REG_3, 1),
+			BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "R1 offset is outside of the packet",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_XDP,
+		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+	},
+	{
+		"XDP pkt read, pkt_data' < pkt_end, bad access 2",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct xdp_md, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct xdp_md, data_end)),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+			BPF_JMP_REG(BPF_JLT, BPF_REG_1, BPF_REG_3, 1),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "R1 offset is outside of the packet",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_XDP,
+		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+	},
+	{
+		"XDP pkt read, pkt_end < pkt_data', good access",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct xdp_md, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct xdp_md, data_end)),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+			BPF_JMP_REG(BPF_JLT, BPF_REG_3, BPF_REG_1, 1),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_XDP,
+		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+	},
+	{
+		"XDP pkt read, pkt_end < pkt_data', bad access 1",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct xdp_md, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct xdp_md, data_end)),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+			BPF_JMP_REG(BPF_JLT, BPF_REG_3, BPF_REG_1, 1),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -4),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "R1 offset is outside of the packet",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_XDP,
+		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+	},
+	{
+		"XDP pkt read, pkt_end < pkt_data', bad access 2",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct xdp_md, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct xdp_md, data_end)),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+			BPF_JMP_REG(BPF_JLT, BPF_REG_3, BPF_REG_1, 0),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "R1 offset is outside of the packet",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_XDP,
+		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+	},
+	{
+		"XDP pkt read, pkt_data' >= pkt_end, good access",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct xdp_md, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct xdp_md, data_end)),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+			BPF_JMP_REG(BPF_JGE, BPF_REG_1, BPF_REG_3, 1),
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -5),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_XDP,
+		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+	},
+	{
+		"XDP pkt read, pkt_data' >= pkt_end, bad access 1",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct xdp_md, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct xdp_md, data_end)),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+			BPF_JMP_REG(BPF_JGE, BPF_REG_1, BPF_REG_3, 1),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "R1 offset is outside of the packet",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_XDP,
+		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+	},
+	{
+		"XDP pkt read, pkt_data' >= pkt_end, bad access 2",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct xdp_md, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct xdp_md, data_end)),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+			BPF_JMP_REG(BPF_JGE, BPF_REG_1, BPF_REG_3, 0),
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -5),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "R1 offset is outside of the packet",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_XDP,
+		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+	},
+	{
+		"XDP pkt read, pkt_end >= pkt_data', good access",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct xdp_md, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct xdp_md, data_end)),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+			BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_1, 1),
+			BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_XDP,
+		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+	},
+	{
+		"XDP pkt read, pkt_end >= pkt_data', bad access 1",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct xdp_md, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct xdp_md, data_end)),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+			BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_1, 1),
+			BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -4),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "R1 offset is outside of the packet",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_XDP,
+		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+	},
+	{
+		"XDP pkt read, pkt_end >= pkt_data', bad access 2",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct xdp_md, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct xdp_md, data_end)),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+			BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_1, 1),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "R1 offset is outside of the packet",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_XDP,
+		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+	},
+	{
+		"XDP pkt read, pkt_data' <= pkt_end, good access",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct xdp_md, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct xdp_md, data_end)),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+			BPF_JMP_REG(BPF_JLE, BPF_REG_1, BPF_REG_3, 1),
+			BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_XDP,
+		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+	},
+	{
+		"XDP pkt read, pkt_data' <= pkt_end, bad access 1",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct xdp_md, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct xdp_md, data_end)),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+			BPF_JMP_REG(BPF_JLE, BPF_REG_1, BPF_REG_3, 1),
+			BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -4),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "R1 offset is outside of the packet",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_XDP,
+		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+	},
+	{
+		"XDP pkt read, pkt_data' <= pkt_end, bad access 2",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct xdp_md, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct xdp_md, data_end)),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+			BPF_JMP_REG(BPF_JLE, BPF_REG_1, BPF_REG_3, 1),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "R1 offset is outside of the packet",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_XDP,
+		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+	},
+	{
+		"XDP pkt read, pkt_end <= pkt_data', good access",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct xdp_md, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct xdp_md, data_end)),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+			BPF_JMP_REG(BPF_JLE, BPF_REG_3, BPF_REG_1, 1),
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -5),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_XDP,
+		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+	},
+	{
+		"XDP pkt read, pkt_end <= pkt_data', bad access 1",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct xdp_md, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct xdp_md, data_end)),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+			BPF_JMP_REG(BPF_JLE, BPF_REG_3, BPF_REG_1, 1),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "R1 offset is outside of the packet",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_XDP,
+		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+	},
+	{
+		"XDP pkt read, pkt_end <= pkt_data', bad access 2",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct xdp_md, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct xdp_md, data_end)),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+			BPF_JMP_REG(BPF_JLE, BPF_REG_3, BPF_REG_1, 0),
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -5),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "R1 offset is outside of the packet",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_XDP,
+		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+	},
+	{
+		"XDP pkt read, pkt_meta' > pkt_data, good access",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct xdp_md, data_meta)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct xdp_md, data)),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_XDP,
+		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+	},
+	{
+		"XDP pkt read, pkt_meta' > pkt_data, bad access 1",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct xdp_md, data_meta)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct xdp_md, data)),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -4),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "R1 offset is outside of the packet",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_XDP,
+		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+	},
+	{
+		"XDP pkt read, pkt_meta' > pkt_data, bad access 2",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct xdp_md, data_meta)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct xdp_md, data)),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 0),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "R1 offset is outside of the packet",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_XDP,
+		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+	},
+	{
+		"XDP pkt read, pkt_data > pkt_meta', good access",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct xdp_md, data_meta)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct xdp_md, data)),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_1, 1),
+			BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -5),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_XDP,
+		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+	},
+	{
+		"XDP pkt read, pkt_data > pkt_meta', bad access 1",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct xdp_md, data_meta)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct xdp_md, data)),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_1, 1),
+			BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "R1 offset is outside of the packet",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_XDP,
+		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+	},
+	{
+		"XDP pkt read, pkt_data > pkt_meta', bad access 2",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct xdp_md, data_meta)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct xdp_md, data)),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_1, 1),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "R1 offset is outside of the packet",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_XDP,
+		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+	},
+	{
+		"XDP pkt read, pkt_meta' < pkt_data, good access",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct xdp_md, data_meta)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct xdp_md, data)),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+			BPF_JMP_REG(BPF_JLT, BPF_REG_1, BPF_REG_3, 1),
+			BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -5),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_XDP,
+		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+	},
+	{
+		"XDP pkt read, pkt_meta' < pkt_data, bad access 1",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct xdp_md, data_meta)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct xdp_md, data)),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+			BPF_JMP_REG(BPF_JLT, BPF_REG_1, BPF_REG_3, 1),
+			BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "R1 offset is outside of the packet",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_XDP,
+		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+	},
+	{
+		"XDP pkt read, pkt_meta' < pkt_data, bad access 2",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct xdp_md, data_meta)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct xdp_md, data)),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+			BPF_JMP_REG(BPF_JLT, BPF_REG_1, BPF_REG_3, 1),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "R1 offset is outside of the packet",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_XDP,
+		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+	},
+	{
+		"XDP pkt read, pkt_data < pkt_meta', good access",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct xdp_md, data_meta)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct xdp_md, data)),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+			BPF_JMP_REG(BPF_JLT, BPF_REG_3, BPF_REG_1, 1),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_XDP,
+		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+	},
+	{
+		"XDP pkt read, pkt_data < pkt_meta', bad access 1",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct xdp_md, data_meta)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct xdp_md, data)),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+			BPF_JMP_REG(BPF_JLT, BPF_REG_3, BPF_REG_1, 1),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -4),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "R1 offset is outside of the packet",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_XDP,
+		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+	},
+	{
+		"XDP pkt read, pkt_data < pkt_meta', bad access 2",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct xdp_md, data_meta)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct xdp_md, data)),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+			BPF_JMP_REG(BPF_JLT, BPF_REG_3, BPF_REG_1, 0),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "R1 offset is outside of the packet",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_XDP,
+		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+	},
+	{
+		"XDP pkt read, pkt_meta' >= pkt_data, good access",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct xdp_md, data_meta)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct xdp_md, data)),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+			BPF_JMP_REG(BPF_JGE, BPF_REG_1, BPF_REG_3, 1),
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -5),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_XDP,
+		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+	},
+	{
+		"XDP pkt read, pkt_meta' >= pkt_data, bad access 1",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct xdp_md, data_meta)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct xdp_md, data)),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+			BPF_JMP_REG(BPF_JGE, BPF_REG_1, BPF_REG_3, 1),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "R1 offset is outside of the packet",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_XDP,
+		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+	},
+	{
+		"XDP pkt read, pkt_meta' >= pkt_data, bad access 2",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct xdp_md, data_meta)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct xdp_md, data)),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+			BPF_JMP_REG(BPF_JGE, BPF_REG_1, BPF_REG_3, 0),
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -5),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "R1 offset is outside of the packet",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_XDP,
+		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+	},
+	{
+		"XDP pkt read, pkt_data >= pkt_meta', good access",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct xdp_md, data_meta)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct xdp_md, data)),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+			BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_1, 1),
+			BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_XDP,
+		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+	},
+	{
+		"XDP pkt read, pkt_data >= pkt_meta', bad access 1",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct xdp_md, data_meta)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct xdp_md, data)),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+			BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_1, 1),
+			BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -4),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "R1 offset is outside of the packet",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_XDP,
+		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+	},
+	{
+		"XDP pkt read, pkt_data >= pkt_meta', bad access 2",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct xdp_md, data_meta)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct xdp_md, data)),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+			BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_1, 1),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "R1 offset is outside of the packet",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_XDP,
+		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+	},
+	{
+		"XDP pkt read, pkt_meta' <= pkt_data, good access",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct xdp_md, data_meta)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct xdp_md, data)),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+			BPF_JMP_REG(BPF_JLE, BPF_REG_1, BPF_REG_3, 1),
+			BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_XDP,
+		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+	},
+	{
+		"XDP pkt read, pkt_meta' <= pkt_data, bad access 1",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct xdp_md, data_meta)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct xdp_md, data)),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+			BPF_JMP_REG(BPF_JLE, BPF_REG_1, BPF_REG_3, 1),
+			BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -4),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "R1 offset is outside of the packet",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_XDP,
+		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+	},
+	{
+		"XDP pkt read, pkt_meta' <= pkt_data, bad access 2",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct xdp_md, data_meta)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct xdp_md, data)),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+			BPF_JMP_REG(BPF_JLE, BPF_REG_1, BPF_REG_3, 1),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "R1 offset is outside of the packet",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_XDP,
+		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+	},
+	{
+		"XDP pkt read, pkt_data <= pkt_meta', good access",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct xdp_md, data_meta)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct xdp_md, data)),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+			BPF_JMP_REG(BPF_JLE, BPF_REG_3, BPF_REG_1, 1),
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -5),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_XDP,
+		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+	},
+	{
+		"XDP pkt read, pkt_data <= pkt_meta', bad access 1",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct xdp_md, data_meta)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct xdp_md, data)),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+			BPF_JMP_REG(BPF_JLE, BPF_REG_3, BPF_REG_1, 1),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "R1 offset is outside of the packet",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_XDP,
+		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+	},
+	{
+		"XDP pkt read, pkt_data <= pkt_meta', bad access 2",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct xdp_md, data_meta)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct xdp_md, data)),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+			BPF_JMP_REG(BPF_JLE, BPF_REG_3, BPF_REG_1, 0),
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -5),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "R1 offset is outside of the packet",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_XDP,
+		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+	},
+	{
+		"check deducing bounds from const, 1",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 1, 0),
+			BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1),
+			BPF_EXIT_INSN(),
+		},
+		.errstr_unpriv = "R1 has pointer with unsupported alu operation",
+		.errstr = "R0 tried to subtract pointer from scalar",
+		.result = REJECT,
+	},
+	{
+		"check deducing bounds from const, 2",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 1, 1),
+			BPF_EXIT_INSN(),
+			BPF_JMP_IMM(BPF_JSLE, BPF_REG_0, 1, 1),
+			BPF_EXIT_INSN(),
+			BPF_ALU64_REG(BPF_SUB, BPF_REG_1, BPF_REG_0),
+			BPF_EXIT_INSN(),
+		},
+		.errstr_unpriv = "R1 has pointer with unsupported alu operation",
+		.result_unpriv = REJECT,
+		.result = ACCEPT,
+		.retval = 1,
+	},
+	{
+		"check deducing bounds from const, 3",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_JMP_IMM(BPF_JSLE, BPF_REG_0, 0, 0),
+			BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1),
+			BPF_EXIT_INSN(),
+		},
+		.errstr_unpriv = "R1 has pointer with unsupported alu operation",
+		.errstr = "R0 tried to subtract pointer from scalar",
+		.result = REJECT,
+	},
+	{
+		"check deducing bounds from const, 4",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_JMP_IMM(BPF_JSLE, BPF_REG_0, 0, 1),
+			BPF_EXIT_INSN(),
+			BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 0, 1),
+			BPF_EXIT_INSN(),
+			BPF_ALU64_REG(BPF_SUB, BPF_REG_6, BPF_REG_0),
+			BPF_EXIT_INSN(),
+		},
+		.errstr_unpriv = "R6 has pointer with unsupported alu operation",
+		.result_unpriv = REJECT,
+		.result = ACCEPT,
+	},
+	{
+		"check deducing bounds from const, 5",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 1, 1),
+			BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1),
+			BPF_EXIT_INSN(),
+		},
+		.errstr_unpriv = "R1 has pointer with unsupported alu operation",
+		.errstr = "R0 tried to subtract pointer from scalar",
+		.result = REJECT,
+	},
+	{
+		"check deducing bounds from const, 6",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 0, 1),
+			BPF_EXIT_INSN(),
+			BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1),
+			BPF_EXIT_INSN(),
+		},
+		.errstr_unpriv = "R1 has pointer with unsupported alu operation",
+		.errstr = "R0 tried to subtract pointer from scalar",
+		.result = REJECT,
+	},
+	{
+		"check deducing bounds from const, 7",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, ~0),
+			BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 0, 0),
+			BPF_ALU64_REG(BPF_SUB, BPF_REG_1, BPF_REG_0),
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, mark)),
+			BPF_EXIT_INSN(),
+		},
+		.errstr_unpriv = "R1 has pointer with unsupported alu operation",
+		.errstr = "dereference of modified ctx ptr",
+		.result = REJECT,
+		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+	},
+	{
+		"check deducing bounds from const, 8",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, ~0),
+			BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 0, 1),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0),
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, mark)),
+			BPF_EXIT_INSN(),
+		},
+		.errstr_unpriv = "R1 has pointer with unsupported alu operation",
+		.errstr = "dereference of modified ctx ptr",
+		.result = REJECT,
+		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+	},
+	{
+		"check deducing bounds from const, 9",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 0, 0),
+			BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1),
+			BPF_EXIT_INSN(),
+		},
+		.errstr_unpriv = "R1 has pointer with unsupported alu operation",
+		.errstr = "R0 tried to subtract pointer from scalar",
+		.result = REJECT,
+	},
+	{
+		"check deducing bounds from const, 10",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_JMP_IMM(BPF_JSLE, BPF_REG_0, 0, 0),
+			/* Marks reg as unknown. */
+			BPF_ALU64_IMM(BPF_NEG, BPF_REG_0, 0),
+			BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "math between ctx pointer and register with unbounded min value is not allowed",
+		.result = REJECT,
+	},
+	{
+		"bpf_exit with invalid return code. test1",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "R0 has value (0x0; 0xffffffff)",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_CGROUP_SOCK,
+	},
+	{
+		"bpf_exit with invalid return code. test2",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0),
+			BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_CGROUP_SOCK,
+	},
+	{
+		"bpf_exit with invalid return code. test3",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0),
+			BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 3),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "R0 has value (0x0; 0x3)",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_CGROUP_SOCK,
+	},
+	{
+		"bpf_exit with invalid return code. test4",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_CGROUP_SOCK,
+	},
+	{
+		"bpf_exit with invalid return code. test5",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 2),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "R0 has value (0x2; 0x0)",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_CGROUP_SOCK,
+	},
+	{
+		"bpf_exit with invalid return code. test6",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "R0 is not a known value (ctx)",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_CGROUP_SOCK,
+	},
+	{
+		"bpf_exit with invalid return code. test7",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0),
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 4),
+			BPF_ALU64_REG(BPF_MUL, BPF_REG_0, BPF_REG_2),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "R0 has unknown scalar value",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_CGROUP_SOCK,
+	},
+	{
+		"calls: basic sanity",
+		.insns = {
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+			BPF_MOV64_IMM(BPF_REG_0, 2),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+		.result = ACCEPT,
+	},
+	{
+		"calls: not on unpriviledged",
+		.insns = {
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+			BPF_MOV64_IMM(BPF_REG_0, 2),
+			BPF_EXIT_INSN(),
+		},
+		.errstr_unpriv = "function calls to other bpf functions are allowed for root only",
+		.result_unpriv = REJECT,
+		.result = ACCEPT,
+		.retval = 1,
+	},
+	{
+		"calls: div by 0 in subprog",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 8),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+			BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_1,
+				    offsetof(struct __sk_buff, data_end)),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 8),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_2, BPF_REG_1, 1),
+			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+			BPF_MOV32_IMM(BPF_REG_2, 0),
+			BPF_MOV32_IMM(BPF_REG_3, 1),
+			BPF_ALU32_REG(BPF_DIV, BPF_REG_3, BPF_REG_2),
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, data)),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = ACCEPT,
+		.retval = 1,
+	},
+	{
+		"calls: multiple ret types in subprog 1",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 8),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+			BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_1,
+				    offsetof(struct __sk_buff, data_end)),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 8),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_2, BPF_REG_1, 1),
+			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, data)),
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+			BPF_MOV32_IMM(BPF_REG_0, 42),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = REJECT,
+		.errstr = "R0 invalid mem access 'inv'",
+	},
+	{
+		"calls: multiple ret types in subprog 2",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 8),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+			BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_1,
+				    offsetof(struct __sk_buff, data_end)),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 8),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_2, BPF_REG_1, 1),
+			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, data)),
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 9),
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_6,
+				    offsetof(struct __sk_buff, data)),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 64),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.fixup_map1 = { 16 },
+		.result = REJECT,
+		.errstr = "R0 min value is outside of the array range",
+	},
+	{
+		"calls: overlapping caller/callee",
+		.insns = {
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+		.errstr = "last insn is not an exit or jmp",
+		.result = REJECT,
+	},
+	{
+		"calls: wrong recursive calls",
+		.insns = {
+			BPF_JMP_IMM(BPF_JA, 0, 0, 4),
+			BPF_JMP_IMM(BPF_JA, 0, 0, 4),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -2),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -2),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -2),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+		.errstr = "jump out of range",
+		.result = REJECT,
+	},
+	{
+		"calls: wrong src reg",
+		.insns = {
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 2, 0, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+		.errstr = "BPF_CALL uses reserved fields",
+		.result = REJECT,
+	},
+	{
+		"calls: wrong off value",
+		.insns = {
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, -1, 2),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+			BPF_MOV64_IMM(BPF_REG_0, 2),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+		.errstr = "BPF_CALL uses reserved fields",
+		.result = REJECT,
+	},
+	{
+		"calls: jump back loop",
+		.insns = {
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -1),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+		.errstr = "back-edge from insn 0 to 0",
+		.result = REJECT,
+	},
+	{
+		"calls: conditional call",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, mark)),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+			BPF_MOV64_IMM(BPF_REG_0, 2),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+		.errstr = "jump out of range",
+		.result = REJECT,
+	},
+	{
+		"calls: conditional call 2",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, mark)),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+			BPF_MOV64_IMM(BPF_REG_0, 2),
+			BPF_EXIT_INSN(),
+			BPF_MOV64_IMM(BPF_REG_0, 3),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+		.result = ACCEPT,
+	},
+	{
+		"calls: conditional call 3",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, mark)),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3),
+			BPF_JMP_IMM(BPF_JA, 0, 0, 4),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_JMP_IMM(BPF_JA, 0, 0, -6),
+			BPF_MOV64_IMM(BPF_REG_0, 3),
+			BPF_JMP_IMM(BPF_JA, 0, 0, -6),
+		},
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+		.errstr = "back-edge from insn",
+		.result = REJECT,
+	},
+	{
+		"calls: conditional call 4",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, mark)),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_JMP_IMM(BPF_JA, 0, 0, -5),
+			BPF_MOV64_IMM(BPF_REG_0, 3),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+		.result = ACCEPT,
+	},
+	{
+		"calls: conditional call 5",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, mark)),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_JMP_IMM(BPF_JA, 0, 0, -6),
+			BPF_MOV64_IMM(BPF_REG_0, 3),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+		.errstr = "back-edge from insn",
+		.result = REJECT,
+	},
+	{
+		"calls: conditional call 6",
+		.insns = {
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, -2),
+			BPF_EXIT_INSN(),
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, mark)),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+		.errstr = "back-edge from insn",
+		.result = REJECT,
+	},
+	{
+		"calls: using r0 returned by callee",
+		.insns = {
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+			BPF_EXIT_INSN(),
+			BPF_MOV64_IMM(BPF_REG_0, 2),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+		.result = ACCEPT,
+	},
+	{
+		"calls: using uninit r0 from callee",
+		.insns = {
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+			BPF_EXIT_INSN(),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+		.errstr = "!read_ok",
+		.result = REJECT,
+	},
+	{
+		"calls: callee is using r1",
+		.insns = {
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+			BPF_EXIT_INSN(),
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, len)),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_ACT,
+		.result = ACCEPT,
+		.retval = TEST_DATA_LEN,
+	},
+	{
+		"calls: callee using args1",
+		.insns = {
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+			BPF_EXIT_INSN(),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
+			BPF_EXIT_INSN(),
+		},
+		.errstr_unpriv = "allowed for root only",
+		.result_unpriv = REJECT,
+		.result = ACCEPT,
+		.retval = POINTER_VALUE,
+	},
+	{
+		"calls: callee using wrong args2",
+		.insns = {
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+			BPF_EXIT_INSN(),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+		.errstr = "R2 !read_ok",
+		.result = REJECT,
+	},
+	{
+		"calls: callee using two args",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_6,
+				    offsetof(struct __sk_buff, len)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_6,
+				    offsetof(struct __sk_buff, len)),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+			BPF_EXIT_INSN(),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2),
+			BPF_EXIT_INSN(),
+		},
+		.errstr_unpriv = "allowed for root only",
+		.result_unpriv = REJECT,
+		.result = ACCEPT,
+		.retval = TEST_DATA_LEN + TEST_DATA_LEN - ETH_HLEN - ETH_HLEN,
+	},
+	{
+		"calls: callee changing pkt pointers",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+				    offsetof(struct xdp_md, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
+				    offsetof(struct xdp_md, data_end)),
+			BPF_MOV64_REG(BPF_REG_8, BPF_REG_6),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_8, 8),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_8, BPF_REG_7, 2),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3),
+			/* clear_all_pkt_pointers() has to walk all frames
+			 * to make sure that pkt pointers in the caller
+			 * are cleared when callee is calling a helper that
+			 * adjusts packet size
+			 */
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0),
+			BPF_MOV32_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+			BPF_MOV64_IMM(BPF_REG_2, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_xdp_adjust_head),
+			BPF_EXIT_INSN(),
+		},
+		.result = REJECT,
+		.errstr = "R6 invalid mem access 'inv'",
+		.prog_type = BPF_PROG_TYPE_XDP,
+		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+	},
+	{
+		"calls: two calls with args",
+		.insns = {
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+			BPF_EXIT_INSN(),
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 6),
+			BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_0),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_7),
+			BPF_EXIT_INSN(),
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, len)),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = ACCEPT,
+		.retval = TEST_DATA_LEN + TEST_DATA_LEN,
+	},
+	{
+		"calls: calls with stack arith",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -64),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+			BPF_EXIT_INSN(),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -64),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+			BPF_EXIT_INSN(),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -64),
+			BPF_MOV64_IMM(BPF_REG_0, 42),
+			BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = ACCEPT,
+		.retval = 42,
+	},
+	{
+		"calls: calls with misaligned stack access",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -63),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+			BPF_EXIT_INSN(),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -61),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+			BPF_EXIT_INSN(),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -63),
+			BPF_MOV64_IMM(BPF_REG_0, 42),
+			BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.flags = F_LOAD_WITH_STRICT_ALIGNMENT,
+		.errstr = "misaligned stack access",
+		.result = REJECT,
+	},
+	{
+		"calls: calls control flow, jump test",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 42),
+			BPF_JMP_IMM(BPF_JA, 0, 0, 2),
+			BPF_MOV64_IMM(BPF_REG_0, 43),
+			BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+			BPF_JMP_IMM(BPF_JA, 0, 0, -3),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = ACCEPT,
+		.retval = 43,
+	},
+	{
+		"calls: calls control flow, jump test 2",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 42),
+			BPF_JMP_IMM(BPF_JA, 0, 0, 2),
+			BPF_MOV64_IMM(BPF_REG_0, 43),
+			BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -3),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.errstr = "jump out of range from insn 1 to 4",
+		.result = REJECT,
+	},
+	{
+		"calls: two calls with bad jump",
+		.insns = {
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+			BPF_EXIT_INSN(),
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 6),
+			BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_0),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_7),
+			BPF_EXIT_INSN(),
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, len)),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, -3),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+		.errstr = "jump out of range from insn 11 to 9",
+		.result = REJECT,
+	},
+	{
+		"calls: recursive call. test1",
+		.insns = {
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+			BPF_EXIT_INSN(),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -1),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+		.errstr = "back-edge",
+		.result = REJECT,
+	},
+	{
+		"calls: recursive call. test2",
+		.insns = {
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+			BPF_EXIT_INSN(),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -3),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+		.errstr = "back-edge",
+		.result = REJECT,
+	},
+	{
+		"calls: unreachable code",
+		.insns = {
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+			BPF_EXIT_INSN(),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+			BPF_EXIT_INSN(),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+		.errstr = "unreachable insn 6",
+		.result = REJECT,
+	},
+	{
+		"calls: invalid call",
+		.insns = {
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+			BPF_EXIT_INSN(),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -4),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+		.errstr = "invalid destination",
+		.result = REJECT,
+	},
+	{
+		"calls: invalid call 2",
+		.insns = {
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+			BPF_EXIT_INSN(),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 0x7fffffff),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+		.errstr = "invalid destination",
+		.result = REJECT,
+	},
+	{
+		"calls: jumping across function bodies. test1",
+		.insns = {
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, -3),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+		.errstr = "jump out of range",
+		.result = REJECT,
+	},
+	{
+		"calls: jumping across function bodies. test2",
+		.insns = {
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 3),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+		.errstr = "jump out of range",
+		.result = REJECT,
+	},
+	{
+		"calls: call without exit",
+		.insns = {
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+			BPF_EXIT_INSN(),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+			BPF_EXIT_INSN(),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, -2),
+		},
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+		.errstr = "not an exit",
+		.result = REJECT,
+	},
+	{
+		"calls: call into middle of ld_imm64",
+		.insns = {
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+			BPF_LD_IMM64(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+		.errstr = "last insn",
+		.result = REJECT,
+	},
+	{
+		"calls: call into middle of other call",
+		.insns = {
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+		.errstr = "last insn",
+		.result = REJECT,
+	},
+	{
+		"calls: ld_abs with changing ctx data in callee",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_LD_ABS(BPF_B, 0),
+			BPF_LD_ABS(BPF_H, 0),
+			BPF_LD_ABS(BPF_W, 0),
+			BPF_MOV64_REG(BPF_REG_7, BPF_REG_6),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 5),
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_7),
+			BPF_LD_ABS(BPF_B, 0),
+			BPF_LD_ABS(BPF_H, 0),
+			BPF_LD_ABS(BPF_W, 0),
+			BPF_EXIT_INSN(),
+			BPF_MOV64_IMM(BPF_REG_2, 1),
+			BPF_MOV64_IMM(BPF_REG_3, 2),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_skb_vlan_push),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.errstr = "BPF_LD_[ABS|IND] instructions cannot be mixed",
+		.result = REJECT,
+	},
+	{
+		"calls: two calls with bad fallthrough",
+		.insns = {
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+			BPF_EXIT_INSN(),
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 6),
+			BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_0),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_7),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_0),
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, len)),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+		.errstr = "not an exit",
+		.result = REJECT,
+	},
+	{
+		"calls: two calls with stack read",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+			BPF_EXIT_INSN(),
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 6),
+			BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_0),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_7),
+			BPF_EXIT_INSN(),
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_XDP,
+		.result = ACCEPT,
+	},
+	{
+		"calls: two calls with stack write",
+		.insns = {
+			/* main prog */
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -16),
+			BPF_EXIT_INSN(),
+
+			/* subprog 1 */
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_MOV64_REG(BPF_REG_7, BPF_REG_2),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 7),
+			BPF_MOV64_REG(BPF_REG_8, BPF_REG_0),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_8, BPF_REG_0),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_8),
+			/* write into stack frame of main prog */
+			BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+
+			/* subprog 2 */
+			/* read from stack frame of main prog */
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_XDP,
+		.result = ACCEPT,
+	},
+	{
+		"calls: stack overflow using two frames (pre-call access)",
+		.insns = {
+			/* prog 1 */
+			BPF_ST_MEM(BPF_B, BPF_REG_10, -300, 0),
+			BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1),
+			BPF_EXIT_INSN(),
+
+			/* prog 2 */
+			BPF_ST_MEM(BPF_B, BPF_REG_10, -300, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_XDP,
+		.errstr = "combined stack size",
+		.result = REJECT,
+	},
+	{
+		"calls: stack overflow using two frames (post-call access)",
+		.insns = {
+			/* prog 1 */
+			BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 2),
+			BPF_ST_MEM(BPF_B, BPF_REG_10, -300, 0),
+			BPF_EXIT_INSN(),
+
+			/* prog 2 */
+			BPF_ST_MEM(BPF_B, BPF_REG_10, -300, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_XDP,
+		.errstr = "combined stack size",
+		.result = REJECT,
+	},
+	{
+		"calls: stack depth check using three frames. test1",
+		.insns = {
+			/* main */
+			BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 4), /* call A */
+			BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 5), /* call B */
+			BPF_ST_MEM(BPF_B, BPF_REG_10, -32, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+			/* A */
+			BPF_ST_MEM(BPF_B, BPF_REG_10, -256, 0),
+			BPF_EXIT_INSN(),
+			/* B */
+			BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, -3), /* call A */
+			BPF_ST_MEM(BPF_B, BPF_REG_10, -64, 0),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_XDP,
+		/* stack_main=32, stack_A=256, stack_B=64
+		 * and max(main+A, main+A+B) < 512
+		 */
+		.result = ACCEPT,
+	},
+	{
+		"calls: stack depth check using three frames. test2",
+		.insns = {
+			/* main */
+			BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 4), /* call A */
+			BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 5), /* call B */
+			BPF_ST_MEM(BPF_B, BPF_REG_10, -32, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+			/* A */
+			BPF_ST_MEM(BPF_B, BPF_REG_10, -64, 0),
+			BPF_EXIT_INSN(),
+			/* B */
+			BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, -3), /* call A */
+			BPF_ST_MEM(BPF_B, BPF_REG_10, -256, 0),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_XDP,
+		/* stack_main=32, stack_A=64, stack_B=256
+		 * and max(main+A, main+A+B) < 512
+		 */
+		.result = ACCEPT,
+	},
+	{
+		"calls: stack depth check using three frames. test3",
+		.insns = {
+			/* main */
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 6), /* call A */
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+			BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 8), /* call B */
+			BPF_JMP_IMM(BPF_JGE, BPF_REG_6, 0, 1),
+			BPF_ST_MEM(BPF_B, BPF_REG_10, -64, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+			/* A */
+			BPF_JMP_IMM(BPF_JLT, BPF_REG_1, 10, 1),
+			BPF_EXIT_INSN(),
+			BPF_ST_MEM(BPF_B, BPF_REG_10, -224, 0),
+			BPF_JMP_IMM(BPF_JA, 0, 0, -3),
+			/* B */
+			BPF_JMP_IMM(BPF_JGT, BPF_REG_1, 2, 1),
+			BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, -6), /* call A */
+			BPF_ST_MEM(BPF_B, BPF_REG_10, -256, 0),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_XDP,
+		/* stack_main=64, stack_A=224, stack_B=256
+		 * and max(main+A, main+A+B) > 512
+		 */
+		.errstr = "combined stack",
+		.result = REJECT,
+	},
+	{
+		"calls: stack depth check using three frames. test4",
+		/* void main(void) {
+		 *   func1(0);
+		 *   func1(1);
+		 *   func2(1);
+		 * }
+		 * void func1(int alloc_or_recurse) {
+		 *   if (alloc_or_recurse) {
+		 *     frame_pointer[-300] = 1;
+		 *   } else {
+		 *     func2(alloc_or_recurse);
+		 *   }
+		 * }
+		 * void func2(int alloc_or_recurse) {
+		 *   if (alloc_or_recurse) {
+		 *     frame_pointer[-300] = 1;
+		 *   }
+		 * }
+		 */
+		.insns = {
+			/* main */
+			BPF_MOV64_IMM(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 6), /* call A */
+			BPF_MOV64_IMM(BPF_REG_1, 1),
+			BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 4), /* call A */
+			BPF_MOV64_IMM(BPF_REG_1, 1),
+			BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 7), /* call B */
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+			/* A */
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 2),
+			BPF_ST_MEM(BPF_B, BPF_REG_10, -300, 0),
+			BPF_EXIT_INSN(),
+			BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call B */
+			BPF_EXIT_INSN(),
+			/* B */
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1),
+			BPF_ST_MEM(BPF_B, BPF_REG_10, -300, 0),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_XDP,
+		.result = REJECT,
+		.errstr = "combined stack",
+	},
+	{
+		"calls: stack depth check using three frames. test5",
+		.insns = {
+			/* main */
+			BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call A */
+			BPF_EXIT_INSN(),
+			/* A */
+			BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call B */
+			BPF_EXIT_INSN(),
+			/* B */
+			BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call C */
+			BPF_EXIT_INSN(),
+			/* C */
+			BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call D */
+			BPF_EXIT_INSN(),
+			/* D */
+			BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call E */
+			BPF_EXIT_INSN(),
+			/* E */
+			BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call F */
+			BPF_EXIT_INSN(),
+			/* F */
+			BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call G */
+			BPF_EXIT_INSN(),
+			/* G */
+			BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call H */
+			BPF_EXIT_INSN(),
+			/* H */
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_XDP,
+		.errstr = "call stack",
+		.result = REJECT,
+	},
+	{
+		"calls: spill into caller stack frame",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+			BPF_EXIT_INSN(),
+			BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_XDP,
+		.errstr = "cannot spill",
+		.result = REJECT,
+	},
+	{
+		"calls: write into caller stack frame",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0),
+			BPF_EXIT_INSN(),
+			BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 42),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_XDP,
+		.result = ACCEPT,
+		.retval = 42,
+	},
+	{
+		"calls: write into callee stack frame",
+		.insns = {
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
+			BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 42),
+			BPF_EXIT_INSN(),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, -8),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_XDP,
+		.errstr = "cannot return stack pointer",
+		.result = REJECT,
+	},
+	{
+		"calls: two calls with stack write and void return",
+		.insns = {
+			/* main prog */
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -16),
+			BPF_EXIT_INSN(),
+
+			/* subprog 1 */
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_MOV64_REG(BPF_REG_7, BPF_REG_2),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+			BPF_EXIT_INSN(),
+
+			/* subprog 2 */
+			/* write into stack frame of main prog */
+			BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 0),
+			BPF_EXIT_INSN(), /* void return */
+		},
+		.prog_type = BPF_PROG_TYPE_XDP,
+		.result = ACCEPT,
+	},
+	{
+		"calls: ambiguous return value",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 5),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+			BPF_EXIT_INSN(),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.errstr_unpriv = "allowed for root only",
+		.result_unpriv = REJECT,
+		.errstr = "R0 !read_ok",
+		.result = REJECT,
+	},
+	{
+		"calls: two calls that return map_value",
+		.insns = {
+			/* main prog */
+			/* pass fp-16, fp-8 into a function */
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 8),
+
+			/* fetch map_value_ptr from the stack of this function */
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+			/* write into map value */
+			BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
+			/* fetch secound map_value_ptr from the stack */
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -16),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+			/* write into map value */
+			BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+
+			/* subprog 1 */
+			/* call 3rd function twice */
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_MOV64_REG(BPF_REG_7, BPF_REG_2),
+			/* first time with fp-8 */
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
+			/* second time with fp-16 */
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+			BPF_EXIT_INSN(),
+
+			/* subprog 2 */
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			/* lookup from map */
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			/* write map_value_ptr into stack frame of main prog */
+			BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(), /* return 0 */
+		},
+		.prog_type = BPF_PROG_TYPE_XDP,
+		.fixup_map1 = { 23 },
+		.result = ACCEPT,
+	},
+	{
+		"calls: two calls that return map_value with bool condition",
+		.insns = {
+			/* main prog */
+			/* pass fp-16, fp-8 into a function */
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+
+			/* subprog 1 */
+			/* call 3rd function twice */
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_MOV64_REG(BPF_REG_7, BPF_REG_2),
+			/* first time with fp-8 */
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 9),
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 1, 2),
+			/* fetch map_value_ptr from the stack of this function */
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0),
+			/* write into map value */
+			BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
+			/* second time with fp-16 */
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4),
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 1, 2),
+			/* fetch secound map_value_ptr from the stack */
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_7, 0),
+			/* write into map value */
+			BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
+			BPF_EXIT_INSN(),
+
+			/* subprog 2 */
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			/* lookup from map */
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(), /* return 0 */
+			/* write map_value_ptr into stack frame of main prog */
+			BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(), /* return 1 */
+		},
+		.prog_type = BPF_PROG_TYPE_XDP,
+		.fixup_map1 = { 23 },
+		.result = ACCEPT,
+	},
+	{
+		"calls: two calls that return map_value with incorrect bool check",
+		.insns = {
+			/* main prog */
+			/* pass fp-16, fp-8 into a function */
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+
+			/* subprog 1 */
+			/* call 3rd function twice */
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_MOV64_REG(BPF_REG_7, BPF_REG_2),
+			/* first time with fp-8 */
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 9),
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 1, 2),
+			/* fetch map_value_ptr from the stack of this function */
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0),
+			/* write into map value */
+			BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
+			/* second time with fp-16 */
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4),
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
+			/* fetch secound map_value_ptr from the stack */
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_7, 0),
+			/* write into map value */
+			BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
+			BPF_EXIT_INSN(),
+
+			/* subprog 2 */
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			/* lookup from map */
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(), /* return 0 */
+			/* write map_value_ptr into stack frame of main prog */
+			BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(), /* return 1 */
+		},
+		.prog_type = BPF_PROG_TYPE_XDP,
+		.fixup_map1 = { 23 },
+		.result = REJECT,
+		.errstr = "invalid read from stack off -16+0 size 8",
+	},
+	{
+		"calls: two calls that receive map_value via arg=ptr_stack_of_caller. test1",
+		.insns = {
+			/* main prog */
+			/* pass fp-16, fp-8 into a function */
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+
+			/* subprog 1 */
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_MOV64_REG(BPF_REG_7, BPF_REG_2),
+			/* 1st lookup from map */
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
+			BPF_MOV64_IMM(BPF_REG_8, 0),
+			BPF_JMP_IMM(BPF_JA, 0, 0, 2),
+			/* write map_value_ptr into stack frame of main prog at fp-8 */
+			BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0),
+			BPF_MOV64_IMM(BPF_REG_8, 1),
+
+			/* 2nd lookup from map */
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), /* 20 */
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, /* 24 */
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
+			BPF_MOV64_IMM(BPF_REG_9, 0),
+			BPF_JMP_IMM(BPF_JA, 0, 0, 2),
+			/* write map_value_ptr into stack frame of main prog at fp-16 */
+			BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
+			BPF_MOV64_IMM(BPF_REG_9, 1),
+
+			/* call 3rd func with fp-8, 0|1, fp-16, 0|1 */
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), /* 30 */
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_8),
+			BPF_MOV64_REG(BPF_REG_3, BPF_REG_7),
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_9),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),  /* 34 */
+			BPF_EXIT_INSN(),
+
+			/* subprog 2 */
+			/* if arg2 == 1 do *arg1 = 0 */
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_2, 1, 2),
+			/* fetch map_value_ptr from the stack of this function */
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 0),
+			/* write into map value */
+			BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
+
+			/* if arg4 == 1 do *arg3 = 0 */
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_4, 1, 2),
+			/* fetch map_value_ptr from the stack of this function */
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_3, 0),
+			/* write into map value */
+			BPF_ST_MEM(BPF_DW, BPF_REG_0, 2, 0),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.fixup_map1 = { 12, 22 },
+		.result = REJECT,
+		.errstr = "invalid access to map value, value_size=8 off=2 size=8",
+		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+	},
+	{
+		"calls: two calls that receive map_value via arg=ptr_stack_of_caller. test2",
+		.insns = {
+			/* main prog */
+			/* pass fp-16, fp-8 into a function */
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+
+			/* subprog 1 */
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_MOV64_REG(BPF_REG_7, BPF_REG_2),
+			/* 1st lookup from map */
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
+			BPF_MOV64_IMM(BPF_REG_8, 0),
+			BPF_JMP_IMM(BPF_JA, 0, 0, 2),
+			/* write map_value_ptr into stack frame of main prog at fp-8 */
+			BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0),
+			BPF_MOV64_IMM(BPF_REG_8, 1),
+
+			/* 2nd lookup from map */
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), /* 20 */
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, /* 24 */
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
+			BPF_MOV64_IMM(BPF_REG_9, 0),
+			BPF_JMP_IMM(BPF_JA, 0, 0, 2),
+			/* write map_value_ptr into stack frame of main prog at fp-16 */
+			BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
+			BPF_MOV64_IMM(BPF_REG_9, 1),
+
+			/* call 3rd func with fp-8, 0|1, fp-16, 0|1 */
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), /* 30 */
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_8),
+			BPF_MOV64_REG(BPF_REG_3, BPF_REG_7),
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_9),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),  /* 34 */
+			BPF_EXIT_INSN(),
+
+			/* subprog 2 */
+			/* if arg2 == 1 do *arg1 = 0 */
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_2, 1, 2),
+			/* fetch map_value_ptr from the stack of this function */
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 0),
+			/* write into map value */
+			BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
+
+			/* if arg4 == 1 do *arg3 = 0 */
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_4, 1, 2),
+			/* fetch map_value_ptr from the stack of this function */
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_3, 0),
+			/* write into map value */
+			BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.fixup_map1 = { 12, 22 },
+		.result = ACCEPT,
+	},
+	{
+		"calls: two jumps that receive map_value via arg=ptr_stack_of_jumper. test3",
+		.insns = {
+			/* main prog */
+			/* pass fp-16, fp-8 into a function */
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16),
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+
+			/* subprog 1 */
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_MOV64_REG(BPF_REG_7, BPF_REG_2),
+			/* 1st lookup from map */
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -24, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -24),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
+			BPF_MOV64_IMM(BPF_REG_8, 0),
+			BPF_JMP_IMM(BPF_JA, 0, 0, 2),
+			/* write map_value_ptr into stack frame of main prog at fp-8 */
+			BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0),
+			BPF_MOV64_IMM(BPF_REG_8, 1),
+
+			/* 2nd lookup from map */
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -24),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
+			BPF_MOV64_IMM(BPF_REG_9, 0),  // 26
+			BPF_JMP_IMM(BPF_JA, 0, 0, 2),
+			/* write map_value_ptr into stack frame of main prog at fp-16 */
+			BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
+			BPF_MOV64_IMM(BPF_REG_9, 1),
+
+			/* call 3rd func with fp-8, 0|1, fp-16, 0|1 */
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), // 30
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_8),
+			BPF_MOV64_REG(BPF_REG_3, BPF_REG_7),
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_9),
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 1), // 34
+			BPF_JMP_IMM(BPF_JA, 0, 0, -30),
+
+			/* subprog 2 */
+			/* if arg2 == 1 do *arg1 = 0 */
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_2, 1, 2),
+			/* fetch map_value_ptr from the stack of this function */
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 0),
+			/* write into map value */
+			BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
+
+			/* if arg4 == 1 do *arg3 = 0 */
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_4, 1, 2),
+			/* fetch map_value_ptr from the stack of this function */
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_3, 0),
+			/* write into map value */
+			BPF_ST_MEM(BPF_DW, BPF_REG_0, 2, 0),
+			BPF_JMP_IMM(BPF_JA, 0, 0, -8),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.fixup_map1 = { 12, 22 },
+		.result = REJECT,
+		.errstr = "invalid access to map value, value_size=8 off=2 size=8",
+		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+	},
+	{
+		"calls: two calls that receive map_value_ptr_or_null via arg. test1",
+		.insns = {
+			/* main prog */
+			/* pass fp-16, fp-8 into a function */
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+
+			/* subprog 1 */
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_MOV64_REG(BPF_REG_7, BPF_REG_2),
+			/* 1st lookup from map */
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			/* write map_value_ptr_or_null into stack frame of main prog at fp-8 */
+			BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0),
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
+			BPF_MOV64_IMM(BPF_REG_8, 0),
+			BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+			BPF_MOV64_IMM(BPF_REG_8, 1),
+
+			/* 2nd lookup from map */
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			/* write map_value_ptr_or_null into stack frame of main prog at fp-16 */
+			BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
+			BPF_MOV64_IMM(BPF_REG_9, 0),
+			BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+			BPF_MOV64_IMM(BPF_REG_9, 1),
+
+			/* call 3rd func with fp-8, 0|1, fp-16, 0|1 */
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_8),
+			BPF_MOV64_REG(BPF_REG_3, BPF_REG_7),
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_9),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+			BPF_EXIT_INSN(),
+
+			/* subprog 2 */
+			/* if arg2 == 1 do *arg1 = 0 */
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_2, 1, 2),
+			/* fetch map_value_ptr from the stack of this function */
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 0),
+			/* write into map value */
+			BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
+
+			/* if arg4 == 1 do *arg3 = 0 */
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_4, 1, 2),
+			/* fetch map_value_ptr from the stack of this function */
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_3, 0),
+			/* write into map value */
+			BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.fixup_map1 = { 12, 22 },
+		.result = ACCEPT,
+	},
+	{
+		"calls: two calls that receive map_value_ptr_or_null via arg. test2",
+		.insns = {
+			/* main prog */
+			/* pass fp-16, fp-8 into a function */
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+
+			/* subprog 1 */
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_MOV64_REG(BPF_REG_7, BPF_REG_2),
+			/* 1st lookup from map */
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			/* write map_value_ptr_or_null into stack frame of main prog at fp-8 */
+			BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0),
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
+			BPF_MOV64_IMM(BPF_REG_8, 0),
+			BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+			BPF_MOV64_IMM(BPF_REG_8, 1),
+
+			/* 2nd lookup from map */
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			/* write map_value_ptr_or_null into stack frame of main prog at fp-16 */
+			BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
+			BPF_MOV64_IMM(BPF_REG_9, 0),
+			BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+			BPF_MOV64_IMM(BPF_REG_9, 1),
+
+			/* call 3rd func with fp-8, 0|1, fp-16, 0|1 */
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_8),
+			BPF_MOV64_REG(BPF_REG_3, BPF_REG_7),
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_9),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+			BPF_EXIT_INSN(),
+
+			/* subprog 2 */
+			/* if arg2 == 1 do *arg1 = 0 */
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_2, 1, 2),
+			/* fetch map_value_ptr from the stack of this function */
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 0),
+			/* write into map value */
+			BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
+
+			/* if arg4 == 0 do *arg3 = 0 */
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_4, 0, 2),
+			/* fetch map_value_ptr from the stack of this function */
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_3, 0),
+			/* write into map value */
+			BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.fixup_map1 = { 12, 22 },
+		.result = REJECT,
+		.errstr = "R0 invalid mem access 'inv'",
+	},
+	{
+		"calls: pkt_ptr spill into caller stack",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+			BPF_EXIT_INSN(),
+
+			/* subprog 1 */
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct __sk_buff, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct __sk_buff, data_end)),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+			/* spill unchecked pkt_ptr into stack of caller */
+			BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 2),
+			/* now the pkt range is verified, read pkt_ptr from stack */
+			BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_4, 0),
+			/* write 4 bytes into packet */
+			BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.retval = POINTER_VALUE,
+		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+	},
+	{
+		"calls: pkt_ptr spill into caller stack 2",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3),
+			/* Marking is still kept, but not in all cases safe. */
+			BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8),
+			BPF_ST_MEM(BPF_W, BPF_REG_4, 0, 0),
+			BPF_EXIT_INSN(),
+
+			/* subprog 1 */
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct __sk_buff, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct __sk_buff, data_end)),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+			/* spill unchecked pkt_ptr into stack of caller */
+			BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 2),
+			/* now the pkt range is verified, read pkt_ptr from stack */
+			BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_4, 0),
+			/* write 4 bytes into packet */
+			BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.errstr = "invalid access to packet",
+		.result = REJECT,
+		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+	},
+	{
+		"calls: pkt_ptr spill into caller stack 3",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
+			/* Marking is still kept and safe here. */
+			BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8),
+			BPF_ST_MEM(BPF_W, BPF_REG_4, 0, 0),
+			BPF_EXIT_INSN(),
+
+			/* subprog 1 */
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct __sk_buff, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct __sk_buff, data_end)),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+			/* spill unchecked pkt_ptr into stack of caller */
+			BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0),
+			BPF_MOV64_IMM(BPF_REG_5, 0),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 3),
+			BPF_MOV64_IMM(BPF_REG_5, 1),
+			/* now the pkt range is verified, read pkt_ptr from stack */
+			BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_4, 0),
+			/* write 4 bytes into packet */
+			BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_5),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = ACCEPT,
+		.retval = 1,
+		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+	},
+	{
+		"calls: pkt_ptr spill into caller stack 4",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
+			/* Check marking propagated. */
+			BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8),
+			BPF_ST_MEM(BPF_W, BPF_REG_4, 0, 0),
+			BPF_EXIT_INSN(),
+
+			/* subprog 1 */
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct __sk_buff, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct __sk_buff, data_end)),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+			/* spill unchecked pkt_ptr into stack of caller */
+			BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0),
+			BPF_MOV64_IMM(BPF_REG_5, 0),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 2),
+			BPF_MOV64_IMM(BPF_REG_5, 1),
+			/* don't read back pkt_ptr from stack here */
+			/* write 4 bytes into packet */
+			BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_5),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = ACCEPT,
+		.retval = 1,
+		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+	},
+	{
+		"calls: pkt_ptr spill into caller stack 5",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8),
+			BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8),
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_4, 0),
+			BPF_EXIT_INSN(),
+
+			/* subprog 1 */
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct __sk_buff, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct __sk_buff, data_end)),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+			BPF_MOV64_IMM(BPF_REG_5, 0),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 3),
+			/* spill checked pkt_ptr into stack of caller */
+			BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0),
+			BPF_MOV64_IMM(BPF_REG_5, 1),
+			/* don't read back pkt_ptr from stack here */
+			/* write 4 bytes into packet */
+			BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_5),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.errstr = "same insn cannot be used with different",
+		.result = REJECT,
+		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+	},
+	{
+		"calls: pkt_ptr spill into caller stack 6",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct __sk_buff, data_end)),
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8),
+			BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8),
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_4, 0),
+			BPF_EXIT_INSN(),
+
+			/* subprog 1 */
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct __sk_buff, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct __sk_buff, data_end)),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+			BPF_MOV64_IMM(BPF_REG_5, 0),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 3),
+			/* spill checked pkt_ptr into stack of caller */
+			BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0),
+			BPF_MOV64_IMM(BPF_REG_5, 1),
+			/* don't read back pkt_ptr from stack here */
+			/* write 4 bytes into packet */
+			BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_5),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.errstr = "R4 invalid mem access",
+		.result = REJECT,
+		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+	},
+	{
+		"calls: pkt_ptr spill into caller stack 7",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_2, 0),
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8),
+			BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8),
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_4, 0),
+			BPF_EXIT_INSN(),
+
+			/* subprog 1 */
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct __sk_buff, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct __sk_buff, data_end)),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+			BPF_MOV64_IMM(BPF_REG_5, 0),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 3),
+			/* spill checked pkt_ptr into stack of caller */
+			BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0),
+			BPF_MOV64_IMM(BPF_REG_5, 1),
+			/* don't read back pkt_ptr from stack here */
+			/* write 4 bytes into packet */
+			BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_5),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.errstr = "R4 invalid mem access",
+		.result = REJECT,
+		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+	},
+	{
+		"calls: pkt_ptr spill into caller stack 8",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct __sk_buff, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct __sk_buff, data_end)),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+			BPF_JMP_REG(BPF_JLE, BPF_REG_0, BPF_REG_3, 1),
+			BPF_EXIT_INSN(),
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8),
+			BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8),
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_4, 0),
+			BPF_EXIT_INSN(),
+
+			/* subprog 1 */
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct __sk_buff, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct __sk_buff, data_end)),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+			BPF_MOV64_IMM(BPF_REG_5, 0),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 3),
+			/* spill checked pkt_ptr into stack of caller */
+			BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0),
+			BPF_MOV64_IMM(BPF_REG_5, 1),
+			/* don't read back pkt_ptr from stack here */
+			/* write 4 bytes into packet */
+			BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_5),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = ACCEPT,
+		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+	},
+	{
+		"calls: pkt_ptr spill into caller stack 9",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct __sk_buff, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct __sk_buff, data_end)),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+			BPF_JMP_REG(BPF_JLE, BPF_REG_0, BPF_REG_3, 1),
+			BPF_EXIT_INSN(),
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8),
+			BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8),
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_4, 0),
+			BPF_EXIT_INSN(),
+
+			/* subprog 1 */
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct __sk_buff, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct __sk_buff, data_end)),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+			BPF_MOV64_IMM(BPF_REG_5, 0),
+			/* spill unchecked pkt_ptr into stack of caller */
+			BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0),
+			BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 2),
+			BPF_MOV64_IMM(BPF_REG_5, 1),
+			/* don't read back pkt_ptr from stack here */
+			/* write 4 bytes into packet */
+			BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+			BPF_MOV64_REG(BPF_REG_0, BPF_REG_5),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.errstr = "invalid access to packet",
+		.result = REJECT,
+		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+	},
+	{
+		"calls: caller stack init to zero or map_value_or_null",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4),
+			/* fetch map_value_or_null or const_zero from stack */
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+			/* store into map_value */
+			BPF_ST_MEM(BPF_W, BPF_REG_0, 0, 0),
+			BPF_EXIT_INSN(),
+
+			/* subprog 1 */
+			/* if (ctx == 0) return; */
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 8),
+			/* else bpf_map_lookup() and *(fp - 8) = r0 */
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_2),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			/* write map_value_ptr_or_null into stack frame of main prog at fp-8 */
+			BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map1 = { 13 },
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_XDP,
+	},
+	{
+		"calls: stack init to zero and pruning",
+		.insns = {
+			/* first make allocated_stack 16 byte */
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, 0),
+			/* now fork the execution such that the false branch
+			 * of JGT insn will be verified second and it skisp zero
+			 * init of fp-8 stack slot. If stack liveness marking
+			 * is missing live_read marks from call map_lookup
+			 * processing then pruning will incorrectly assume
+			 * that fp-8 stack slot was unused in the fall-through
+			 * branch and will accept the program incorrectly
+			 */
+			BPF_JMP_IMM(BPF_JGT, BPF_REG_1, 2, 2),
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_JMP_IMM(BPF_JA, 0, 0, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map2 = { 6 },
+		.errstr = "invalid indirect read from stack off -8+0 size 8",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_XDP,
+	},
+	{
+		"calls: two calls returning different map pointers for lookup (hash, array)",
+		.insns = {
+			/* main prog */
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
+			BPF_CALL_REL(11),
+			BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+			BPF_CALL_REL(12),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
+			BPF_ST_MEM(BPF_DW, BPF_REG_0, 0,
+				   offsetof(struct test_val, foo)),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+			/* subprog 1 */
+			BPF_LD_MAP_FD(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+			/* subprog 2 */
+			BPF_LD_MAP_FD(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.fixup_map2 = { 13 },
+		.fixup_map4 = { 16 },
+		.result = ACCEPT,
+		.retval = 1,
+	},
+	{
+		"calls: two calls returning different map pointers for lookup (hash, map in map)",
+		.insns = {
+			/* main prog */
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
+			BPF_CALL_REL(11),
+			BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+			BPF_CALL_REL(12),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
+			BPF_ST_MEM(BPF_DW, BPF_REG_0, 0,
+				   offsetof(struct test_val, foo)),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+			/* subprog 1 */
+			BPF_LD_MAP_FD(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+			/* subprog 2 */
+			BPF_LD_MAP_FD(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.fixup_map_in_map = { 16 },
+		.fixup_map4 = { 13 },
+		.result = REJECT,
+		.errstr = "R0 invalid mem access 'map_ptr'",
+	},
+	{
+		"cond: two branches returning different map pointers for lookup (tail, tail)",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+				    offsetof(struct __sk_buff, mark)),
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_6, 0, 3),
+			BPF_LD_MAP_FD(BPF_REG_2, 0),
+			BPF_JMP_IMM(BPF_JA, 0, 0, 2),
+			BPF_LD_MAP_FD(BPF_REG_2, 0),
+			BPF_MOV64_IMM(BPF_REG_3, 7),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_tail_call),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_prog1 = { 5 },
+		.fixup_prog2 = { 2 },
+		.result_unpriv = REJECT,
+		.errstr_unpriv = "tail_call abusing map_ptr",
+		.result = ACCEPT,
+		.retval = 42,
+	},
+	{
+		"cond: two branches returning same map pointers for lookup (tail, tail)",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+				    offsetof(struct __sk_buff, mark)),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_6, 0, 3),
+			BPF_LD_MAP_FD(BPF_REG_2, 0),
+			BPF_JMP_IMM(BPF_JA, 0, 0, 2),
+			BPF_LD_MAP_FD(BPF_REG_2, 0),
+			BPF_MOV64_IMM(BPF_REG_3, 7),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_tail_call),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_prog2 = { 2, 5 },
+		.result_unpriv = ACCEPT,
+		.result = ACCEPT,
+		.retval = 42,
+	},
+	{
+		"search pruning: all branches should be verified (nop operation)",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 11),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 0),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_3, 0xbeef, 2),
+			BPF_MOV64_IMM(BPF_REG_4, 0),
+			BPF_JMP_A(1),
+			BPF_MOV64_IMM(BPF_REG_4, 1),
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_4, -16),
+			BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_5, BPF_REG_10, -16),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_5, 0, 2),
+			BPF_MOV64_IMM(BPF_REG_6, 0),
+			BPF_ST_MEM(BPF_DW, BPF_REG_6, 0, 0xdead),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map1 = { 3 },
+		.errstr = "R6 invalid mem access 'inv'",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
+	{
+		"search pruning: all branches should be verified (invalid stack access)",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 8),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 0),
+			BPF_MOV64_IMM(BPF_REG_4, 0),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_3, 0xbeef, 2),
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_4, -16),
+			BPF_JMP_A(1),
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_4, -24),
+			BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_5, BPF_REG_10, -16),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map1 = { 3 },
+		.errstr = "invalid read from stack off -16+0 size 8",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
+	{
+		"jit: lsh, rsh, arsh by 1",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_MOV64_IMM(BPF_REG_1, 0xff),
+			BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 1),
+			BPF_ALU32_IMM(BPF_LSH, BPF_REG_1, 1),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0x3fc, 1),
+			BPF_EXIT_INSN(),
+			BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 1),
+			BPF_ALU32_IMM(BPF_RSH, BPF_REG_1, 1),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0xff, 1),
+			BPF_EXIT_INSN(),
+			BPF_ALU64_IMM(BPF_ARSH, BPF_REG_1, 1),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0x7f, 1),
+			BPF_EXIT_INSN(),
+			BPF_MOV64_IMM(BPF_REG_0, 2),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.retval = 2,
+	},
+	{
+		"jit: mov32 for ldimm64, 1",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 2),
+			BPF_LD_IMM64(BPF_REG_1, 0xfeffffffffffffffULL),
+			BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 32),
+			BPF_LD_IMM64(BPF_REG_2, 0xfeffffffULL),
+			BPF_JMP_REG(BPF_JEQ, BPF_REG_1, BPF_REG_2, 1),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.retval = 2,
+	},
+	{
+		"jit: mov32 for ldimm64, 2",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_LD_IMM64(BPF_REG_1, 0x1ffffffffULL),
+			BPF_LD_IMM64(BPF_REG_2, 0xffffffffULL),
+			BPF_JMP_REG(BPF_JEQ, BPF_REG_1, BPF_REG_2, 1),
+			BPF_MOV64_IMM(BPF_REG_0, 2),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.retval = 2,
+	},
+	{
+		"jit: various mul tests",
+		.insns = {
+			BPF_LD_IMM64(BPF_REG_2, 0xeeff0d413122ULL),
+			BPF_LD_IMM64(BPF_REG_0, 0xfefefeULL),
+			BPF_LD_IMM64(BPF_REG_1, 0xefefefULL),
+			BPF_ALU64_REG(BPF_MUL, BPF_REG_0, BPF_REG_1),
+			BPF_JMP_REG(BPF_JEQ, BPF_REG_0, BPF_REG_2, 2),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+			BPF_LD_IMM64(BPF_REG_3, 0xfefefeULL),
+			BPF_ALU64_REG(BPF_MUL, BPF_REG_3, BPF_REG_1),
+			BPF_JMP_REG(BPF_JEQ, BPF_REG_3, BPF_REG_2, 2),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+			BPF_MOV32_REG(BPF_REG_2, BPF_REG_2),
+			BPF_LD_IMM64(BPF_REG_0, 0xfefefeULL),
+			BPF_ALU32_REG(BPF_MUL, BPF_REG_0, BPF_REG_1),
+			BPF_JMP_REG(BPF_JEQ, BPF_REG_0, BPF_REG_2, 2),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+			BPF_LD_IMM64(BPF_REG_3, 0xfefefeULL),
+			BPF_ALU32_REG(BPF_MUL, BPF_REG_3, BPF_REG_1),
+			BPF_JMP_REG(BPF_JEQ, BPF_REG_3, BPF_REG_2, 2),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+			BPF_LD_IMM64(BPF_REG_0, 0x952a7bbcULL),
+			BPF_LD_IMM64(BPF_REG_1, 0xfefefeULL),
+			BPF_LD_IMM64(BPF_REG_2, 0xeeff0d413122ULL),
+			BPF_ALU32_REG(BPF_MUL, BPF_REG_2, BPF_REG_1),
+			BPF_JMP_REG(BPF_JEQ, BPF_REG_2, BPF_REG_0, 2),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_EXIT_INSN(),
+			BPF_MOV64_IMM(BPF_REG_0, 2),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.retval = 2,
+	},
+	{
+		"xadd/w check unaligned stack",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8),
+			BPF_STX_XADD(BPF_W, BPF_REG_10, BPF_REG_0, -7),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8),
+			BPF_EXIT_INSN(),
+		},
+		.result = REJECT,
+		.errstr = "misaligned stack access off",
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	},
+	{
+		"xadd/w check unaligned map",
+		.insns = {
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+			BPF_EXIT_INSN(),
+			BPF_MOV64_IMM(BPF_REG_1, 1),
+			BPF_STX_XADD(BPF_W, BPF_REG_0, BPF_REG_1, 3),
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, 3),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map1 = { 3 },
+		.result = REJECT,
+		.errstr = "misaligned value access off",
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	},
+	{
+		"xadd/w check unaligned pkt",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+				    offsetof(struct xdp_md, data)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+				    offsetof(struct xdp_md, data_end)),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+			BPF_JMP_REG(BPF_JLT, BPF_REG_1, BPF_REG_3, 2),
+			BPF_MOV64_IMM(BPF_REG_0, 99),
+			BPF_JMP_IMM(BPF_JA, 0, 0, 6),
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+			BPF_ST_MEM(BPF_W, BPF_REG_2, 3, 0),
+			BPF_STX_XADD(BPF_W, BPF_REG_2, BPF_REG_0, 1),
+			BPF_STX_XADD(BPF_W, BPF_REG_2, BPF_REG_0, 2),
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_2, 1),
+			BPF_EXIT_INSN(),
+		},
+		.result = REJECT,
+		.errstr = "BPF_XADD stores into R2 packet",
+		.prog_type = BPF_PROG_TYPE_XDP,
+		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+	},
+	{
+		"xadd/w check whether src/dst got mangled, 1",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
+			BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
+			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8),
+			BPF_STX_XADD(BPF_DW, BPF_REG_10, BPF_REG_0, -8),
+			BPF_STX_XADD(BPF_DW, BPF_REG_10, BPF_REG_0, -8),
+			BPF_JMP_REG(BPF_JNE, BPF_REG_6, BPF_REG_0, 3),
+			BPF_JMP_REG(BPF_JNE, BPF_REG_7, BPF_REG_10, 2),
+			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8),
+			BPF_EXIT_INSN(),
+			BPF_MOV64_IMM(BPF_REG_0, 42),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.retval = 3,
+	},
+	{
+		"xadd/w check whether src/dst got mangled, 2",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_0, 1),
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
+			BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
+			BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -8),
+			BPF_STX_XADD(BPF_W, BPF_REG_10, BPF_REG_0, -8),
+			BPF_STX_XADD(BPF_W, BPF_REG_10, BPF_REG_0, -8),
+			BPF_JMP_REG(BPF_JNE, BPF_REG_6, BPF_REG_0, 3),
+			BPF_JMP_REG(BPF_JNE, BPF_REG_7, BPF_REG_10, 2),
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_10, -8),
+			BPF_EXIT_INSN(),
+			BPF_MOV64_IMM(BPF_REG_0, 42),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.retval = 3,
+	},
+	{
+		"bpf_get_stack return R0 within range",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+			BPF_LD_MAP_FD(BPF_REG_1, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_map_lookup_elem),
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 28),
+			BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
+			BPF_MOV64_IMM(BPF_REG_9, sizeof(struct test_val)/2),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_7),
+			BPF_MOV64_IMM(BPF_REG_3, sizeof(struct test_val)/2),
+			BPF_MOV64_IMM(BPF_REG_4, 256),
+			BPF_EMIT_CALL(BPF_FUNC_get_stack),
+			BPF_MOV64_IMM(BPF_REG_1, 0),
+			BPF_MOV64_REG(BPF_REG_8, BPF_REG_0),
+			BPF_ALU64_IMM(BPF_LSH, BPF_REG_8, 32),
+			BPF_ALU64_IMM(BPF_ARSH, BPF_REG_8, 32),
+			BPF_JMP_REG(BPF_JSLT, BPF_REG_8, BPF_REG_1, 16),
+			BPF_ALU64_REG(BPF_SUB, BPF_REG_9, BPF_REG_8),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_7),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_8),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_9),
+			BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 32),
+			BPF_ALU64_IMM(BPF_ARSH, BPF_REG_1, 32),
+			BPF_MOV64_REG(BPF_REG_3, BPF_REG_2),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_3, BPF_REG_1),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
+			BPF_MOV64_IMM(BPF_REG_5, sizeof(struct test_val)/2),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_5),
+			BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_1, 4),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+			BPF_MOV64_REG(BPF_REG_3, BPF_REG_9),
+			BPF_MOV64_IMM(BPF_REG_4, 0),
+			BPF_EMIT_CALL(BPF_FUNC_get_stack),
+			BPF_EXIT_INSN(),
+		},
+		.fixup_map2 = { 4 },
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	},
+	{
+		"ld_abs: invalid op 1",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_LD_ABS(BPF_DW, 0),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = REJECT,
+		.errstr = "unknown opcode",
+	},
+	{
+		"ld_abs: invalid op 2",
+		.insns = {
+			BPF_MOV32_IMM(BPF_REG_0, 256),
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_LD_IND(BPF_DW, BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = REJECT,
+		.errstr = "unknown opcode",
+	},
+	{
+		"ld_abs: nmap reduced",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_LD_ABS(BPF_H, 12),
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0x806, 28),
+			BPF_LD_ABS(BPF_H, 12),
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0x806, 26),
+			BPF_MOV32_IMM(BPF_REG_0, 18),
+			BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -64),
+			BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_10, -64),
+			BPF_LD_IND(BPF_W, BPF_REG_7, 14),
+			BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -60),
+			BPF_MOV32_IMM(BPF_REG_0, 280971478),
+			BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -56),
+			BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_10, -56),
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_10, -60),
+			BPF_ALU32_REG(BPF_SUB, BPF_REG_0, BPF_REG_7),
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 15),
+			BPF_LD_ABS(BPF_H, 12),
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0x806, 13),
+			BPF_MOV32_IMM(BPF_REG_0, 22),
+			BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -56),
+			BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_10, -56),
+			BPF_LD_IND(BPF_H, BPF_REG_7, 14),
+			BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -52),
+			BPF_MOV32_IMM(BPF_REG_0, 17366),
+			BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -48),
+			BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_10, -48),
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_10, -52),
+			BPF_ALU32_REG(BPF_SUB, BPF_REG_0, BPF_REG_7),
+			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
+			BPF_MOV32_IMM(BPF_REG_0, 256),
+			BPF_EXIT_INSN(),
+			BPF_MOV32_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.data = {
+			0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0x06, 0,
+			0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+			0x10, 0xbf, 0x48, 0xd6, 0x43, 0xd6,
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = ACCEPT,
+		.retval = 256,
+	},
+	{
+		"ld_abs: div + abs, test 1",
+		.insns = {
+			BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_1),
+			BPF_LD_ABS(BPF_B, 3),
+			BPF_ALU64_IMM(BPF_MOV, BPF_REG_2, 2),
+			BPF_ALU32_REG(BPF_DIV, BPF_REG_0, BPF_REG_2),
+			BPF_ALU64_REG(BPF_MOV, BPF_REG_8, BPF_REG_0),
+			BPF_LD_ABS(BPF_B, 4),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_8, BPF_REG_0),
+			BPF_LD_IND(BPF_B, BPF_REG_8, -70),
+			BPF_EXIT_INSN(),
+		},
+		.data = {
+			10, 20, 30, 40, 50,
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = ACCEPT,
+		.retval = 10,
+	},
+	{
+		"ld_abs: div + abs, test 2",
+		.insns = {
+			BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_1),
+			BPF_LD_ABS(BPF_B, 3),
+			BPF_ALU64_IMM(BPF_MOV, BPF_REG_2, 2),
+			BPF_ALU32_REG(BPF_DIV, BPF_REG_0, BPF_REG_2),
+			BPF_ALU64_REG(BPF_MOV, BPF_REG_8, BPF_REG_0),
+			BPF_LD_ABS(BPF_B, 128),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_8, BPF_REG_0),
+			BPF_LD_IND(BPF_B, BPF_REG_8, -70),
+			BPF_EXIT_INSN(),
+		},
+		.data = {
+			10, 20, 30, 40, 50,
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = ACCEPT,
+		.retval = 0,
+	},
+	{
+		"ld_abs: div + abs, test 3",
+		.insns = {
+			BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_1),
+			BPF_ALU64_IMM(BPF_MOV, BPF_REG_7, 0),
+			BPF_LD_ABS(BPF_B, 3),
+			BPF_ALU32_REG(BPF_DIV, BPF_REG_0, BPF_REG_7),
+			BPF_EXIT_INSN(),
+		},
+		.data = {
+			10, 20, 30, 40, 50,
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = ACCEPT,
+		.retval = 0,
+	},
+	{
+		"ld_abs: div + abs, test 4",
+		.insns = {
+			BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_1),
+			BPF_ALU64_IMM(BPF_MOV, BPF_REG_7, 0),
+			BPF_LD_ABS(BPF_B, 256),
+			BPF_ALU32_REG(BPF_DIV, BPF_REG_0, BPF_REG_7),
+			BPF_EXIT_INSN(),
+		},
+		.data = {
+			10, 20, 30, 40, 50,
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = ACCEPT,
+		.retval = 0,
+	},
+	{
+		"ld_abs: vlan + abs, test 1",
+		.insns = { },
+		.data = {
+			0x34,
+		},
+		.fill_helper = bpf_fill_ld_abs_vlan_push_pop,
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = ACCEPT,
+		.retval = 0xbef,
+	},
+	{
+		"ld_abs: vlan + abs, test 2",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_LD_ABS(BPF_B, 0),
+			BPF_LD_ABS(BPF_H, 0),
+			BPF_LD_ABS(BPF_W, 0),
+			BPF_MOV64_REG(BPF_REG_7, BPF_REG_6),
+			BPF_MOV64_IMM(BPF_REG_6, 0),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
+			BPF_MOV64_IMM(BPF_REG_2, 1),
+			BPF_MOV64_IMM(BPF_REG_3, 2),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_skb_vlan_push),
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_7),
+			BPF_LD_ABS(BPF_B, 0),
+			BPF_LD_ABS(BPF_H, 0),
+			BPF_LD_ABS(BPF_W, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 42),
+			BPF_EXIT_INSN(),
+		},
+		.data = {
+			0x34,
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = ACCEPT,
+		.retval = 42,
+	},
+	{
+		"ld_abs: jump around ld_abs",
+		.insns = { },
+		.data = {
+			10, 11,
+		},
+		.fill_helper = bpf_fill_jump_around_ld_abs,
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = ACCEPT,
+		.retval = 10,
+	},
+	{
+		"ld_dw: xor semi-random 64 bit imms, test 1",
+		.insns = { },
+		.data = { },
+		.fill_helper = bpf_fill_rand_ld_dw,
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = ACCEPT,
+		.retval = 4090,
+	},
+	{
+		"ld_dw: xor semi-random 64 bit imms, test 2",
+		.insns = { },
+		.data = { },
+		.fill_helper = bpf_fill_rand_ld_dw,
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = ACCEPT,
+		.retval = 2047,
+	},
+	{
+		"ld_dw: xor semi-random 64 bit imms, test 3",
+		.insns = { },
+		.data = { },
+		.fill_helper = bpf_fill_rand_ld_dw,
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = ACCEPT,
+		.retval = 511,
+	},
+	{
+		"ld_dw: xor semi-random 64 bit imms, test 4",
+		.insns = { },
+		.data = { },
+		.fill_helper = bpf_fill_rand_ld_dw,
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = ACCEPT,
+		.retval = 5,
+	},
+	{
+		"pass unmodified ctx pointer to helper",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_2, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_csum_update),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = ACCEPT,
+	},
+	{
+		"pass modified ctx pointer to helper, 1",
+		.insns = {
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -612),
+			BPF_MOV64_IMM(BPF_REG_2, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_csum_update),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = REJECT,
+		.errstr = "dereference of modified ctx ptr",
+	},
+	{
+		"pass modified ctx pointer to helper, 2",
+		.insns = {
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -612),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_get_socket_cookie),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.result_unpriv = REJECT,
+		.result = REJECT,
+		.errstr_unpriv = "dereference of modified ctx ptr",
+		.errstr = "dereference of modified ctx ptr",
+	},
+	{
+		"pass modified ctx pointer to helper, 3",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, 0),
+			BPF_ALU64_IMM(BPF_AND, BPF_REG_3, 4),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
+			BPF_MOV64_IMM(BPF_REG_2, 0),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+				     BPF_FUNC_csum_update),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = REJECT,
+		.errstr = "variable ctx access var_off=(0x0; 0x4)",
+	},
+	{
+		"mov64 src == dst",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_2, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_2),
+			// Check bounds are OK
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_2),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = ACCEPT,
+	},
+	{
+		"mov64 src != dst",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_3, 0),
+			BPF_MOV64_REG(BPF_REG_2, BPF_REG_3),
+			// Check bounds are OK
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_2),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = ACCEPT,
+	},
+	{
+		"calls: ctx read at start of subprog",
+		.insns = {
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 5),
+			BPF_JMP_REG(BPF_JSGT, BPF_REG_0, BPF_REG_0, 0),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
+			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+			BPF_EXIT_INSN(),
+			BPF_LDX_MEM(BPF_B, BPF_REG_9, BPF_REG_1, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SOCKET_FILTER,
+		.errstr_unpriv = "function calls to other bpf functions are allowed for root only",
+		.result_unpriv = REJECT,
+		.result = ACCEPT,
+	},
+};
+
+static int probe_filter_length(const struct bpf_insn *fp)
+{
+	int len;
+
+	for (len = MAX_INSNS - 1; len > 0; --len)
+		if (fp[len].code != 0 || fp[len].imm != 0)
+			break;
+	return len + 1;
+}
+
+static int create_map(uint32_t type, uint32_t size_key,
+		      uint32_t size_value, uint32_t max_elem)
+{
+	int fd;
+
+	fd = bpf_create_map(type, size_key, size_value, max_elem,
+			    type == BPF_MAP_TYPE_HASH ? BPF_F_NO_PREALLOC : 0);
+	if (fd < 0)
+		printf("Failed to create hash map '%s'!\n", strerror(errno));
+
+	return fd;
+}
+
+static int create_prog_dummy1(enum bpf_map_type prog_type)
+{
+	struct bpf_insn prog[] = {
+		BPF_MOV64_IMM(BPF_REG_0, 42),
+		BPF_EXIT_INSN(),
+	};
+
+	return bpf_load_program(prog_type, prog,
+				ARRAY_SIZE(prog), "GPL", 0, NULL, 0);
+}
+
+static int create_prog_dummy2(enum bpf_map_type prog_type, int mfd, int idx)
+{
+	struct bpf_insn prog[] = {
+		BPF_MOV64_IMM(BPF_REG_3, idx),
+		BPF_LD_MAP_FD(BPF_REG_2, mfd),
+		BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+			     BPF_FUNC_tail_call),
+		BPF_MOV64_IMM(BPF_REG_0, 41),
+		BPF_EXIT_INSN(),
+	};
+
+	return bpf_load_program(prog_type, prog,
+				ARRAY_SIZE(prog), "GPL", 0, NULL, 0);
+}
+
+static int create_prog_array(enum bpf_map_type prog_type, uint32_t max_elem,
+			     int p1key)
+{
+	int p2key = 1;
+	int mfd, p1fd, p2fd;
+
+	mfd = bpf_create_map(BPF_MAP_TYPE_PROG_ARRAY, sizeof(int),
+			     sizeof(int), max_elem, 0);
+	if (mfd < 0) {
+		printf("Failed to create prog array '%s'!\n", strerror(errno));
+		return -1;
+	}
+
+	p1fd = create_prog_dummy1(prog_type);
+	p2fd = create_prog_dummy2(prog_type, mfd, p2key);
+	if (p1fd < 0 || p2fd < 0)
+		goto out;
+	if (bpf_map_update_elem(mfd, &p1key, &p1fd, BPF_ANY) < 0)
+		goto out;
+	if (bpf_map_update_elem(mfd, &p2key, &p2fd, BPF_ANY) < 0)
+		goto out;
+	close(p2fd);
+	close(p1fd);
+
+	return mfd;
+out:
+	close(p2fd);
+	close(p1fd);
+	close(mfd);
+	return -1;
+}
+
+static int create_map_in_map(void)
+{
+	int inner_map_fd, outer_map_fd;
+
+	inner_map_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, sizeof(int),
+				      sizeof(int), 1, 0);
+	if (inner_map_fd < 0) {
+		printf("Failed to create array '%s'!\n", strerror(errno));
+		return inner_map_fd;
+	}
+
+	outer_map_fd = bpf_create_map_in_map(BPF_MAP_TYPE_ARRAY_OF_MAPS, NULL,
+					     sizeof(int), inner_map_fd, 1, 0);
+	if (outer_map_fd < 0)
+		printf("Failed to create array of maps '%s'!\n",
+		       strerror(errno));
+
+	close(inner_map_fd);
+
+	return outer_map_fd;
+}
+
+static int create_cgroup_storage(void)
+{
+	int fd;
+
+	fd = bpf_create_map(BPF_MAP_TYPE_CGROUP_STORAGE,
+			    sizeof(struct bpf_cgroup_storage_key),
+			    TEST_DATA_LEN, 0, 0);
+	if (fd < 0)
+		printf("Failed to create array '%s'!\n", strerror(errno));
+
+	return fd;
+}
+
+static char bpf_vlog[UINT_MAX >> 8];
+
+static void do_test_fixup(struct bpf_test *test, enum bpf_map_type prog_type,
+			  struct bpf_insn *prog, int *map_fds)
+{
+	int *fixup_map1 = test->fixup_map1;
+	int *fixup_map2 = test->fixup_map2;
+	int *fixup_map3 = test->fixup_map3;
+	int *fixup_map4 = test->fixup_map4;
+	int *fixup_prog1 = test->fixup_prog1;
+	int *fixup_prog2 = test->fixup_prog2;
+	int *fixup_map_in_map = test->fixup_map_in_map;
+	int *fixup_cgroup_storage = test->fixup_cgroup_storage;
+
+	if (test->fill_helper)
+		test->fill_helper(test);
+
+	/* Allocating HTs with 1 elem is fine here, since we only test
+	 * for verifier and not do a runtime lookup, so the only thing
+	 * that really matters is value size in this case.
+	 */
+	if (*fixup_map1) {
+		map_fds[0] = create_map(BPF_MAP_TYPE_HASH, sizeof(long long),
+					sizeof(long long), 1);
+		do {
+			prog[*fixup_map1].imm = map_fds[0];
+			fixup_map1++;
+		} while (*fixup_map1);
+	}
+
+	if (*fixup_map2) {
+		map_fds[1] = create_map(BPF_MAP_TYPE_HASH, sizeof(long long),
+					sizeof(struct test_val), 1);
+		do {
+			prog[*fixup_map2].imm = map_fds[1];
+			fixup_map2++;
+		} while (*fixup_map2);
+	}
+
+	if (*fixup_map3) {
+		map_fds[2] = create_map(BPF_MAP_TYPE_HASH, sizeof(long long),
+					sizeof(struct other_val), 1);
+		do {
+			prog[*fixup_map3].imm = map_fds[2];
+			fixup_map3++;
+		} while (*fixup_map3);
+	}
+
+	if (*fixup_map4) {
+		map_fds[3] = create_map(BPF_MAP_TYPE_ARRAY, sizeof(int),
+					sizeof(struct test_val), 1);
+		do {
+			prog[*fixup_map4].imm = map_fds[3];
+			fixup_map4++;
+		} while (*fixup_map4);
+	}
+
+	if (*fixup_prog1) {
+		map_fds[4] = create_prog_array(prog_type, 4, 0);
+		do {
+			prog[*fixup_prog1].imm = map_fds[4];
+			fixup_prog1++;
+		} while (*fixup_prog1);
+	}
+
+	if (*fixup_prog2) {
+		map_fds[5] = create_prog_array(prog_type, 8, 7);
+		do {
+			prog[*fixup_prog2].imm = map_fds[5];
+			fixup_prog2++;
+		} while (*fixup_prog2);
+	}
+
+	if (*fixup_map_in_map) {
+		map_fds[6] = create_map_in_map();
+		do {
+			prog[*fixup_map_in_map].imm = map_fds[6];
+			fixup_map_in_map++;
+		} while (*fixup_map_in_map);
+	}
+
+	if (*fixup_cgroup_storage) {
+		map_fds[7] = create_cgroup_storage();
+		do {
+			prog[*fixup_cgroup_storage].imm = map_fds[7];
+			fixup_cgroup_storage++;
+		} while (*fixup_cgroup_storage);
+	}
+}
+
+static int set_admin(bool admin)
+{
+	cap_t caps;
+	const cap_value_t cap_val = CAP_SYS_ADMIN;
+	int ret = -1;
+
+	caps = cap_get_proc();
+	if (!caps) {
+		perror("cap_get_proc");
+		return -1;
+	}
+	if (cap_set_flag(caps, CAP_EFFECTIVE, 1, &cap_val,
+				admin ? CAP_SET : CAP_CLEAR)) {
+		perror("cap_set_flag");
+		goto out;
+	}
+	if (cap_set_proc(caps)) {
+		perror("cap_set_proc");
+		goto out;
+	}
+	ret = 0;
+out:
+	if (cap_free(caps))
+		perror("cap_free");
+	return ret;
+}
+
+static void do_test_single(struct bpf_test *test, bool unpriv,
+			   int *passes, int *errors)
+{
+	int fd_prog, expected_ret, alignment_prevented_execution;
+	int prog_len, prog_type = test->prog_type;
+	struct bpf_insn *prog = test->insns;
+	int map_fds[MAX_NR_MAPS];
+	const char *expected_err;
+	uint32_t expected_val;
+	uint32_t retval;
+	__u32 pflags;
+	int i, err;
+
+	for (i = 0; i < MAX_NR_MAPS; i++)
+		map_fds[i] = -1;
+
+	if (!prog_type)
+		prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
+	do_test_fixup(test, prog_type, prog, map_fds);
+	prog_len = probe_filter_length(prog);
+
+	pflags = 0;
+	if (test->flags & F_LOAD_WITH_STRICT_ALIGNMENT)
+		pflags |= BPF_F_STRICT_ALIGNMENT;
+	if (test->flags & F_NEEDS_EFFICIENT_UNALIGNED_ACCESS)
+		pflags |= BPF_F_ANY_ALIGNMENT;
+	fd_prog = bpf_verify_program(prog_type, prog, prog_len, pflags,
+				     "GPL", 0, bpf_vlog, sizeof(bpf_vlog), 1);
+
+	expected_ret = unpriv && test->result_unpriv != UNDEF ?
+		       test->result_unpriv : test->result;
+	expected_err = unpriv && test->errstr_unpriv ?
+		       test->errstr_unpriv : test->errstr;
+	expected_val = unpriv && test->retval_unpriv ?
+		       test->retval_unpriv : test->retval;
+
+	alignment_prevented_execution = 0;
+
+	if (expected_ret == ACCEPT) {
+		if (fd_prog < 0) {
+			printf("FAIL\nFailed to load prog '%s'!\n",
+			       strerror(errno));
+			goto fail_log;
+		}
+#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+		if (fd_prog >= 0 &&
+		    (test->flags & F_NEEDS_EFFICIENT_UNALIGNED_ACCESS)) {
+			alignment_prevented_execution = 1;
+			goto test_ok;
+		}
+#endif
+	} else {
+		if (fd_prog >= 0) {
+			printf("FAIL\nUnexpected success to load!\n");
+			goto fail_log;
+		}
+		if (!strstr(bpf_vlog, expected_err)) {
+			printf("FAIL\nUnexpected error message!\n\tEXP: %s\n\tRES: %s\n",
+			      expected_err, bpf_vlog);
+			goto fail_log;
+		}
+	}
+
+	if (fd_prog >= 0) {
+		__u8 tmp[TEST_DATA_LEN << 2];
+		__u32 size_tmp = sizeof(tmp);
+
+		if (unpriv)
+			set_admin(true);
+		err = bpf_prog_test_run(fd_prog, 1, test->data,
+					sizeof(test->data), tmp, &size_tmp,
+					&retval, NULL);
+		if (unpriv)
+			set_admin(false);
+		if (err && errno != 524/*ENOTSUPP*/ && errno != EPERM) {
+			printf("Unexpected bpf_prog_test_run error\n");
+			goto fail_log;
+		}
+		if (!err && retval != expected_val &&
+		    expected_val != POINTER_VALUE) {
+			printf("FAIL retval %d != %d\n", retval, expected_val);
+			goto fail_log;
+		}
+	}
+#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+test_ok:
+#endif
+	(*passes)++;
+	printf("OK%s\n", alignment_prevented_execution ?
+	       " (NOTE: not executed due to unknown alignment)" : "");
+close_fds:
+	close(fd_prog);
+	for (i = 0; i < MAX_NR_MAPS; i++)
+		close(map_fds[i]);
+	sched_yield();
+	return;
+fail_log:
+	(*errors)++;
+	printf("%s", bpf_vlog);
+	goto close_fds;
+}
+
+static bool is_admin(void)
+{
+	cap_t caps;
+	cap_flag_value_t sysadmin = CAP_CLEAR;
+	const cap_value_t cap_val = CAP_SYS_ADMIN;
+
+#ifdef CAP_IS_SUPPORTED
+	if (!CAP_IS_SUPPORTED(CAP_SETFCAP)) {
+		perror("cap_get_flag");
+		return false;
+	}
+#endif
+	caps = cap_get_proc();
+	if (!caps) {
+		perror("cap_get_proc");
+		return false;
+	}
+	if (cap_get_flag(caps, cap_val, CAP_EFFECTIVE, &sysadmin))
+		perror("cap_get_flag");
+	if (cap_free(caps))
+		perror("cap_free");
+	return (sysadmin == CAP_SET);
+}
+
+static void get_unpriv_disabled()
+{
+	char buf[2];
+	FILE *fd;
+
+	fd = fopen("/proc/sys/"UNPRIV_SYSCTL, "r");
+	if (!fd) {
+		perror("fopen /proc/sys/"UNPRIV_SYSCTL);
+		unpriv_disabled = true;
+		return;
+	}
+	if (fgets(buf, 2, fd) == buf && atoi(buf))
+		unpriv_disabled = true;
+	fclose(fd);
+}
+
+static bool test_as_unpriv(struct bpf_test *test)
+{
+#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+	/* Some architectures have strict alignment requirements. In
+	 * that case, the BPF verifier detects if a program has
+	 * unaligned accesses and rejects them. A user can pass
+	 * BPF_F_ANY_ALIGNMENT to a program to override this
+	 * check. That, however, will only work when a privileged user
+	 * loads a program. An unprivileged user loading a program
+	 * with this flag will be rejected prior entering the
+	 * verifier.
+	 */
+	if (test->flags & F_NEEDS_EFFICIENT_UNALIGNED_ACCESS)
+		return false;
+#endif
+	return !test->prog_type ||
+	       test->prog_type == BPF_PROG_TYPE_SOCKET_FILTER ||
+	       test->prog_type == BPF_PROG_TYPE_CGROUP_SKB;
+}
+
+static int do_test(bool unpriv, unsigned int from, unsigned int to)
+{
+	int i, passes = 0, errors = 0, skips = 0;
+
+	for (i = from; i < to; i++) {
+		struct bpf_test *test = &tests[i];
+
+		/* Program types that are not supported by non-root we
+		 * skip right away.
+		 */
+		if (test_as_unpriv(test) && unpriv_disabled) {
+			printf("#%d/u %s SKIP\n", i, test->descr);
+			skips++;
+		} else if (test_as_unpriv(test)) {
+			if (!unpriv)
+				set_admin(false);
+			printf("#%d/u %s ", i, test->descr);
+			do_test_single(test, true, &passes, &errors);
+			if (!unpriv)
+				set_admin(true);
+		}
+
+		if (unpriv) {
+			printf("#%d/p %s SKIP\n", i, test->descr);
+			skips++;
+		} else {
+			printf("#%d/p %s ", i, test->descr);
+			do_test_single(test, false, &passes, &errors);
+		}
+	}
+
+	printf("Summary: %d PASSED, %d SKIPPED, %d FAILED\n", passes,
+	       skips, errors);
+	return errors ? EXIT_FAILURE : EXIT_SUCCESS;
+}
+
+int main(int argc, char **argv)
+{
+	unsigned int from = 0, to = ARRAY_SIZE(tests);
+	bool unpriv = !is_admin();
+
+	if (argc == 3) {
+		unsigned int l = atoi(argv[argc - 2]);
+		unsigned int u = atoi(argv[argc - 1]);
+
+		if (l < to && u < to) {
+			from = l;
+			to   = u + 1;
+		}
+	} else if (argc == 2) {
+		unsigned int t = atoi(argv[argc - 1]);
+
+		if (t < to) {
+			from = t;
+			to   = t + 1;
+		}
+	}
+
+	get_unpriv_disabled();
+	if (unpriv && unpriv_disabled) {
+		printf("Cannot run as unprivileged user with sysctl %s.\n",
+		       UNPRIV_SYSCTL);
+		return EXIT_FAILURE;
+	}
+
+	bpf_semi_rand_init();
+	return do_test(unpriv, from, to);
+}
diff --git a/tools/testing/selftests/bpf/test_verifier_log.c b/tools/testing/selftests/bpf/test_verifier_log.c
new file mode 100644
index 000000000..8d6918c3b
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_verifier_log.c
@@ -0,0 +1,174 @@
+#include <errno.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/time.h>
+
+#include <linux/bpf.h>
+#include <linux/filter.h>
+#include <linux/unistd.h>
+
+#include <bpf/bpf.h>
+
+#include "bpf_rlimit.h"
+
+#define LOG_SIZE (1 << 20)
+
+#define err(str...)	printf("ERROR: " str)
+
+static const struct bpf_insn code_sample[] = {
+	/* We need a few instructions to pass the min log length */
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+		     BPF_FUNC_map_lookup_elem),
+	BPF_EXIT_INSN(),
+};
+
+static inline __u64 ptr_to_u64(const void *ptr)
+{
+	return (__u64) (unsigned long) ptr;
+}
+
+static int load(char *log, size_t log_len, int log_level)
+{
+	union bpf_attr attr;
+
+	bzero(&attr, sizeof(attr));
+	attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
+	attr.insn_cnt = (__u32)(sizeof(code_sample) / sizeof(struct bpf_insn));
+	attr.insns = ptr_to_u64(code_sample);
+	attr.license = ptr_to_u64("GPL");
+	attr.log_buf = ptr_to_u64(log);
+	attr.log_size = log_len;
+	attr.log_level = log_level;
+
+	return syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
+}
+
+static void check_ret(int ret, int exp_errno)
+{
+	if (ret > 0) {
+		close(ret);
+		err("broken sample loaded successfully!?\n");
+		exit(1);
+	}
+
+	if (!ret || errno != exp_errno) {
+		err("Program load returned: ret:%d/errno:%d, expected ret:%d/errno:%d\n",
+		    ret, errno, -1, exp_errno);
+		exit(1);
+	}
+}
+
+static void check_ones(const char *buf, size_t len, const char *msg)
+{
+	while (len--)
+		if (buf[len] != 1) {
+			err("%s", msg);
+			exit(1);
+		}
+}
+
+static void test_log_good(char *log, size_t buf_len, size_t log_len,
+			  size_t exp_len, int exp_errno, const char *full_log)
+{
+	size_t len;
+	int ret;
+
+	memset(log, 1, buf_len);
+
+	ret = load(log, log_len, 1);
+	check_ret(ret, exp_errno);
+
+	len = strnlen(log, buf_len);
+	if (len == buf_len) {
+		err("verifier did not NULL terminate the log\n");
+		exit(1);
+	}
+	if (exp_len && len != exp_len) {
+		err("incorrect log length expected:%zd have:%zd\n",
+		    exp_len, len);
+		exit(1);
+	}
+
+	if (strchr(log, 1)) {
+		err("verifier leaked a byte through\n");
+		exit(1);
+	}
+
+	check_ones(log + len + 1, buf_len - len - 1,
+		   "verifier wrote bytes past NULL termination\n");
+
+	if (memcmp(full_log, log, LOG_SIZE)) {
+		err("log did not match expected output\n");
+		exit(1);
+	}
+}
+
+static void test_log_bad(char *log, size_t log_len, int log_level)
+{
+	int ret;
+
+	ret = load(log, log_len, log_level);
+	check_ret(ret, EINVAL);
+	if (log)
+		check_ones(log, LOG_SIZE,
+			   "verifier touched log with bad parameters\n");
+}
+
+int main(int argc, char **argv)
+{
+	char full_log[LOG_SIZE];
+	char log[LOG_SIZE];
+	size_t want_len;
+	int i;
+
+	memset(log, 1, LOG_SIZE);
+
+	/* Test incorrect attr */
+	printf("Test log_level 0...\n");
+	test_log_bad(log, LOG_SIZE, 0);
+
+	printf("Test log_size < 128...\n");
+	test_log_bad(log, 15, 1);
+
+	printf("Test log_buff = NULL...\n");
+	test_log_bad(NULL, LOG_SIZE, 1);
+
+	/* Test with log big enough */
+	printf("Test oversized buffer...\n");
+	test_log_good(full_log, LOG_SIZE, LOG_SIZE, 0, EACCES, full_log);
+
+	want_len = strlen(full_log);
+
+	printf("Test exact buffer...\n");
+	test_log_good(log, LOG_SIZE, want_len + 2, want_len, EACCES, full_log);
+
+	printf("Test undersized buffers...\n");
+	for (i = 0; i < 64; i++) {
+		full_log[want_len - i + 1] = 1;
+		full_log[want_len - i] = 0;
+
+		test_log_good(log, LOG_SIZE, want_len + 1 - i, want_len - i,
+			      ENOSPC, full_log);
+	}
+
+	printf("test_verifier_log: OK\n");
+	return 0;
+}
diff --git a/tools/testing/selftests/bpf/test_xdp.c b/tools/testing/selftests/bpf/test_xdp.c
new file mode 100644
index 000000000..5e7df8bb5
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_xdp.c
@@ -0,0 +1,235 @@
+/* Copyright (c) 2016,2017 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <stddef.h>
+#include <string.h>
+#include <linux/bpf.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/in.h>
+#include <linux/udp.h>
+#include <linux/tcp.h>
+#include <linux/pkt_cls.h>
+#include <sys/socket.h>
+#include "bpf_helpers.h"
+#include "bpf_endian.h"
+#include "test_iptunnel_common.h"
+
+int _version SEC("version") = 1;
+
+struct bpf_map_def SEC("maps") rxcnt = {
+	.type = BPF_MAP_TYPE_PERCPU_ARRAY,
+	.key_size = sizeof(__u32),
+	.value_size = sizeof(__u64),
+	.max_entries = 256,
+};
+
+struct bpf_map_def SEC("maps") vip2tnl = {
+	.type = BPF_MAP_TYPE_HASH,
+	.key_size = sizeof(struct vip),
+	.value_size = sizeof(struct iptnl_info),
+	.max_entries = MAX_IPTNL_ENTRIES,
+};
+
+static __always_inline void count_tx(__u32 protocol)
+{
+	__u64 *rxcnt_count;
+
+	rxcnt_count = bpf_map_lookup_elem(&rxcnt, &protocol);
+	if (rxcnt_count)
+		*rxcnt_count += 1;
+}
+
+static __always_inline int get_dport(void *trans_data, void *data_end,
+				     __u8 protocol)
+{
+	struct tcphdr *th;
+	struct udphdr *uh;
+
+	switch (protocol) {
+	case IPPROTO_TCP:
+		th = (struct tcphdr *)trans_data;
+		if (th + 1 > data_end)
+			return -1;
+		return th->dest;
+	case IPPROTO_UDP:
+		uh = (struct udphdr *)trans_data;
+		if (uh + 1 > data_end)
+			return -1;
+		return uh->dest;
+	default:
+		return 0;
+	}
+}
+
+static __always_inline void set_ethhdr(struct ethhdr *new_eth,
+				       const struct ethhdr *old_eth,
+				       const struct iptnl_info *tnl,
+				       __be16 h_proto)
+{
+	memcpy(new_eth->h_source, old_eth->h_dest, sizeof(new_eth->h_source));
+	memcpy(new_eth->h_dest, tnl->dmac, sizeof(new_eth->h_dest));
+	new_eth->h_proto = h_proto;
+}
+
+static __always_inline int handle_ipv4(struct xdp_md *xdp)
+{
+	void *data_end = (void *)(long)xdp->data_end;
+	void *data = (void *)(long)xdp->data;
+	struct iptnl_info *tnl;
+	struct ethhdr *new_eth;
+	struct ethhdr *old_eth;
+	struct iphdr *iph = data + sizeof(struct ethhdr);
+	__u16 *next_iph;
+	__u16 payload_len;
+	struct vip vip = {};
+	int dport;
+	__u32 csum = 0;
+	int i;
+
+	if (iph + 1 > data_end)
+		return XDP_DROP;
+
+	dport = get_dport(iph + 1, data_end, iph->protocol);
+	if (dport == -1)
+		return XDP_DROP;
+
+	vip.protocol = iph->protocol;
+	vip.family = AF_INET;
+	vip.daddr.v4 = iph->daddr;
+	vip.dport = dport;
+	payload_len = bpf_ntohs(iph->tot_len);
+
+	tnl = bpf_map_lookup_elem(&vip2tnl, &vip);
+	/* It only does v4-in-v4 */
+	if (!tnl || tnl->family != AF_INET)
+		return XDP_PASS;
+
+	if (bpf_xdp_adjust_head(xdp, 0 - (int)sizeof(struct iphdr)))
+		return XDP_DROP;
+
+	data = (void *)(long)xdp->data;
+	data_end = (void *)(long)xdp->data_end;
+
+	new_eth = data;
+	iph = data + sizeof(*new_eth);
+	old_eth = data + sizeof(*iph);
+
+	if (new_eth + 1 > data_end ||
+	    old_eth + 1 > data_end ||
+	    iph + 1 > data_end)
+		return XDP_DROP;
+
+	set_ethhdr(new_eth, old_eth, tnl, bpf_htons(ETH_P_IP));
+
+	iph->version = 4;
+	iph->ihl = sizeof(*iph) >> 2;
+	iph->frag_off =	0;
+	iph->protocol = IPPROTO_IPIP;
+	iph->check = 0;
+	iph->tos = 0;
+	iph->tot_len = bpf_htons(payload_len + sizeof(*iph));
+	iph->daddr = tnl->daddr.v4;
+	iph->saddr = tnl->saddr.v4;
+	iph->ttl = 8;
+
+	next_iph = (__u16 *)iph;
+#pragma clang loop unroll(full)
+	for (i = 0; i < sizeof(*iph) >> 1; i++)
+		csum += *next_iph++;
+
+	iph->check = ~((csum & 0xffff) + (csum >> 16));
+
+	count_tx(vip.protocol);
+
+	return XDP_TX;
+}
+
+static __always_inline int handle_ipv6(struct xdp_md *xdp)
+{
+	void *data_end = (void *)(long)xdp->data_end;
+	void *data = (void *)(long)xdp->data;
+	struct iptnl_info *tnl;
+	struct ethhdr *new_eth;
+	struct ethhdr *old_eth;
+	struct ipv6hdr *ip6h = data + sizeof(struct ethhdr);
+	__u16 payload_len;
+	struct vip vip = {};
+	int dport;
+
+	if (ip6h + 1 > data_end)
+		return XDP_DROP;
+
+	dport = get_dport(ip6h + 1, data_end, ip6h->nexthdr);
+	if (dport == -1)
+		return XDP_DROP;
+
+	vip.protocol = ip6h->nexthdr;
+	vip.family = AF_INET6;
+	memcpy(vip.daddr.v6, ip6h->daddr.s6_addr32, sizeof(vip.daddr));
+	vip.dport = dport;
+	payload_len = ip6h->payload_len;
+
+	tnl = bpf_map_lookup_elem(&vip2tnl, &vip);
+	/* It only does v6-in-v6 */
+	if (!tnl || tnl->family != AF_INET6)
+		return XDP_PASS;
+
+	if (bpf_xdp_adjust_head(xdp, 0 - (int)sizeof(struct ipv6hdr)))
+		return XDP_DROP;
+
+	data = (void *)(long)xdp->data;
+	data_end = (void *)(long)xdp->data_end;
+
+	new_eth = data;
+	ip6h = data + sizeof(*new_eth);
+	old_eth = data + sizeof(*ip6h);
+
+	if (new_eth + 1 > data_end || old_eth + 1 > data_end ||
+	    ip6h + 1 > data_end)
+		return XDP_DROP;
+
+	set_ethhdr(new_eth, old_eth, tnl, bpf_htons(ETH_P_IPV6));
+
+	ip6h->version = 6;
+	ip6h->priority = 0;
+	memset(ip6h->flow_lbl, 0, sizeof(ip6h->flow_lbl));
+	ip6h->payload_len = bpf_htons(bpf_ntohs(payload_len) + sizeof(*ip6h));
+	ip6h->nexthdr = IPPROTO_IPV6;
+	ip6h->hop_limit = 8;
+	memcpy(ip6h->saddr.s6_addr32, tnl->saddr.v6, sizeof(tnl->saddr.v6));
+	memcpy(ip6h->daddr.s6_addr32, tnl->daddr.v6, sizeof(tnl->daddr.v6));
+
+	count_tx(vip.protocol);
+
+	return XDP_TX;
+}
+
+SEC("xdp_tx_iptunnel")
+int _xdp_tx_iptunnel(struct xdp_md *xdp)
+{
+	void *data_end = (void *)(long)xdp->data_end;
+	void *data = (void *)(long)xdp->data;
+	struct ethhdr *eth = data;
+	__u16 h_proto;
+
+	if (eth + 1 > data_end)
+		return XDP_DROP;
+
+	h_proto = eth->h_proto;
+
+	if (h_proto == bpf_htons(ETH_P_IP))
+		return handle_ipv4(xdp);
+	else if (h_proto == bpf_htons(ETH_P_IPV6))
+
+		return handle_ipv6(xdp);
+	else
+		return XDP_DROP;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_xdp_meta.c b/tools/testing/selftests/bpf/test_xdp_meta.c
new file mode 100644
index 000000000..8d0182650
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_xdp_meta.c
@@ -0,0 +1,53 @@
+#include <linux/bpf.h>
+#include <linux/if_ether.h>
+#include <linux/pkt_cls.h>
+
+#include "bpf_helpers.h"
+
+#define __round_mask(x, y) ((__typeof__(x))((y) - 1))
+#define round_up(x, y) ((((x) - 1) | __round_mask(x, y)) + 1)
+#define ctx_ptr(ctx, mem) (void *)(unsigned long)ctx->mem
+
+SEC("t")
+int ing_cls(struct __sk_buff *ctx)
+{
+	__u8 *data, *data_meta, *data_end;
+	__u32 diff = 0;
+
+	data_meta = ctx_ptr(ctx, data_meta);
+	data_end  = ctx_ptr(ctx, data_end);
+	data      = ctx_ptr(ctx, data);
+
+	if (data + ETH_ALEN > data_end ||
+	    data_meta + round_up(ETH_ALEN, 4) > data)
+		return TC_ACT_SHOT;
+
+	diff |= ((__u32 *)data_meta)[0] ^ ((__u32 *)data)[0];
+	diff |= ((__u16 *)data_meta)[2] ^ ((__u16 *)data)[2];
+
+	return diff ? TC_ACT_SHOT : TC_ACT_OK;
+}
+
+SEC("x")
+int ing_xdp(struct xdp_md *ctx)
+{
+	__u8 *data, *data_meta, *data_end;
+	int ret;
+
+	ret = bpf_xdp_adjust_meta(ctx, -round_up(ETH_ALEN, 4));
+	if (ret < 0)
+		return XDP_DROP;
+
+	data_meta = ctx_ptr(ctx, data_meta);
+	data_end  = ctx_ptr(ctx, data_end);
+	data      = ctx_ptr(ctx, data);
+
+	if (data + ETH_ALEN > data_end ||
+	    data_meta + round_up(ETH_ALEN, 4) > data)
+		return XDP_DROP;
+
+	__builtin_memcpy(data_meta, data, ETH_ALEN);
+	return XDP_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_xdp_meta.sh b/tools/testing/selftests/bpf/test_xdp_meta.sh
new file mode 100755
index 000000000..637fcf4fe
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_xdp_meta.sh
@@ -0,0 +1,52 @@
+#!/bin/sh
+
+cleanup()
+{
+	if [ "$?" = "0" ]; then
+		echo "selftests: test_xdp_meta [PASS]";
+	else
+		echo "selftests: test_xdp_meta [FAILED]";
+	fi
+
+	set +e
+	ip link del veth1 2> /dev/null
+	ip netns del ns1 2> /dev/null
+	ip netns del ns2 2> /dev/null
+}
+
+ip link set dev lo xdp off 2>/dev/null > /dev/null
+if [ $? -ne 0 ];then
+	echo "selftests: [SKIP] Could not run test without the ip xdp support"
+	exit 0
+fi
+set -e
+
+ip netns add ns1
+ip netns add ns2
+
+trap cleanup 0 2 3 6 9
+
+ip link add veth1 type veth peer name veth2
+
+ip link set veth1 netns ns1
+ip link set veth2 netns ns2
+
+ip netns exec ns1 ip addr add 10.1.1.11/24 dev veth1
+ip netns exec ns2 ip addr add 10.1.1.22/24 dev veth2
+
+ip netns exec ns1 tc qdisc add dev veth1 clsact
+ip netns exec ns2 tc qdisc add dev veth2 clsact
+
+ip netns exec ns1 tc filter add dev veth1 ingress bpf da obj test_xdp_meta.o sec t
+ip netns exec ns2 tc filter add dev veth2 ingress bpf da obj test_xdp_meta.o sec t
+
+ip netns exec ns1 ip link set dev veth1 xdp obj test_xdp_meta.o sec x
+ip netns exec ns2 ip link set dev veth2 xdp obj test_xdp_meta.o sec x
+
+ip netns exec ns1 ip link set dev veth1 up
+ip netns exec ns2 ip link set dev veth2 up
+
+ip netns exec ns1 ping -c 1 10.1.1.22
+ip netns exec ns2 ping -c 1 10.1.1.11
+
+exit 0
diff --git a/tools/testing/selftests/bpf/test_xdp_noinline.c b/tools/testing/selftests/bpf/test_xdp_noinline.c
new file mode 100644
index 000000000..5e4aac74f
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_xdp_noinline.c
@@ -0,0 +1,833 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2017 Facebook
+#include <stddef.h>
+#include <stdbool.h>
+#include <string.h>
+#include <linux/pkt_cls.h>
+#include <linux/bpf.h>
+#include <linux/in.h>
+#include <linux/if_ether.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/icmp.h>
+#include <linux/icmpv6.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+#include "bpf_helpers.h"
+
+#define bpf_printk(fmt, ...)				\
+({							\
+	char ____fmt[] = fmt;				\
+	bpf_trace_printk(____fmt, sizeof(____fmt),	\
+			##__VA_ARGS__);			\
+})
+
+static __u32 rol32(__u32 word, unsigned int shift)
+{
+	return (word << shift) | (word >> ((-shift) & 31));
+}
+
+/* copy paste of jhash from kernel sources to make sure llvm
+ * can compile it into valid sequence of bpf instructions
+ */
+#define __jhash_mix(a, b, c)			\
+{						\
+	a -= c;  a ^= rol32(c, 4);  c += b;	\
+	b -= a;  b ^= rol32(a, 6);  a += c;	\
+	c -= b;  c ^= rol32(b, 8);  b += a;	\
+	a -= c;  a ^= rol32(c, 16); c += b;	\
+	b -= a;  b ^= rol32(a, 19); a += c;	\
+	c -= b;  c ^= rol32(b, 4);  b += a;	\
+}
+
+#define __jhash_final(a, b, c)			\
+{						\
+	c ^= b; c -= rol32(b, 14);		\
+	a ^= c; a -= rol32(c, 11);		\
+	b ^= a; b -= rol32(a, 25);		\
+	c ^= b; c -= rol32(b, 16);		\
+	a ^= c; a -= rol32(c, 4);		\
+	b ^= a; b -= rol32(a, 14);		\
+	c ^= b; c -= rol32(b, 24);		\
+}
+
+#define JHASH_INITVAL		0xdeadbeef
+
+typedef unsigned int u32;
+
+static __attribute__ ((noinline))
+u32 jhash(const void *key, u32 length, u32 initval)
+{
+	u32 a, b, c;
+	const unsigned char *k = key;
+
+	a = b = c = JHASH_INITVAL + length + initval;
+
+	while (length > 12) {
+		a += *(u32 *)(k);
+		b += *(u32 *)(k + 4);
+		c += *(u32 *)(k + 8);
+		__jhash_mix(a, b, c);
+		length -= 12;
+		k += 12;
+	}
+	switch (length) {
+	case 12: c += (u32)k[11]<<24;
+	case 11: c += (u32)k[10]<<16;
+	case 10: c += (u32)k[9]<<8;
+	case 9:  c += k[8];
+	case 8:  b += (u32)k[7]<<24;
+	case 7:  b += (u32)k[6]<<16;
+	case 6:  b += (u32)k[5]<<8;
+	case 5:  b += k[4];
+	case 4:  a += (u32)k[3]<<24;
+	case 3:  a += (u32)k[2]<<16;
+	case 2:  a += (u32)k[1]<<8;
+	case 1:  a += k[0];
+		 __jhash_final(a, b, c);
+	case 0: /* Nothing left to add */
+		break;
+	}
+
+	return c;
+}
+
+static __attribute__ ((noinline))
+u32 __jhash_nwords(u32 a, u32 b, u32 c, u32 initval)
+{
+	a += initval;
+	b += initval;
+	c += initval;
+	__jhash_final(a, b, c);
+	return c;
+}
+
+static __attribute__ ((noinline))
+u32 jhash_2words(u32 a, u32 b, u32 initval)
+{
+	return __jhash_nwords(a, b, 0, initval + JHASH_INITVAL + (2 << 2));
+}
+
+struct flow_key {
+	union {
+		__be32 src;
+		__be32 srcv6[4];
+	};
+	union {
+		__be32 dst;
+		__be32 dstv6[4];
+	};
+	union {
+		__u32 ports;
+		__u16 port16[2];
+	};
+	__u8 proto;
+};
+
+struct packet_description {
+	struct flow_key flow;
+	__u8 flags;
+};
+
+struct ctl_value {
+	union {
+		__u64 value;
+		__u32 ifindex;
+		__u8 mac[6];
+	};
+};
+
+struct vip_definition {
+	union {
+		__be32 vip;
+		__be32 vipv6[4];
+	};
+	__u16 port;
+	__u16 family;
+	__u8 proto;
+};
+
+struct vip_meta {
+	__u32 flags;
+	__u32 vip_num;
+};
+
+struct real_pos_lru {
+	__u32 pos;
+	__u64 atime;
+};
+
+struct real_definition {
+	union {
+		__be32 dst;
+		__be32 dstv6[4];
+	};
+	__u8 flags;
+};
+
+struct lb_stats {
+	__u64 v2;
+	__u64 v1;
+};
+
+struct bpf_map_def __attribute__ ((section("maps"), used)) vip_map = {
+	.type = BPF_MAP_TYPE_HASH,
+	.key_size = sizeof(struct vip_definition),
+	.value_size = sizeof(struct vip_meta),
+	.max_entries = 512,
+	.map_flags = 0,
+};
+
+struct bpf_map_def __attribute__ ((section("maps"), used)) lru_cache = {
+	.type = BPF_MAP_TYPE_LRU_HASH,
+	.key_size = sizeof(struct flow_key),
+	.value_size = sizeof(struct real_pos_lru),
+	.max_entries = 300,
+	.map_flags = 1U << 1,
+};
+
+struct bpf_map_def __attribute__ ((section("maps"), used)) ch_rings = {
+	.type = BPF_MAP_TYPE_ARRAY,
+	.key_size = sizeof(__u32),
+	.value_size = sizeof(__u32),
+	.max_entries = 12 * 655,
+	.map_flags = 0,
+};
+
+struct bpf_map_def __attribute__ ((section("maps"), used)) reals = {
+	.type = BPF_MAP_TYPE_ARRAY,
+	.key_size = sizeof(__u32),
+	.value_size = sizeof(struct real_definition),
+	.max_entries = 40,
+	.map_flags = 0,
+};
+
+struct bpf_map_def __attribute__ ((section("maps"), used)) stats = {
+	.type = BPF_MAP_TYPE_PERCPU_ARRAY,
+	.key_size = sizeof(__u32),
+	.value_size = sizeof(struct lb_stats),
+	.max_entries = 515,
+	.map_flags = 0,
+};
+
+struct bpf_map_def __attribute__ ((section("maps"), used)) ctl_array = {
+	.type = BPF_MAP_TYPE_ARRAY,
+	.key_size = sizeof(__u32),
+	.value_size = sizeof(struct ctl_value),
+	.max_entries = 16,
+	.map_flags = 0,
+};
+
+struct eth_hdr {
+	unsigned char eth_dest[6];
+	unsigned char eth_source[6];
+	unsigned short eth_proto;
+};
+
+static inline __u64 calc_offset(bool is_ipv6, bool is_icmp)
+{
+	__u64 off = sizeof(struct eth_hdr);
+	if (is_ipv6) {
+		off += sizeof(struct ipv6hdr);
+		if (is_icmp)
+			off += sizeof(struct icmp6hdr) + sizeof(struct ipv6hdr);
+	} else {
+		off += sizeof(struct iphdr);
+		if (is_icmp)
+			off += sizeof(struct icmphdr) + sizeof(struct iphdr);
+	}
+	return off;
+}
+
+static __attribute__ ((noinline))
+bool parse_udp(void *data, void *data_end,
+	       bool is_ipv6, struct packet_description *pckt)
+{
+
+	bool is_icmp = !((pckt->flags & (1 << 0)) == 0);
+	__u64 off = calc_offset(is_ipv6, is_icmp);
+	struct udphdr *udp;
+	udp = data + off;
+
+	if (udp + 1 > data_end)
+		return 0;
+	if (!is_icmp) {
+		pckt->flow.port16[0] = udp->source;
+		pckt->flow.port16[1] = udp->dest;
+	} else {
+		pckt->flow.port16[0] = udp->dest;
+		pckt->flow.port16[1] = udp->source;
+	}
+	return 1;
+}
+
+static __attribute__ ((noinline))
+bool parse_tcp(void *data, void *data_end,
+	       bool is_ipv6, struct packet_description *pckt)
+{
+
+	bool is_icmp = !((pckt->flags & (1 << 0)) == 0);
+	__u64 off = calc_offset(is_ipv6, is_icmp);
+	struct tcphdr *tcp;
+
+	tcp = data + off;
+	if (tcp + 1 > data_end)
+		return 0;
+	if (tcp->syn)
+		pckt->flags |= (1 << 1);
+	if (!is_icmp) {
+		pckt->flow.port16[0] = tcp->source;
+		pckt->flow.port16[1] = tcp->dest;
+	} else {
+		pckt->flow.port16[0] = tcp->dest;
+		pckt->flow.port16[1] = tcp->source;
+	}
+	return 1;
+}
+
+static __attribute__ ((noinline))
+bool encap_v6(struct xdp_md *xdp, struct ctl_value *cval,
+	      struct packet_description *pckt,
+	      struct real_definition *dst, __u32 pkt_bytes)
+{
+	struct eth_hdr *new_eth;
+	struct eth_hdr *old_eth;
+	struct ipv6hdr *ip6h;
+	__u32 ip_suffix;
+	void *data_end;
+	void *data;
+
+	if (bpf_xdp_adjust_head(xdp, 0 - (int)sizeof(struct ipv6hdr)))
+		return 0;
+	data = (void *)(long)xdp->data;
+	data_end = (void *)(long)xdp->data_end;
+	new_eth = data;
+	ip6h = data + sizeof(struct eth_hdr);
+	old_eth = data + sizeof(struct ipv6hdr);
+	if (new_eth + 1 > data_end ||
+	    old_eth + 1 > data_end || ip6h + 1 > data_end)
+		return 0;
+	memcpy(new_eth->eth_dest, cval->mac, 6);
+	memcpy(new_eth->eth_source, old_eth->eth_dest, 6);
+	new_eth->eth_proto = 56710;
+	ip6h->version = 6;
+	ip6h->priority = 0;
+	memset(ip6h->flow_lbl, 0, sizeof(ip6h->flow_lbl));
+
+	ip6h->nexthdr = IPPROTO_IPV6;
+	ip_suffix = pckt->flow.srcv6[3] ^ pckt->flow.port16[0];
+	ip6h->payload_len =
+	    __builtin_bswap16(pkt_bytes + sizeof(struct ipv6hdr));
+	ip6h->hop_limit = 4;
+
+	ip6h->saddr.in6_u.u6_addr32[0] = 1;
+	ip6h->saddr.in6_u.u6_addr32[1] = 2;
+	ip6h->saddr.in6_u.u6_addr32[2] = 3;
+	ip6h->saddr.in6_u.u6_addr32[3] = ip_suffix;
+	memcpy(ip6h->daddr.in6_u.u6_addr32, dst->dstv6, 16);
+	return 1;
+}
+
+static __attribute__ ((noinline))
+bool encap_v4(struct xdp_md *xdp, struct ctl_value *cval,
+	      struct packet_description *pckt,
+	      struct real_definition *dst, __u32 pkt_bytes)
+{
+
+	__u32 ip_suffix = __builtin_bswap16(pckt->flow.port16[0]);
+	struct eth_hdr *new_eth;
+	struct eth_hdr *old_eth;
+	__u16 *next_iph_u16;
+	struct iphdr *iph;
+	__u32 csum = 0;
+	void *data_end;
+	void *data;
+
+	ip_suffix <<= 15;
+	ip_suffix ^= pckt->flow.src;
+	if (bpf_xdp_adjust_head(xdp, 0 - (int)sizeof(struct iphdr)))
+		return 0;
+	data = (void *)(long)xdp->data;
+	data_end = (void *)(long)xdp->data_end;
+	new_eth = data;
+	iph = data + sizeof(struct eth_hdr);
+	old_eth = data + sizeof(struct iphdr);
+	if (new_eth + 1 > data_end ||
+	    old_eth + 1 > data_end || iph + 1 > data_end)
+		return 0;
+	memcpy(new_eth->eth_dest, cval->mac, 6);
+	memcpy(new_eth->eth_source, old_eth->eth_dest, 6);
+	new_eth->eth_proto = 8;
+	iph->version = 4;
+	iph->ihl = 5;
+	iph->frag_off = 0;
+	iph->protocol = IPPROTO_IPIP;
+	iph->check = 0;
+	iph->tos = 1;
+	iph->tot_len = __builtin_bswap16(pkt_bytes + sizeof(struct iphdr));
+	/* don't update iph->daddr, since it will overwrite old eth_proto
+	 * and multiple iterations of bpf_prog_run() will fail
+	 */
+
+	iph->saddr = ((0xFFFF0000 & ip_suffix) | 4268) ^ dst->dst;
+	iph->ttl = 4;
+
+	next_iph_u16 = (__u16 *) iph;
+#pragma clang loop unroll(full)
+	for (int i = 0; i < sizeof(struct iphdr) >> 1; i++)
+		csum += *next_iph_u16++;
+	iph->check = ~((csum & 0xffff) + (csum >> 16));
+	if (bpf_xdp_adjust_head(xdp, (int)sizeof(struct iphdr)))
+		return 0;
+	return 1;
+}
+
+static __attribute__ ((noinline))
+bool decap_v6(struct xdp_md *xdp, void **data, void **data_end, bool inner_v4)
+{
+	struct eth_hdr *new_eth;
+	struct eth_hdr *old_eth;
+
+	old_eth = *data;
+	new_eth = *data + sizeof(struct ipv6hdr);
+	memcpy(new_eth->eth_source, old_eth->eth_source, 6);
+	memcpy(new_eth->eth_dest, old_eth->eth_dest, 6);
+	if (inner_v4)
+		new_eth->eth_proto = 8;
+	else
+		new_eth->eth_proto = 56710;
+	if (bpf_xdp_adjust_head(xdp, (int)sizeof(struct ipv6hdr)))
+		return 0;
+	*data = (void *)(long)xdp->data;
+	*data_end = (void *)(long)xdp->data_end;
+	return 1;
+}
+
+static __attribute__ ((noinline))
+bool decap_v4(struct xdp_md *xdp, void **data, void **data_end)
+{
+	struct eth_hdr *new_eth;
+	struct eth_hdr *old_eth;
+
+	old_eth = *data;
+	new_eth = *data + sizeof(struct iphdr);
+	memcpy(new_eth->eth_source, old_eth->eth_source, 6);
+	memcpy(new_eth->eth_dest, old_eth->eth_dest, 6);
+	new_eth->eth_proto = 8;
+	if (bpf_xdp_adjust_head(xdp, (int)sizeof(struct iphdr)))
+		return 0;
+	*data = (void *)(long)xdp->data;
+	*data_end = (void *)(long)xdp->data_end;
+	return 1;
+}
+
+static __attribute__ ((noinline))
+int swap_mac_and_send(void *data, void *data_end)
+{
+	unsigned char tmp_mac[6];
+	struct eth_hdr *eth;
+
+	eth = data;
+	memcpy(tmp_mac, eth->eth_source, 6);
+	memcpy(eth->eth_source, eth->eth_dest, 6);
+	memcpy(eth->eth_dest, tmp_mac, 6);
+	return XDP_TX;
+}
+
+static __attribute__ ((noinline))
+int send_icmp_reply(void *data, void *data_end)
+{
+	struct icmphdr *icmp_hdr;
+	__u16 *next_iph_u16;
+	__u32 tmp_addr = 0;
+	struct iphdr *iph;
+	__u32 csum1 = 0;
+	__u32 csum = 0;
+	__u64 off = 0;
+
+	if (data + sizeof(struct eth_hdr)
+	     + sizeof(struct iphdr) + sizeof(struct icmphdr) > data_end)
+		return XDP_DROP;
+	off += sizeof(struct eth_hdr);
+	iph = data + off;
+	off += sizeof(struct iphdr);
+	icmp_hdr = data + off;
+	icmp_hdr->type = 0;
+	icmp_hdr->checksum += 0x0007;
+	iph->ttl = 4;
+	tmp_addr = iph->daddr;
+	iph->daddr = iph->saddr;
+	iph->saddr = tmp_addr;
+	iph->check = 0;
+	next_iph_u16 = (__u16 *) iph;
+#pragma clang loop unroll(full)
+	for (int i = 0; i < sizeof(struct iphdr) >> 1; i++)
+		csum += *next_iph_u16++;
+	iph->check = ~((csum & 0xffff) + (csum >> 16));
+	return swap_mac_and_send(data, data_end);
+}
+
+static __attribute__ ((noinline))
+int send_icmp6_reply(void *data, void *data_end)
+{
+	struct icmp6hdr *icmp_hdr;
+	struct ipv6hdr *ip6h;
+	__be32 tmp_addr[4];
+	__u64 off = 0;
+
+	if (data + sizeof(struct eth_hdr)
+	     + sizeof(struct ipv6hdr) + sizeof(struct icmp6hdr) > data_end)
+		return XDP_DROP;
+	off += sizeof(struct eth_hdr);
+	ip6h = data + off;
+	off += sizeof(struct ipv6hdr);
+	icmp_hdr = data + off;
+	icmp_hdr->icmp6_type = 129;
+	icmp_hdr->icmp6_cksum -= 0x0001;
+	ip6h->hop_limit = 4;
+	memcpy(tmp_addr, ip6h->saddr.in6_u.u6_addr32, 16);
+	memcpy(ip6h->saddr.in6_u.u6_addr32, ip6h->daddr.in6_u.u6_addr32, 16);
+	memcpy(ip6h->daddr.in6_u.u6_addr32, tmp_addr, 16);
+	return swap_mac_and_send(data, data_end);
+}
+
+static __attribute__ ((noinline))
+int parse_icmpv6(void *data, void *data_end, __u64 off,
+		 struct packet_description *pckt)
+{
+	struct icmp6hdr *icmp_hdr;
+	struct ipv6hdr *ip6h;
+
+	icmp_hdr = data + off;
+	if (icmp_hdr + 1 > data_end)
+		return XDP_DROP;
+	if (icmp_hdr->icmp6_type == 128)
+		return send_icmp6_reply(data, data_end);
+	if (icmp_hdr->icmp6_type != 3)
+		return XDP_PASS;
+	off += sizeof(struct icmp6hdr);
+	ip6h = data + off;
+	if (ip6h + 1 > data_end)
+		return XDP_DROP;
+	pckt->flow.proto = ip6h->nexthdr;
+	pckt->flags |= (1 << 0);
+	memcpy(pckt->flow.srcv6, ip6h->daddr.in6_u.u6_addr32, 16);
+	memcpy(pckt->flow.dstv6, ip6h->saddr.in6_u.u6_addr32, 16);
+	return -1;
+}
+
+static __attribute__ ((noinline))
+int parse_icmp(void *data, void *data_end, __u64 off,
+	       struct packet_description *pckt)
+{
+	struct icmphdr *icmp_hdr;
+	struct iphdr *iph;
+
+	icmp_hdr = data + off;
+	if (icmp_hdr + 1 > data_end)
+		return XDP_DROP;
+	if (icmp_hdr->type == 8)
+		return send_icmp_reply(data, data_end);
+	if ((icmp_hdr->type != 3) || (icmp_hdr->code != 4))
+		return XDP_PASS;
+	off += sizeof(struct icmphdr);
+	iph = data + off;
+	if (iph + 1 > data_end)
+		return XDP_DROP;
+	if (iph->ihl != 5)
+		return XDP_DROP;
+	pckt->flow.proto = iph->protocol;
+	pckt->flags |= (1 << 0);
+	pckt->flow.src = iph->daddr;
+	pckt->flow.dst = iph->saddr;
+	return -1;
+}
+
+static __attribute__ ((noinline))
+__u32 get_packet_hash(struct packet_description *pckt,
+		      bool hash_16bytes)
+{
+	if (hash_16bytes)
+		return jhash_2words(jhash(pckt->flow.srcv6, 16, 12),
+				    pckt->flow.ports, 24);
+	else
+		return jhash_2words(pckt->flow.src, pckt->flow.ports,
+				    24);
+}
+
+__attribute__ ((noinline))
+static bool get_packet_dst(struct real_definition **real,
+			   struct packet_description *pckt,
+			   struct vip_meta *vip_info,
+			   bool is_ipv6, void *lru_map)
+{
+	struct real_pos_lru new_dst_lru = { };
+	bool hash_16bytes = is_ipv6;
+	__u32 *real_pos, hash, key;
+	__u64 cur_time;
+
+	if (vip_info->flags & (1 << 2))
+		hash_16bytes = 1;
+	if (vip_info->flags & (1 << 3)) {
+		pckt->flow.port16[0] = pckt->flow.port16[1];
+		memset(pckt->flow.srcv6, 0, 16);
+	}
+	hash = get_packet_hash(pckt, hash_16bytes);
+	if (hash != 0x358459b7 /* jhash of ipv4 packet */  &&
+	    hash != 0x2f4bc6bb /* jhash of ipv6 packet */)
+		return 0;
+	key = 2 * vip_info->vip_num + hash % 2;
+	real_pos = bpf_map_lookup_elem(&ch_rings, &key);
+	if (!real_pos)
+		return 0;
+	key = *real_pos;
+	*real = bpf_map_lookup_elem(&reals, &key);
+	if (!(*real))
+		return 0;
+	if (!(vip_info->flags & (1 << 1))) {
+		__u32 conn_rate_key = 512 + 2;
+		struct lb_stats *conn_rate_stats =
+		    bpf_map_lookup_elem(&stats, &conn_rate_key);
+
+		if (!conn_rate_stats)
+			return 1;
+		cur_time = bpf_ktime_get_ns();
+		if ((cur_time - conn_rate_stats->v2) >> 32 > 0xffFFFF) {
+			conn_rate_stats->v1 = 1;
+			conn_rate_stats->v2 = cur_time;
+		} else {
+			conn_rate_stats->v1 += 1;
+			if (conn_rate_stats->v1 >= 1)
+				return 1;
+		}
+		if (pckt->flow.proto == IPPROTO_UDP)
+			new_dst_lru.atime = cur_time;
+		new_dst_lru.pos = key;
+		bpf_map_update_elem(lru_map, &pckt->flow, &new_dst_lru, 0);
+	}
+	return 1;
+}
+
+__attribute__ ((noinline))
+static void connection_table_lookup(struct real_definition **real,
+				    struct packet_description *pckt,
+				    void *lru_map)
+{
+
+	struct real_pos_lru *dst_lru;
+	__u64 cur_time;
+	__u32 key;
+
+	dst_lru = bpf_map_lookup_elem(lru_map, &pckt->flow);
+	if (!dst_lru)
+		return;
+	if (pckt->flow.proto == IPPROTO_UDP) {
+		cur_time = bpf_ktime_get_ns();
+		if (cur_time - dst_lru->atime > 300000)
+			return;
+		dst_lru->atime = cur_time;
+	}
+	key = dst_lru->pos;
+	*real = bpf_map_lookup_elem(&reals, &key);
+}
+
+/* don't believe your eyes!
+ * below function has 6 arguments whereas bpf and llvm allow maximum of 5
+ * but since it's _static_ llvm can optimize one argument away
+ */
+__attribute__ ((noinline))
+static int process_l3_headers_v6(struct packet_description *pckt,
+				 __u8 *protocol, __u64 off,
+				 __u16 *pkt_bytes, void *data,
+				 void *data_end)
+{
+	struct ipv6hdr *ip6h;
+	__u64 iph_len;
+	int action;
+
+	ip6h = data + off;
+	if (ip6h + 1 > data_end)
+		return XDP_DROP;
+	iph_len = sizeof(struct ipv6hdr);
+	*protocol = ip6h->nexthdr;
+	pckt->flow.proto = *protocol;
+	*pkt_bytes = __builtin_bswap16(ip6h->payload_len);
+	off += iph_len;
+	if (*protocol == 45) {
+		return XDP_DROP;
+	} else if (*protocol == 59) {
+		action = parse_icmpv6(data, data_end, off, pckt);
+		if (action >= 0)
+			return action;
+	} else {
+		memcpy(pckt->flow.srcv6, ip6h->saddr.in6_u.u6_addr32, 16);
+		memcpy(pckt->flow.dstv6, ip6h->daddr.in6_u.u6_addr32, 16);
+	}
+	return -1;
+}
+
+__attribute__ ((noinline))
+static int process_l3_headers_v4(struct packet_description *pckt,
+				 __u8 *protocol, __u64 off,
+				 __u16 *pkt_bytes, void *data,
+				 void *data_end)
+{
+	struct iphdr *iph;
+	__u64 iph_len;
+	int action;
+
+	iph = data + off;
+	if (iph + 1 > data_end)
+		return XDP_DROP;
+	if (iph->ihl != 5)
+		return XDP_DROP;
+	*protocol = iph->protocol;
+	pckt->flow.proto = *protocol;
+	*pkt_bytes = __builtin_bswap16(iph->tot_len);
+	off += 20;
+	if (iph->frag_off & 65343)
+		return XDP_DROP;
+	if (*protocol == IPPROTO_ICMP) {
+		action = parse_icmp(data, data_end, off, pckt);
+		if (action >= 0)
+			return action;
+	} else {
+		pckt->flow.src = iph->saddr;
+		pckt->flow.dst = iph->daddr;
+	}
+	return -1;
+}
+
+__attribute__ ((noinline))
+static int process_packet(void *data, __u64 off, void *data_end,
+			  bool is_ipv6, struct xdp_md *xdp)
+{
+
+	struct real_definition *dst = NULL;
+	struct packet_description pckt = { };
+	struct vip_definition vip = { };
+	struct lb_stats *data_stats;
+	struct eth_hdr *eth = data;
+	void *lru_map = &lru_cache;
+	struct vip_meta *vip_info;
+	__u32 lru_stats_key = 513;
+	__u32 mac_addr_pos = 0;
+	__u32 stats_key = 512;
+	struct ctl_value *cval;
+	__u16 pkt_bytes;
+	__u64 iph_len;
+	__u8 protocol;
+	__u32 vip_num;
+	int action;
+
+	if (is_ipv6)
+		action = process_l3_headers_v6(&pckt, &protocol, off,
+					       &pkt_bytes, data, data_end);
+	else
+		action = process_l3_headers_v4(&pckt, &protocol, off,
+					       &pkt_bytes, data, data_end);
+	if (action >= 0)
+		return action;
+	protocol = pckt.flow.proto;
+	if (protocol == IPPROTO_TCP) {
+		if (!parse_tcp(data, data_end, is_ipv6, &pckt))
+			return XDP_DROP;
+	} else if (protocol == IPPROTO_UDP) {
+		if (!parse_udp(data, data_end, is_ipv6, &pckt))
+			return XDP_DROP;
+	} else {
+		return XDP_TX;
+	}
+
+	if (is_ipv6)
+		memcpy(vip.vipv6, pckt.flow.dstv6, 16);
+	else
+		vip.vip = pckt.flow.dst;
+	vip.port = pckt.flow.port16[1];
+	vip.proto = pckt.flow.proto;
+	vip_info = bpf_map_lookup_elem(&vip_map, &vip);
+	if (!vip_info) {
+		vip.port = 0;
+		vip_info = bpf_map_lookup_elem(&vip_map, &vip);
+		if (!vip_info)
+			return XDP_PASS;
+		if (!(vip_info->flags & (1 << 4)))
+			pckt.flow.port16[1] = 0;
+	}
+	if (data_end - data > 1400)
+		return XDP_DROP;
+	data_stats = bpf_map_lookup_elem(&stats, &stats_key);
+	if (!data_stats)
+		return XDP_DROP;
+	data_stats->v1 += 1;
+	if (!dst) {
+		if (vip_info->flags & (1 << 0))
+			pckt.flow.port16[0] = 0;
+		if (!(pckt.flags & (1 << 1)) && !(vip_info->flags & (1 << 1)))
+			connection_table_lookup(&dst, &pckt, lru_map);
+		if (dst)
+			goto out;
+		if (pckt.flow.proto == IPPROTO_TCP) {
+			struct lb_stats *lru_stats =
+			    bpf_map_lookup_elem(&stats, &lru_stats_key);
+
+			if (!lru_stats)
+				return XDP_DROP;
+			if (pckt.flags & (1 << 1))
+				lru_stats->v1 += 1;
+			else
+				lru_stats->v2 += 1;
+		}
+		if (!get_packet_dst(&dst, &pckt, vip_info, is_ipv6, lru_map))
+			return XDP_DROP;
+		data_stats->v2 += 1;
+	}
+out:
+	cval = bpf_map_lookup_elem(&ctl_array, &mac_addr_pos);
+	if (!cval)
+		return XDP_DROP;
+	if (dst->flags & (1 << 0)) {
+		if (!encap_v6(xdp, cval, &pckt, dst, pkt_bytes))
+			return XDP_DROP;
+	} else {
+		if (!encap_v4(xdp, cval, &pckt, dst, pkt_bytes))
+			return XDP_DROP;
+	}
+	vip_num = vip_info->vip_num;
+	data_stats = bpf_map_lookup_elem(&stats, &vip_num);
+	if (!data_stats)
+		return XDP_DROP;
+	data_stats->v1 += 1;
+	data_stats->v2 += pkt_bytes;
+
+	data = (void *)(long)xdp->data;
+	data_end = (void *)(long)xdp->data_end;
+	if (data + 4 > data_end)
+		return XDP_DROP;
+	*(u32 *)data = dst->dst;
+	return XDP_DROP;
+}
+
+__attribute__ ((section("xdp-test"), used))
+int balancer_ingress(struct xdp_md *ctx)
+{
+	void *data = (void *)(long)ctx->data;
+	void *data_end = (void *)(long)ctx->data_end;
+	struct eth_hdr *eth = data;
+	__u32 eth_proto;
+	__u32 nh_off;
+
+	nh_off = sizeof(struct eth_hdr);
+	if (data + nh_off > data_end)
+		return XDP_DROP;
+	eth_proto = eth->eth_proto;
+	if (eth_proto == 8)
+		return process_packet(data, nh_off, data_end, 0, ctx);
+	else if (eth_proto == 56710)
+		return process_packet(data, nh_off, data_end, 1, ctx);
+	else
+		return XDP_DROP;
+}
+
+char _license[] __attribute__ ((section("license"), used)) = "GPL";
+int _version __attribute__ ((section("version"), used)) = 1;
diff --git a/tools/testing/selftests/bpf/test_xdp_redirect.c b/tools/testing/selftests/bpf/test_xdp_redirect.c
new file mode 100644
index 000000000..ef9e704be
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_xdp_redirect.c
@@ -0,0 +1,28 @@
+/* Copyright (c) 2017 VMware
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+
+int _version SEC("version") = 1;
+
+SEC("redirect_to_111")
+int xdp_redirect_to_111(struct xdp_md *xdp)
+{
+	return bpf_redirect(111, 0);
+}
+SEC("redirect_to_222")
+int xdp_redirect_to_222(struct xdp_md *xdp)
+{
+	return bpf_redirect(222, 0);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_xdp_redirect.sh b/tools/testing/selftests/bpf/test_xdp_redirect.sh
new file mode 100755
index 000000000..c4b17e08d
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_xdp_redirect.sh
@@ -0,0 +1,61 @@
+#!/bin/sh
+# Create 2 namespaces with two veth peers, and
+# forward packets in-between using generic XDP
+#
+# NS1(veth11)     NS2(veth22)
+#     |               |
+#     |               |
+#   (veth1, ------ (veth2,
+#   id:111)         id:222)
+#     | xdp forwarding |
+#     ------------------
+
+cleanup()
+{
+	if [ "$?" = "0" ]; then
+		echo "selftests: test_xdp_redirect [PASS]";
+	else
+		echo "selftests: test_xdp_redirect [FAILED]";
+	fi
+
+	set +e
+	ip link del veth1 2> /dev/null
+	ip link del veth2 2> /dev/null
+	ip netns del ns1 2> /dev/null
+	ip netns del ns2 2> /dev/null
+}
+
+ip link set dev lo xdpgeneric off 2>/dev/null > /dev/null
+if [ $? -ne 0 ];then
+	echo "selftests: [SKIP] Could not run test without the ip xdpgeneric support"
+	exit 0
+fi
+set -e
+
+ip netns add ns1
+ip netns add ns2
+
+trap cleanup 0 2 3 6 9
+
+ip link add veth1 index 111 type veth peer name veth11
+ip link add veth2 index 222 type veth peer name veth22
+
+ip link set veth11 netns ns1
+ip link set veth22 netns ns2
+
+ip link set veth1 up
+ip link set veth2 up
+
+ip netns exec ns1 ip addr add 10.1.1.11/24 dev veth11
+ip netns exec ns2 ip addr add 10.1.1.22/24 dev veth22
+
+ip netns exec ns1 ip link set dev veth11 up
+ip netns exec ns2 ip link set dev veth22 up
+
+ip link set dev veth1 xdpgeneric obj test_xdp_redirect.o sec redirect_to_222
+ip link set dev veth2 xdpgeneric obj test_xdp_redirect.o sec redirect_to_111
+
+ip netns exec ns1 ping -c 1 10.1.1.22
+ip netns exec ns2 ping -c 1 10.1.1.11
+
+exit 0
diff --git a/tools/testing/selftests/bpf/trace_helpers.c b/tools/testing/selftests/bpf/trace_helpers.c
new file mode 100644
index 000000000..82922f13d
--- /dev/null
+++ b/tools/testing/selftests/bpf/trace_helpers.c
@@ -0,0 +1,214 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+#include <errno.h>
+#include <poll.h>
+#include <unistd.h>
+#include <linux/perf_event.h>
+#include <sys/mman.h>
+#include "trace_helpers.h"
+
+#define MAX_SYMS 300000
+static struct ksym syms[MAX_SYMS];
+static int sym_cnt;
+
+static int ksym_cmp(const void *p1, const void *p2)
+{
+	return ((struct ksym *)p1)->addr - ((struct ksym *)p2)->addr;
+}
+
+int load_kallsyms(void)
+{
+	FILE *f = fopen("/proc/kallsyms", "r");
+	char func[256], buf[256];
+	char symbol;
+	void *addr;
+	int i = 0;
+
+	if (!f)
+		return -ENOENT;
+
+	while (!feof(f)) {
+		if (!fgets(buf, sizeof(buf), f))
+			break;
+		if (sscanf(buf, "%p %c %s", &addr, &symbol, func) != 3)
+			break;
+		if (!addr)
+			continue;
+		syms[i].addr = (long) addr;
+		syms[i].name = strdup(func);
+		i++;
+	}
+	fclose(f);
+	sym_cnt = i;
+	qsort(syms, sym_cnt, sizeof(struct ksym), ksym_cmp);
+	return 0;
+}
+
+struct ksym *ksym_search(long key)
+{
+	int start = 0, end = sym_cnt;
+	int result;
+
+	/* kallsyms not loaded. return NULL */
+	if (sym_cnt <= 0)
+		return NULL;
+
+	while (start < end) {
+		size_t mid = start + (end - start) / 2;
+
+		result = key - syms[mid].addr;
+		if (result < 0)
+			end = mid;
+		else if (result > 0)
+			start = mid + 1;
+		else
+			return &syms[mid];
+	}
+
+	if (start >= 1 && syms[start - 1].addr < key &&
+	    key < syms[start].addr)
+		/* valid ksym */
+		return &syms[start - 1];
+
+	/* out of range. return _stext */
+	return &syms[0];
+}
+
+long ksym_get_addr(const char *name)
+{
+	int i;
+
+	for (i = 0; i < sym_cnt; i++) {
+		if (strcmp(syms[i].name, name) == 0)
+			return syms[i].addr;
+	}
+
+	return 0;
+}
+
+static int page_size;
+static int page_cnt = 8;
+static struct perf_event_mmap_page *header;
+
+int perf_event_mmap_header(int fd, struct perf_event_mmap_page **header)
+{
+	void *base;
+	int mmap_size;
+
+	page_size = getpagesize();
+	mmap_size = page_size * (page_cnt + 1);
+
+	base = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+	if (base == MAP_FAILED) {
+		printf("mmap err\n");
+		return -1;
+	}
+
+	*header = base;
+	return 0;
+}
+
+int perf_event_mmap(int fd)
+{
+	return perf_event_mmap_header(fd, &header);
+}
+
+static int perf_event_poll(int fd)
+{
+	struct pollfd pfd = { .fd = fd, .events = POLLIN };
+
+	return poll(&pfd, 1, 1000);
+}
+
+struct perf_event_sample {
+	struct perf_event_header header;
+	__u32 size;
+	char data[];
+};
+
+static enum bpf_perf_event_ret bpf_perf_event_print(void *event, void *priv)
+{
+	struct perf_event_sample *e = event;
+	perf_event_print_fn fn = priv;
+	int ret;
+
+	if (e->header.type == PERF_RECORD_SAMPLE) {
+		ret = fn(e->data, e->size);
+		if (ret != LIBBPF_PERF_EVENT_CONT)
+			return ret;
+	} else if (e->header.type == PERF_RECORD_LOST) {
+		struct {
+			struct perf_event_header header;
+			__u64 id;
+			__u64 lost;
+		} *lost = (void *) e;
+		printf("lost %lld events\n", lost->lost);
+	} else {
+		printf("unknown event type=%d size=%d\n",
+		       e->header.type, e->header.size);
+	}
+
+	return LIBBPF_PERF_EVENT_CONT;
+}
+
+int perf_event_poller(int fd, perf_event_print_fn output_fn)
+{
+	enum bpf_perf_event_ret ret;
+	void *buf = NULL;
+	size_t len = 0;
+
+	for (;;) {
+		perf_event_poll(fd);
+		ret = bpf_perf_event_read_simple(header, page_cnt * page_size,
+						 page_size, &buf, &len,
+						 bpf_perf_event_print,
+						 output_fn);
+		if (ret != LIBBPF_PERF_EVENT_CONT)
+			break;
+	}
+	free(buf);
+
+	return ret;
+}
+
+int perf_event_poller_multi(int *fds, struct perf_event_mmap_page **headers,
+			    int num_fds, perf_event_print_fn output_fn)
+{
+	enum bpf_perf_event_ret ret;
+	struct pollfd *pfds;
+	void *buf = NULL;
+	size_t len = 0;
+	int i;
+
+	pfds = calloc(num_fds, sizeof(*pfds));
+	if (!pfds)
+		return LIBBPF_PERF_EVENT_ERROR;
+
+	for (i = 0; i < num_fds; i++) {
+		pfds[i].fd = fds[i];
+		pfds[i].events = POLLIN;
+	}
+
+	for (;;) {
+		poll(pfds, num_fds, 1000);
+		for (i = 0; i < num_fds; i++) {
+			if (!pfds[i].revents)
+				continue;
+
+			ret = bpf_perf_event_read_simple(headers[i],
+							 page_cnt * page_size,
+							 page_size, &buf, &len,
+							 bpf_perf_event_print,
+							 output_fn);
+			if (ret != LIBBPF_PERF_EVENT_CONT)
+				break;
+		}
+	}
+	free(buf);
+	free(pfds);
+
+	return ret;
+}
diff --git a/tools/testing/selftests/bpf/trace_helpers.h b/tools/testing/selftests/bpf/trace_helpers.h
new file mode 100644
index 000000000..18924f23d
--- /dev/null
+++ b/tools/testing/selftests/bpf/trace_helpers.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __TRACE_HELPER_H
+#define __TRACE_HELPER_H
+
+#include <libbpf.h>
+#include <linux/perf_event.h>
+
+struct ksym {
+	long addr;
+	char *name;
+};
+
+int load_kallsyms(void);
+struct ksym *ksym_search(long key);
+long ksym_get_addr(const char *name);
+
+typedef enum bpf_perf_event_ret (*perf_event_print_fn)(void *data, int size);
+
+int perf_event_mmap(int fd);
+int perf_event_mmap_header(int fd, struct perf_event_mmap_page **header);
+/* return LIBBPF_PERF_EVENT_DONE or LIBBPF_PERF_EVENT_ERROR */
+int perf_event_poller(int fd, perf_event_print_fn output_fn);
+int perf_event_poller_multi(int *fds, struct perf_event_mmap_page **headers,
+			    int num_fds, perf_event_print_fn output_fn);
+#endif
diff --git a/tools/testing/selftests/bpf/urandom_read.c b/tools/testing/selftests/bpf/urandom_read.c
new file mode 100644
index 000000000..9de8b7cb4
--- /dev/null
+++ b/tools/testing/selftests/bpf/urandom_read.c
@@ -0,0 +1,28 @@
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <stdlib.h>
+
+#define BUF_SIZE 256
+
+int main(int argc, char *argv[])
+{
+	int fd = open("/dev/urandom", O_RDONLY);
+	int i;
+	char buf[BUF_SIZE];
+	int count = 4;
+
+	if (fd < 0)
+		return 1;
+
+	if (argc == 2)
+		count = atoi(argv[1]);
+
+	for (i = 0; i < count; ++i)
+		read(fd, buf, BUF_SIZE);
+
+	close(fd);
+	return 0;
+}
diff --git a/tools/testing/selftests/breakpoints/.gitignore b/tools/testing/selftests/breakpoints/.gitignore
new file mode 100644
index 000000000..a23bb4a6f
--- /dev/null
+++ b/tools/testing/selftests/breakpoints/.gitignore
@@ -0,0 +1,2 @@
+breakpoint_test
+step_after_suspend_test
diff --git a/tools/testing/selftests/breakpoints/Makefile b/tools/testing/selftests/breakpoints/Makefile
new file mode 100644
index 000000000..9ec2c78de
--- /dev/null
+++ b/tools/testing/selftests/breakpoints/Makefile
@@ -0,0 +1,16 @@
+# SPDX-License-Identifier: GPL-2.0
+# Taken from perf makefile
+uname_M := $(shell uname -m 2>/dev/null || echo not)
+ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/)
+
+TEST_GEN_PROGS := step_after_suspend_test
+
+ifeq ($(ARCH),x86)
+TEST_GEN_PROGS += breakpoint_test
+endif
+ifneq (,$(filter $(ARCH),aarch64 arm64))
+TEST_GEN_PROGS += breakpoint_test_arm64
+endif
+
+include ../lib.mk
+
diff --git a/tools/testing/selftests/breakpoints/breakpoint_test.c b/tools/testing/selftests/breakpoints/breakpoint_test.c
new file mode 100644
index 000000000..901b85ea6
--- /dev/null
+++ b/tools/testing/selftests/breakpoints/breakpoint_test.c
@@ -0,0 +1,401 @@
+/*
+ * Copyright (C) 2011 Red Hat, Inc., Frederic Weisbecker <fweisbec@redhat.com>
+ *
+ * Licensed under the terms of the GNU GPL License version 2
+ *
+ * Selftests for breakpoints (and more generally the do_debug() path) in x86.
+ */
+
+
+#include <sys/ptrace.h>
+#include <unistd.h>
+#include <stddef.h>
+#include <sys/user.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <errno.h>
+#include <string.h>
+
+#include "../kselftest.h"
+
+
+/* Breakpoint access modes */
+enum {
+	BP_X = 1,
+	BP_RW = 2,
+	BP_W = 4,
+};
+
+static pid_t child_pid;
+
+/*
+ * Ensures the child and parent are always "talking" about
+ * the same test sequence. (ie: that we haven't forgotten
+ * to call check_trapped() somewhere).
+ */
+static int nr_tests;
+
+static void set_breakpoint_addr(void *addr, int n)
+{
+	int ret;
+
+	ret = ptrace(PTRACE_POKEUSER, child_pid,
+		     offsetof(struct user, u_debugreg[n]), addr);
+	if (ret)
+		ksft_exit_fail_msg("Can't set breakpoint addr: %s\n",
+			strerror(errno));
+}
+
+static void toggle_breakpoint(int n, int type, int len,
+			      int local, int global, int set)
+{
+	int ret;
+
+	int xtype, xlen;
+	unsigned long vdr7, dr7;
+
+	switch (type) {
+	case BP_X:
+		xtype = 0;
+		break;
+	case BP_W:
+		xtype = 1;
+		break;
+	case BP_RW:
+		xtype = 3;
+		break;
+	}
+
+	switch (len) {
+	case 1:
+		xlen = 0;
+		break;
+	case 2:
+		xlen = 4;
+		break;
+	case 4:
+		xlen = 0xc;
+		break;
+	case 8:
+		xlen = 8;
+		break;
+	}
+
+	dr7 = ptrace(PTRACE_PEEKUSER, child_pid,
+		     offsetof(struct user, u_debugreg[7]), 0);
+
+	vdr7 = (xlen | xtype) << 16;
+	vdr7 <<= 4 * n;
+
+	if (local) {
+		vdr7 |= 1 << (2 * n);
+		vdr7 |= 1 << 8;
+	}
+	if (global) {
+		vdr7 |= 2 << (2 * n);
+		vdr7 |= 1 << 9;
+	}
+
+	if (set)
+		dr7 |= vdr7;
+	else
+		dr7 &= ~vdr7;
+
+	ret = ptrace(PTRACE_POKEUSER, child_pid,
+		     offsetof(struct user, u_debugreg[7]), dr7);
+	if (ret) {
+		ksft_print_msg("Can't set dr7: %s\n", strerror(errno));
+		exit(-1);
+	}
+}
+
+/* Dummy variables to test read/write accesses */
+static unsigned long long dummy_var[4];
+
+/* Dummy functions to test execution accesses */
+static void dummy_func(void) { }
+static void dummy_func1(void) { }
+static void dummy_func2(void) { }
+static void dummy_func3(void) { }
+
+static void (*dummy_funcs[])(void) = {
+	dummy_func,
+	dummy_func1,
+	dummy_func2,
+	dummy_func3,
+};
+
+static int trapped;
+
+static void check_trapped(void)
+{
+	/*
+	 * If we haven't trapped, wake up the parent
+	 * so that it notices the failure.
+	 */
+	if (!trapped)
+		kill(getpid(), SIGUSR1);
+	trapped = 0;
+
+	nr_tests++;
+}
+
+static void write_var(int len)
+{
+	char *pcval; short *psval; int *pival; long long *plval;
+	int i;
+
+	for (i = 0; i < 4; i++) {
+		switch (len) {
+		case 1:
+			pcval = (char *)&dummy_var[i];
+			*pcval = 0xff;
+			break;
+		case 2:
+			psval = (short *)&dummy_var[i];
+			*psval = 0xffff;
+			break;
+		case 4:
+			pival = (int *)&dummy_var[i];
+			*pival = 0xffffffff;
+			break;
+		case 8:
+			plval = (long long *)&dummy_var[i];
+			*plval = 0xffffffffffffffffLL;
+			break;
+		}
+		check_trapped();
+	}
+}
+
+static void read_var(int len)
+{
+	char cval; short sval; int ival; long long lval;
+	int i;
+
+	for (i = 0; i < 4; i++) {
+		switch (len) {
+		case 1:
+			cval = *(char *)&dummy_var[i];
+			break;
+		case 2:
+			sval = *(short *)&dummy_var[i];
+			break;
+		case 4:
+			ival = *(int *)&dummy_var[i];
+			break;
+		case 8:
+			lval = *(long long *)&dummy_var[i];
+			break;
+		}
+		check_trapped();
+	}
+}
+
+/*
+ * Do the r/w/x accesses to trigger the breakpoints. And run
+ * the usual traps.
+ */
+static void trigger_tests(void)
+{
+	int len, local, global, i;
+	char val;
+	int ret;
+
+	ret = ptrace(PTRACE_TRACEME, 0, NULL, 0);
+	if (ret) {
+		ksft_print_msg("Can't be traced? %s\n", strerror(errno));
+		return;
+	}
+
+	/* Wake up father so that it sets up the first test */
+	kill(getpid(), SIGUSR1);
+
+	/* Test instruction breakpoints */
+	for (local = 0; local < 2; local++) {
+		for (global = 0; global < 2; global++) {
+			if (!local && !global)
+				continue;
+
+			for (i = 0; i < 4; i++) {
+				dummy_funcs[i]();
+				check_trapped();
+			}
+		}
+	}
+
+	/* Test write watchpoints */
+	for (len = 1; len <= sizeof(long); len <<= 1) {
+		for (local = 0; local < 2; local++) {
+			for (global = 0; global < 2; global++) {
+				if (!local && !global)
+					continue;
+				write_var(len);
+			}
+		}
+	}
+
+	/* Test read/write watchpoints (on read accesses) */
+	for (len = 1; len <= sizeof(long); len <<= 1) {
+		for (local = 0; local < 2; local++) {
+			for (global = 0; global < 2; global++) {
+				if (!local && !global)
+					continue;
+				read_var(len);
+			}
+		}
+	}
+
+	/* Icebp trap */
+	asm(".byte 0xf1\n");
+	check_trapped();
+
+	/* Int 3 trap */
+	asm("int $3\n");
+	check_trapped();
+
+	kill(getpid(), SIGUSR1);
+}
+
+static void check_success(const char *msg)
+{
+	int child_nr_tests;
+	int status;
+	int ret;
+
+	/* Wait for the child to SIGTRAP */
+	wait(&status);
+
+	ret = 0;
+
+	if (WSTOPSIG(status) == SIGTRAP) {
+		child_nr_tests = ptrace(PTRACE_PEEKDATA, child_pid,
+					&nr_tests, 0);
+		if (child_nr_tests == nr_tests)
+			ret = 1;
+		if (ptrace(PTRACE_POKEDATA, child_pid, &trapped, 1))
+			ksft_exit_fail_msg("Can't poke: %s\n", strerror(errno));
+	}
+
+	nr_tests++;
+
+	if (ret)
+		ksft_test_result_pass(msg);
+	else
+		ksft_test_result_fail(msg);
+}
+
+static void launch_instruction_breakpoints(char *buf, int local, int global)
+{
+	int i;
+
+	for (i = 0; i < 4; i++) {
+		set_breakpoint_addr(dummy_funcs[i], i);
+		toggle_breakpoint(i, BP_X, 1, local, global, 1);
+		ptrace(PTRACE_CONT, child_pid, NULL, 0);
+		sprintf(buf, "Test breakpoint %d with local: %d global: %d\n",
+			i, local, global);
+		check_success(buf);
+		toggle_breakpoint(i, BP_X, 1, local, global, 0);
+	}
+}
+
+static void launch_watchpoints(char *buf, int mode, int len,
+			       int local, int global)
+{
+	const char *mode_str;
+	int i;
+
+	if (mode == BP_W)
+		mode_str = "write";
+	else
+		mode_str = "read";
+
+	for (i = 0; i < 4; i++) {
+		set_breakpoint_addr(&dummy_var[i], i);
+		toggle_breakpoint(i, mode, len, local, global, 1);
+		ptrace(PTRACE_CONT, child_pid, NULL, 0);
+		sprintf(buf,
+			"Test %s watchpoint %d with len: %d local: %d global: %d\n",
+			mode_str, i, len, local, global);
+		check_success(buf);
+		toggle_breakpoint(i, mode, len, local, global, 0);
+	}
+}
+
+/* Set the breakpoints and check the child successfully trigger them */
+static void launch_tests(void)
+{
+	char buf[1024];
+	int len, local, global, i;
+
+	/* Instruction breakpoints */
+	for (local = 0; local < 2; local++) {
+		for (global = 0; global < 2; global++) {
+			if (!local && !global)
+				continue;
+			launch_instruction_breakpoints(buf, local, global);
+		}
+	}
+
+	/* Write watchpoint */
+	for (len = 1; len <= sizeof(long); len <<= 1) {
+		for (local = 0; local < 2; local++) {
+			for (global = 0; global < 2; global++) {
+				if (!local && !global)
+					continue;
+				launch_watchpoints(buf, BP_W, len,
+						   local, global);
+			}
+		}
+	}
+
+	/* Read-Write watchpoint */
+	for (len = 1; len <= sizeof(long); len <<= 1) {
+		for (local = 0; local < 2; local++) {
+			for (global = 0; global < 2; global++) {
+				if (!local && !global)
+					continue;
+				launch_watchpoints(buf, BP_RW, len,
+						   local, global);
+			}
+		}
+	}
+
+	/* Icebp traps */
+	ptrace(PTRACE_CONT, child_pid, NULL, 0);
+	check_success("Test icebp\n");
+
+	/* Int 3 traps */
+	ptrace(PTRACE_CONT, child_pid, NULL, 0);
+	check_success("Test int 3 trap\n");
+
+	ptrace(PTRACE_CONT, child_pid, NULL, 0);
+}
+
+int main(int argc, char **argv)
+{
+	pid_t pid;
+	int ret;
+
+	ksft_print_header();
+
+	pid = fork();
+	if (!pid) {
+		trigger_tests();
+		exit(0);
+	}
+
+	child_pid = pid;
+
+	wait(NULL);
+
+	launch_tests();
+
+	wait(NULL);
+
+	ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/breakpoints/breakpoint_test_arm64.c b/tools/testing/selftests/breakpoints/breakpoint_test_arm64.c
new file mode 100644
index 000000000..2d95e5add
--- /dev/null
+++ b/tools/testing/selftests/breakpoints/breakpoint_test_arm64.c
@@ -0,0 +1,258 @@
+/*
+ * Copyright (C) 2016 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * Original Code by Pavel Labath <labath@google.com>
+ *
+ * Code modified by Pratyush Anand <panand@redhat.com>
+ * for testing different byte select for each access size.
+ *
+ */
+
+#define _GNU_SOURCE
+
+#include <asm/ptrace.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <sys/ptrace.h>
+#include <sys/param.h>
+#include <sys/uio.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <string.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <elf.h>
+#include <errno.h>
+#include <signal.h>
+
+#include "../kselftest.h"
+
+static volatile uint8_t var[96] __attribute__((__aligned__(32)));
+
+static void child(int size, int wr)
+{
+	volatile uint8_t *addr = &var[32 + wr];
+
+	if (ptrace(PTRACE_TRACEME, 0, NULL, NULL) != 0) {
+		ksft_print_msg(
+			"ptrace(PTRACE_TRACEME) failed: %s\n",
+			strerror(errno));
+		_exit(1);
+	}
+
+	if (raise(SIGSTOP) != 0) {
+		ksft_print_msg(
+			"raise(SIGSTOP) failed: %s\n", strerror(errno));
+		_exit(1);
+	}
+
+	if ((uintptr_t) addr % size) {
+		ksft_print_msg(
+			 "Wrong address write for the given size: %s\n",
+			 strerror(errno));
+		_exit(1);
+	}
+
+	switch (size) {
+	case 1:
+		*addr = 47;
+		break;
+	case 2:
+		*(uint16_t *)addr = 47;
+		break;
+	case 4:
+		*(uint32_t *)addr = 47;
+		break;
+	case 8:
+		*(uint64_t *)addr = 47;
+		break;
+	case 16:
+		__asm__ volatile ("stp x29, x30, %0" : "=m" (addr[0]));
+		break;
+	case 32:
+		__asm__ volatile ("stp q29, q30, %0" : "=m" (addr[0]));
+		break;
+	}
+
+	_exit(0);
+}
+
+static bool set_watchpoint(pid_t pid, int size, int wp)
+{
+	const volatile uint8_t *addr = &var[32 + wp];
+	const int offset = (uintptr_t)addr % 8;
+	const unsigned int byte_mask = ((1 << size) - 1) << offset;
+	const unsigned int type = 2; /* Write */
+	const unsigned int enable = 1;
+	const unsigned int control = byte_mask << 5 | type << 3 | enable;
+	struct user_hwdebug_state dreg_state;
+	struct iovec iov;
+
+	memset(&dreg_state, 0, sizeof(dreg_state));
+	dreg_state.dbg_regs[0].addr = (uintptr_t)(addr - offset);
+	dreg_state.dbg_regs[0].ctrl = control;
+	iov.iov_base = &dreg_state;
+	iov.iov_len = offsetof(struct user_hwdebug_state, dbg_regs) +
+				sizeof(dreg_state.dbg_regs[0]);
+	if (ptrace(PTRACE_SETREGSET, pid, NT_ARM_HW_WATCH, &iov) == 0)
+		return true;
+
+	if (errno == EIO)
+		ksft_print_msg(
+			"ptrace(PTRACE_SETREGSET, NT_ARM_HW_WATCH) not supported on this hardware: %s\n",
+			strerror(errno));
+
+	ksft_print_msg(
+		"ptrace(PTRACE_SETREGSET, NT_ARM_HW_WATCH) failed: %s\n",
+		strerror(errno));
+	return false;
+}
+
+static bool run_test(int wr_size, int wp_size, int wr, int wp)
+{
+	int status;
+	siginfo_t siginfo;
+	pid_t pid = fork();
+	pid_t wpid;
+
+	if (pid < 0) {
+		ksft_test_result_fail(
+			"fork() failed: %s\n", strerror(errno));
+		return false;
+	}
+	if (pid == 0)
+		child(wr_size, wr);
+
+	wpid = waitpid(pid, &status, __WALL);
+	if (wpid != pid) {
+		ksft_print_msg(
+			"waitpid() failed: %s\n", strerror(errno));
+		return false;
+	}
+	if (!WIFSTOPPED(status)) {
+		ksft_print_msg(
+			"child did not stop: %s\n", strerror(errno));
+		return false;
+	}
+	if (WSTOPSIG(status) != SIGSTOP) {
+		ksft_print_msg("child did not stop with SIGSTOP\n");
+		return false;
+	}
+
+	if (!set_watchpoint(pid, wp_size, wp))
+		return false;
+
+	if (ptrace(PTRACE_CONT, pid, NULL, NULL) < 0) {
+		ksft_print_msg(
+			"ptrace(PTRACE_SINGLESTEP) failed: %s\n",
+			strerror(errno));
+		return false;
+	}
+
+	alarm(3);
+	wpid = waitpid(pid, &status, __WALL);
+	if (wpid != pid) {
+		ksft_print_msg(
+			"waitpid() failed: %s\n", strerror(errno));
+		return false;
+	}
+	alarm(0);
+	if (WIFEXITED(status)) {
+		ksft_print_msg("child did not single-step\n");
+		return false;
+	}
+	if (!WIFSTOPPED(status)) {
+		ksft_print_msg("child did not stop\n");
+		return false;
+	}
+	if (WSTOPSIG(status) != SIGTRAP) {
+		ksft_print_msg("child did not stop with SIGTRAP\n");
+		return false;
+	}
+	if (ptrace(PTRACE_GETSIGINFO, pid, NULL, &siginfo) != 0) {
+		ksft_print_msg(
+			"ptrace(PTRACE_GETSIGINFO): %s\n",
+			strerror(errno));
+		return false;
+	}
+	if (siginfo.si_code != TRAP_HWBKPT) {
+		ksft_print_msg(
+			"Unexpected si_code %d\n", siginfo.si_code);
+		return false;
+	}
+
+	kill(pid, SIGKILL);
+	wpid = waitpid(pid, &status, 0);
+	if (wpid != pid) {
+		ksft_print_msg(
+			"waitpid() failed: %s\n", strerror(errno));
+		return false;
+	}
+	return true;
+}
+
+static void sigalrm(int sig)
+{
+}
+
+int main(int argc, char **argv)
+{
+	int opt;
+	bool succeeded = true;
+	struct sigaction act;
+	int wr, wp, size;
+	bool result;
+
+	ksft_print_header();
+
+	act.sa_handler = sigalrm;
+	sigemptyset(&act.sa_mask);
+	act.sa_flags = 0;
+	sigaction(SIGALRM, &act, NULL);
+	for (size = 1; size <= 32; size = size*2) {
+		for (wr = 0; wr <= 32; wr = wr + size) {
+			for (wp = wr - size; wp <= wr + size; wp = wp + size) {
+				result = run_test(size, MIN(size, 8), wr, wp);
+				if ((result && wr == wp) ||
+				    (!result && wr != wp))
+					ksft_test_result_pass(
+						"Test size = %d write offset = %d watchpoint offset = %d\n",
+						size, wr, wp);
+				else {
+					ksft_test_result_fail(
+						"Test size = %d write offset = %d watchpoint offset = %d\n",
+						size, wr, wp);
+					succeeded = false;
+				}
+			}
+		}
+	}
+
+	for (size = 1; size <= 32; size = size*2) {
+		if (run_test(size, 8, -size, -8))
+			ksft_test_result_pass(
+				"Test size = %d write offset = %d watchpoint offset = -8\n",
+				size, -size);
+		else {
+			ksft_test_result_fail(
+				"Test size = %d write offset = %d watchpoint offset = -8\n",
+				size, -size);
+			succeeded = false;
+		}
+	}
+
+	if (succeeded)
+		ksft_exit_pass();
+	else
+		ksft_exit_fail();
+}
diff --git a/tools/testing/selftests/breakpoints/step_after_suspend_test.c b/tools/testing/selftests/breakpoints/step_after_suspend_test.c
new file mode 100644
index 000000000..f82dcc1f8
--- /dev/null
+++ b/tools/testing/selftests/breakpoints/step_after_suspend_test.c
@@ -0,0 +1,220 @@
+/*
+ * Copyright (C) 2016 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#define _GNU_SOURCE
+
+#include <errno.h>
+#include <fcntl.h>
+#include <sched.h>
+#include <signal.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/ptrace.h>
+#include <sys/stat.h>
+#include <sys/timerfd.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+
+#include "../kselftest.h"
+
+void child(int cpu)
+{
+	cpu_set_t set;
+
+	CPU_ZERO(&set);
+	CPU_SET(cpu, &set);
+	if (sched_setaffinity(0, sizeof(set), &set) != 0) {
+		ksft_print_msg("sched_setaffinity() failed: %s\n",
+			strerror(errno));
+		_exit(1);
+	}
+
+	if (ptrace(PTRACE_TRACEME, 0, NULL, NULL) != 0) {
+		ksft_print_msg("ptrace(PTRACE_TRACEME) failed: %s\n",
+			strerror(errno));
+		_exit(1);
+	}
+
+	if (raise(SIGSTOP) != 0) {
+		ksft_print_msg("raise(SIGSTOP) failed: %s\n", strerror(errno));
+		_exit(1);
+	}
+
+	_exit(0);
+}
+
+bool run_test(int cpu)
+{
+	int status;
+	pid_t pid = fork();
+	pid_t wpid;
+
+	if (pid < 0) {
+		ksft_print_msg("fork() failed: %s\n", strerror(errno));
+		return false;
+	}
+	if (pid == 0)
+		child(cpu);
+
+	wpid = waitpid(pid, &status, __WALL);
+	if (wpid != pid) {
+		ksft_print_msg("waitpid() failed: %s\n", strerror(errno));
+		return false;
+	}
+	if (!WIFSTOPPED(status)) {
+		ksft_print_msg("child did not stop: %s\n", strerror(errno));
+		return false;
+	}
+	if (WSTOPSIG(status) != SIGSTOP) {
+		ksft_print_msg("child did not stop with SIGSTOP: %s\n",
+			strerror(errno));
+		return false;
+	}
+
+	if (ptrace(PTRACE_SINGLESTEP, pid, NULL, NULL) < 0) {
+		if (errno == EIO) {
+			ksft_exit_skip(
+				"ptrace(PTRACE_SINGLESTEP) not supported on this architecture: %s\n",
+				strerror(errno));
+		}
+		ksft_print_msg("ptrace(PTRACE_SINGLESTEP) failed: %s\n",
+			strerror(errno));
+		return false;
+	}
+
+	wpid = waitpid(pid, &status, __WALL);
+	if (wpid != pid) {
+		ksft_print_msg("waitpid() failed: $s\n", strerror(errno));
+		return false;
+	}
+	if (WIFEXITED(status)) {
+		ksft_print_msg("child did not single-step: %s\n",
+			strerror(errno));
+		return false;
+	}
+	if (!WIFSTOPPED(status)) {
+		ksft_print_msg("child did not stop: %s\n", strerror(errno));
+		return false;
+	}
+	if (WSTOPSIG(status) != SIGTRAP) {
+		ksft_print_msg("child did not stop with SIGTRAP: %s\n",
+			strerror(errno));
+		return false;
+	}
+
+	if (ptrace(PTRACE_CONT, pid, NULL, NULL) < 0) {
+		ksft_print_msg("ptrace(PTRACE_CONT) failed: %s\n",
+			strerror(errno));
+		return false;
+	}
+
+	wpid = waitpid(pid, &status, __WALL);
+	if (wpid != pid) {
+		ksft_print_msg("waitpid() failed: %s\n", strerror(errno));
+		return false;
+	}
+	if (!WIFEXITED(status)) {
+		ksft_print_msg("child did not exit after PTRACE_CONT: %s\n",
+			strerror(errno));
+		return false;
+	}
+
+	return true;
+}
+
+void suspend(void)
+{
+	int power_state_fd;
+	struct sigevent event = {};
+	int timerfd;
+	int err;
+	struct itimerspec spec = {};
+
+	if (getuid() != 0)
+		ksft_exit_skip("Please run the test as root - Exiting.\n");
+
+	power_state_fd = open("/sys/power/state", O_RDWR);
+	if (power_state_fd < 0)
+		ksft_exit_fail_msg(
+			"open(\"/sys/power/state\") failed %s)\n",
+			strerror(errno));
+
+	timerfd = timerfd_create(CLOCK_BOOTTIME_ALARM, 0);
+	if (timerfd < 0)
+		ksft_exit_fail_msg("timerfd_create() failed\n");
+
+	spec.it_value.tv_sec = 5;
+	err = timerfd_settime(timerfd, 0, &spec, NULL);
+	if (err < 0)
+		ksft_exit_fail_msg("timerfd_settime() failed\n");
+
+	if (write(power_state_fd, "mem", strlen("mem")) != strlen("mem"))
+		ksft_exit_fail_msg("Failed to enter Suspend state\n");
+
+	close(timerfd);
+	close(power_state_fd);
+}
+
+int main(int argc, char **argv)
+{
+	int opt;
+	bool do_suspend = true;
+	bool succeeded = true;
+	cpu_set_t available_cpus;
+	int err;
+	int cpu;
+
+	ksft_print_header();
+
+	while ((opt = getopt(argc, argv, "n")) != -1) {
+		switch (opt) {
+		case 'n':
+			do_suspend = false;
+			break;
+		default:
+			printf("Usage: %s [-n]\n", argv[0]);
+			printf("        -n: do not trigger a suspend/resume cycle before the test\n");
+			return -1;
+		}
+	}
+
+	if (do_suspend)
+		suspend();
+
+	err = sched_getaffinity(0, sizeof(available_cpus), &available_cpus);
+	if (err < 0)
+		ksft_exit_fail_msg("sched_getaffinity() failed\n");
+
+	for (cpu = 0; cpu < CPU_SETSIZE; cpu++) {
+		bool test_success;
+
+		if (!CPU_ISSET(cpu, &available_cpus))
+			continue;
+
+		test_success = run_test(cpu);
+		if (test_success) {
+			ksft_test_result_pass("CPU %d\n", cpu);
+		} else {
+			ksft_test_result_fail("CPU %d\n", cpu);
+			succeeded = false;
+		}
+	}
+
+	if (succeeded)
+		ksft_exit_pass();
+	else
+		ksft_exit_fail();
+}
diff --git a/tools/testing/selftests/capabilities/.gitignore b/tools/testing/selftests/capabilities/.gitignore
new file mode 100644
index 000000000..b732dd0d4
--- /dev/null
+++ b/tools/testing/selftests/capabilities/.gitignore
@@ -0,0 +1,2 @@
+test_execve
+validate_cap
diff --git a/tools/testing/selftests/capabilities/Makefile b/tools/testing/selftests/capabilities/Makefile
new file mode 100644
index 000000000..6e9d98d45
--- /dev/null
+++ b/tools/testing/selftests/capabilities/Makefile
@@ -0,0 +1,9 @@
+# SPDX-License-Identifier: GPL-2.0
+TEST_GEN_FILES := validate_cap
+TEST_GEN_PROGS := test_execve
+
+CFLAGS += -O2 -g -std=gnu99 -Wall
+LDLIBS += -lcap-ng -lrt -ldl
+
+include ../lib.mk
+
diff --git a/tools/testing/selftests/capabilities/test_execve.c b/tools/testing/selftests/capabilities/test_execve.c
new file mode 100644
index 000000000..3ab39a61b
--- /dev/null
+++ b/tools/testing/selftests/capabilities/test_execve.c
@@ -0,0 +1,460 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+
+#include <cap-ng.h>
+#include <linux/capability.h>
+#include <stdbool.h>
+#include <string.h>
+#include <stdio.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <stdarg.h>
+#include <sched.h>
+#include <sys/mount.h>
+#include <limits.h>
+#include <libgen.h>
+#include <malloc.h>
+#include <sys/wait.h>
+#include <sys/prctl.h>
+#include <sys/stat.h>
+
+#include "../kselftest.h"
+
+#ifndef PR_CAP_AMBIENT
+#define PR_CAP_AMBIENT			47
+# define PR_CAP_AMBIENT_IS_SET		1
+# define PR_CAP_AMBIENT_RAISE		2
+# define PR_CAP_AMBIENT_LOWER		3
+# define PR_CAP_AMBIENT_CLEAR_ALL	4
+#endif
+
+static int nerrs;
+static pid_t mpid;	/*  main() pid is used to avoid duplicate test counts */
+
+static void vmaybe_write_file(bool enoent_ok, char *filename, char *fmt, va_list ap)
+{
+	char buf[4096];
+	int fd;
+	ssize_t written;
+	int buf_len;
+
+	buf_len = vsnprintf(buf, sizeof(buf), fmt, ap);
+	if (buf_len < 0)
+		ksft_exit_fail_msg("vsnprintf failed - %s\n", strerror(errno));
+
+	if (buf_len >= sizeof(buf))
+		ksft_exit_fail_msg("vsnprintf output truncated\n");
+
+
+	fd = open(filename, O_WRONLY);
+	if (fd < 0) {
+		if ((errno == ENOENT) && enoent_ok)
+			return;
+		ksft_exit_fail_msg("open of %s failed - %s\n",
+					filename, strerror(errno));
+	}
+	written = write(fd, buf, buf_len);
+	if (written != buf_len) {
+		if (written >= 0) {
+			ksft_exit_fail_msg("short write to %s\n", filename);
+		} else {
+			ksft_exit_fail_msg("write to %s failed - %s\n",
+						filename, strerror(errno));
+		}
+	}
+	if (close(fd) != 0) {
+		ksft_exit_fail_msg("close of %s failed - %s\n",
+					filename, strerror(errno));
+	}
+}
+
+static void maybe_write_file(char *filename, char *fmt, ...)
+{
+	va_list ap;
+
+	va_start(ap, fmt);
+	vmaybe_write_file(true, filename, fmt, ap);
+	va_end(ap);
+}
+
+static void write_file(char *filename, char *fmt, ...)
+{
+	va_list ap;
+
+	va_start(ap, fmt);
+	vmaybe_write_file(false, filename, fmt, ap);
+	va_end(ap);
+}
+
+static bool create_and_enter_ns(uid_t inner_uid)
+{
+	uid_t outer_uid;
+	gid_t outer_gid;
+	int i;
+	bool have_outer_privilege;
+
+	outer_uid = getuid();
+	outer_gid = getgid();
+
+	/*
+	 * TODO: If we're already root, we could skip creating the userns.
+	 */
+
+	if (unshare(CLONE_NEWNS) == 0) {
+		ksft_print_msg("[NOTE]\tUsing global UIDs for tests\n");
+		if (prctl(PR_SET_KEEPCAPS, 1, 0, 0, 0) != 0)
+			ksft_exit_fail_msg("PR_SET_KEEPCAPS - %s\n",
+						strerror(errno));
+		if (setresuid(inner_uid, inner_uid, -1) != 0)
+			ksft_exit_fail_msg("setresuid - %s\n", strerror(errno));
+
+		// Re-enable effective caps
+		capng_get_caps_process();
+		for (i = 0; i < CAP_LAST_CAP; i++)
+			if (capng_have_capability(CAPNG_PERMITTED, i))
+				capng_update(CAPNG_ADD, CAPNG_EFFECTIVE, i);
+		if (capng_apply(CAPNG_SELECT_CAPS) != 0)
+			ksft_exit_fail_msg(
+					"capng_apply - %s\n", strerror(errno));
+
+		have_outer_privilege = true;
+	} else if (unshare(CLONE_NEWUSER | CLONE_NEWNS) == 0) {
+		ksft_print_msg("[NOTE]\tUsing a user namespace for tests\n");
+		maybe_write_file("/proc/self/setgroups", "deny");
+		write_file("/proc/self/uid_map", "%d %d 1", inner_uid, outer_uid);
+		write_file("/proc/self/gid_map", "0 %d 1", outer_gid);
+
+		have_outer_privilege = false;
+	} else {
+		ksft_exit_skip("must be root or be able to create a userns\n");
+	}
+
+	if (mount("none", "/", NULL, MS_REC | MS_PRIVATE, NULL) != 0)
+		ksft_exit_fail_msg("remount everything private - %s\n",
+					strerror(errno));
+
+	return have_outer_privilege;
+}
+
+static void chdir_to_tmpfs(void)
+{
+	char cwd[PATH_MAX];
+	if (getcwd(cwd, sizeof(cwd)) != cwd)
+		ksft_exit_fail_msg("getcwd - %s\n", strerror(errno));
+
+	if (mount("private_tmp", ".", "tmpfs", 0, "mode=0777") != 0)
+		ksft_exit_fail_msg("mount private tmpfs - %s\n",
+					strerror(errno));
+
+	if (chdir(cwd) != 0)
+		ksft_exit_fail_msg("chdir to private tmpfs - %s\n",
+					strerror(errno));
+}
+
+static void copy_fromat_to(int fromfd, const char *fromname, const char *toname)
+{
+	int from = openat(fromfd, fromname, O_RDONLY);
+	if (from == -1)
+		ksft_exit_fail_msg("open copy source - %s\n", strerror(errno));
+
+	int to = open(toname, O_CREAT | O_WRONLY | O_EXCL, 0700);
+
+	while (true) {
+		char buf[4096];
+		ssize_t sz = read(from, buf, sizeof(buf));
+		if (sz == 0)
+			break;
+		if (sz < 0)
+			ksft_exit_fail_msg("read - %s\n", strerror(errno));
+
+		if (write(to, buf, sz) != sz)
+			/* no short writes on tmpfs */
+			ksft_exit_fail_msg("write - %s\n", strerror(errno));
+	}
+
+	close(from);
+	close(to);
+}
+
+static bool fork_wait(void)
+{
+	pid_t child = fork();
+	if (child == 0) {
+		nerrs = 0;
+		return true;
+	} else if (child > 0) {
+		int status;
+		if (waitpid(child, &status, 0) != child ||
+		    !WIFEXITED(status)) {
+			ksft_print_msg("Child died\n");
+			nerrs++;
+		} else if (WEXITSTATUS(status) != 0) {
+			ksft_print_msg("Child failed\n");
+			nerrs++;
+		} else {
+			/* don't print this message for mpid */
+			if (getpid() != mpid)
+				ksft_test_result_pass("Passed\n");
+		}
+		return false;
+	} else {
+		ksft_exit_fail_msg("fork - %s\n", strerror(errno));
+		return false;
+	}
+}
+
+static void exec_other_validate_cap(const char *name,
+				    bool eff, bool perm, bool inh, bool ambient)
+{
+	execl(name, name, (eff ? "1" : "0"),
+	      (perm ? "1" : "0"), (inh ? "1" : "0"), (ambient ? "1" : "0"),
+	      NULL);
+	ksft_exit_fail_msg("execl - %s\n", strerror(errno));
+}
+
+static void exec_validate_cap(bool eff, bool perm, bool inh, bool ambient)
+{
+	exec_other_validate_cap("./validate_cap", eff, perm, inh, ambient);
+}
+
+static int do_tests(int uid, const char *our_path)
+{
+	bool have_outer_privilege = create_and_enter_ns(uid);
+
+	int ourpath_fd = open(our_path, O_RDONLY | O_DIRECTORY);
+	if (ourpath_fd == -1)
+		ksft_exit_fail_msg("open '%s' - %s\n",
+					our_path, strerror(errno));
+
+	chdir_to_tmpfs();
+
+	copy_fromat_to(ourpath_fd, "validate_cap", "validate_cap");
+
+	if (have_outer_privilege) {
+		uid_t gid = getegid();
+
+		copy_fromat_to(ourpath_fd, "validate_cap",
+			       "validate_cap_suidroot");
+		if (chown("validate_cap_suidroot", 0, -1) != 0)
+			ksft_exit_fail_msg("chown - %s\n", strerror(errno));
+		if (chmod("validate_cap_suidroot", S_ISUID | 0700) != 0)
+			ksft_exit_fail_msg("chmod - %s\n", strerror(errno));
+
+		copy_fromat_to(ourpath_fd, "validate_cap",
+			       "validate_cap_suidnonroot");
+		if (chown("validate_cap_suidnonroot", uid + 1, -1) != 0)
+			ksft_exit_fail_msg("chown - %s\n", strerror(errno));
+		if (chmod("validate_cap_suidnonroot", S_ISUID | 0700) != 0)
+			ksft_exit_fail_msg("chmod - %s\n", strerror(errno));
+
+		copy_fromat_to(ourpath_fd, "validate_cap",
+			       "validate_cap_sgidroot");
+		if (chown("validate_cap_sgidroot", -1, 0) != 0)
+			ksft_exit_fail_msg("chown - %s\n", strerror(errno));
+		if (chmod("validate_cap_sgidroot", S_ISGID | 0710) != 0)
+			ksft_exit_fail_msg("chmod - %s\n", strerror(errno));
+
+		copy_fromat_to(ourpath_fd, "validate_cap",
+			       "validate_cap_sgidnonroot");
+		if (chown("validate_cap_sgidnonroot", -1, gid + 1) != 0)
+			ksft_exit_fail_msg("chown - %s\n", strerror(errno));
+		if (chmod("validate_cap_sgidnonroot", S_ISGID | 0710) != 0)
+			ksft_exit_fail_msg("chmod - %s\n", strerror(errno));
+	}
+
+	capng_get_caps_process();
+
+	/* Make sure that i starts out clear */
+	capng_update(CAPNG_DROP, CAPNG_INHERITABLE, CAP_NET_BIND_SERVICE);
+	if (capng_apply(CAPNG_SELECT_CAPS) != 0)
+		ksft_exit_fail_msg("capng_apply - %s\n", strerror(errno));
+
+	if (uid == 0) {
+		ksft_print_msg("[RUN]\tRoot => ep\n");
+		if (fork_wait())
+			exec_validate_cap(true, true, false, false);
+	} else {
+		ksft_print_msg("[RUN]\tNon-root => no caps\n");
+		if (fork_wait())
+			exec_validate_cap(false, false, false, false);
+	}
+
+	ksft_print_msg("Check cap_ambient manipulation rules\n");
+
+	/* We should not be able to add ambient caps yet. */
+	if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_RAISE, CAP_NET_BIND_SERVICE, 0, 0, 0) != -1 || errno != EPERM) {
+		if (errno == EINVAL)
+			ksft_test_result_fail(
+				"PR_CAP_AMBIENT_RAISE isn't supported\n");
+		else
+			ksft_test_result_fail(
+				"PR_CAP_AMBIENT_RAISE should have failed eith EPERM on a non-inheritable cap\n");
+		return 1;
+	}
+	ksft_test_result_pass(
+		"PR_CAP_AMBIENT_RAISE failed on non-inheritable cap\n");
+
+	capng_update(CAPNG_ADD, CAPNG_INHERITABLE, CAP_NET_RAW);
+	capng_update(CAPNG_DROP, CAPNG_PERMITTED, CAP_NET_RAW);
+	capng_update(CAPNG_DROP, CAPNG_EFFECTIVE, CAP_NET_RAW);
+	if (capng_apply(CAPNG_SELECT_CAPS) != 0)
+		ksft_exit_fail_msg("capng_apply - %s\n", strerror(errno));
+	if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_RAISE, CAP_NET_RAW, 0, 0, 0) != -1 || errno != EPERM) {
+		ksft_test_result_fail(
+			"PR_CAP_AMBIENT_RAISE should have failed on a non-permitted cap\n");
+		return 1;
+	}
+	ksft_test_result_pass(
+		"PR_CAP_AMBIENT_RAISE failed on non-permitted cap\n");
+
+	capng_update(CAPNG_ADD, CAPNG_INHERITABLE, CAP_NET_BIND_SERVICE);
+	if (capng_apply(CAPNG_SELECT_CAPS) != 0)
+		ksft_exit_fail_msg("capng_apply - %s\n", strerror(errno));
+	if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_RAISE, CAP_NET_BIND_SERVICE, 0, 0, 0) != 0) {
+		ksft_test_result_fail(
+			"PR_CAP_AMBIENT_RAISE should have succeeded\n");
+		return 1;
+	}
+	ksft_test_result_pass("PR_CAP_AMBIENT_RAISE worked\n");
+
+	if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_IS_SET, CAP_NET_BIND_SERVICE, 0, 0, 0) != 1) {
+		ksft_test_result_fail("PR_CAP_AMBIENT_IS_SET is broken\n");
+		return 1;
+	}
+
+	if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_CLEAR_ALL, 0, 0, 0, 0) != 0)
+		ksft_exit_fail_msg("PR_CAP_AMBIENT_CLEAR_ALL - %s\n",
+					strerror(errno));
+
+	if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_IS_SET, CAP_NET_BIND_SERVICE, 0, 0, 0) != 0) {
+		ksft_test_result_fail(
+			"PR_CAP_AMBIENT_CLEAR_ALL didn't work\n");
+		return 1;
+	}
+
+	if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_RAISE, CAP_NET_BIND_SERVICE, 0, 0, 0) != 0)
+		ksft_exit_fail_msg("PR_CAP_AMBIENT_RAISE - %s\n",
+					strerror(errno));
+
+	capng_update(CAPNG_DROP, CAPNG_INHERITABLE, CAP_NET_BIND_SERVICE);
+	if (capng_apply(CAPNG_SELECT_CAPS) != 0)
+		ksft_exit_fail_msg("capng_apply - %s\n", strerror(errno));
+
+	if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_IS_SET, CAP_NET_BIND_SERVICE, 0, 0, 0) != 0) {
+		ksft_test_result_fail("Dropping I should have dropped A\n");
+		return 1;
+	}
+
+	ksft_test_result_pass("Basic manipulation appears to work\n");
+
+	capng_update(CAPNG_ADD, CAPNG_INHERITABLE, CAP_NET_BIND_SERVICE);
+	if (capng_apply(CAPNG_SELECT_CAPS) != 0)
+		ksft_exit_fail_msg("capng_apply - %s\n", strerror(errno));
+	if (uid == 0) {
+		ksft_print_msg("[RUN]\tRoot +i => eip\n");
+		if (fork_wait())
+			exec_validate_cap(true, true, true, false);
+	} else {
+		ksft_print_msg("[RUN]\tNon-root +i => i\n");
+		if (fork_wait())
+			exec_validate_cap(false, false, true, false);
+	}
+
+	if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_RAISE, CAP_NET_BIND_SERVICE, 0, 0, 0) != 0)
+		ksft_exit_fail_msg("PR_CAP_AMBIENT_RAISE - %s\n",
+					strerror(errno));
+
+	ksft_print_msg("[RUN]\tUID %d +ia => eipa\n", uid);
+	if (fork_wait())
+		exec_validate_cap(true, true, true, true);
+
+	/* The remaining tests need real privilege */
+
+	if (!have_outer_privilege) {
+		ksft_test_result_skip("SUID/SGID tests (needs privilege)\n");
+		goto done;
+	}
+
+	if (uid == 0) {
+		ksft_print_msg("[RUN]\tRoot +ia, suidroot => eipa\n");
+		if (fork_wait())
+			exec_other_validate_cap("./validate_cap_suidroot",
+						true, true, true, true);
+
+		ksft_print_msg("[RUN]\tRoot +ia, suidnonroot => ip\n");
+		if (fork_wait())
+			exec_other_validate_cap("./validate_cap_suidnonroot",
+						false, true, true, false);
+
+		ksft_print_msg("[RUN]\tRoot +ia, sgidroot => eipa\n");
+		if (fork_wait())
+			exec_other_validate_cap("./validate_cap_sgidroot",
+						true, true, true, true);
+
+		if (fork_wait()) {
+			ksft_print_msg(
+				"[RUN]\tRoot, gid != 0, +ia, sgidroot => eip\n");
+			if (setresgid(1, 1, 1) != 0)
+				ksft_exit_fail_msg("setresgid - %s\n",
+							strerror(errno));
+			exec_other_validate_cap("./validate_cap_sgidroot",
+						true, true, true, false);
+		}
+
+		ksft_print_msg("[RUN]\tRoot +ia, sgidnonroot => eip\n");
+		if (fork_wait())
+			exec_other_validate_cap("./validate_cap_sgidnonroot",
+						true, true, true, false);
+	} else {
+		ksft_print_msg("[RUN]\tNon-root +ia, sgidnonroot => i\n");
+		if (fork_wait())
+			exec_other_validate_cap("./validate_cap_sgidnonroot",
+					false, false, true, false);
+
+		if (fork_wait()) {
+			ksft_print_msg("[RUN]\tNon-root +ia, sgidroot => i\n");
+			if (setresgid(1, 1, 1) != 0)
+				ksft_exit_fail_msg("setresgid - %s\n",
+							strerror(errno));
+			exec_other_validate_cap("./validate_cap_sgidroot",
+						false, false, true, false);
+		}
+	}
+
+done:
+	ksft_print_cnts();
+	return nerrs ? 1 : 0;
+}
+
+int main(int argc, char **argv)
+{
+	char *tmp1, *tmp2, *our_path;
+
+	ksft_print_header();
+
+	/* Find our path */
+	tmp1 = strdup(argv[0]);
+	if (!tmp1)
+		ksft_exit_fail_msg("strdup - %s\n", strerror(errno));
+	tmp2 = dirname(tmp1);
+	our_path = strdup(tmp2);
+	if (!our_path)
+		ksft_exit_fail_msg("strdup - %s\n", strerror(errno));
+	free(tmp1);
+
+	mpid = getpid();
+
+	if (fork_wait()) {
+		ksft_print_msg("[RUN]\t+++ Tests with uid == 0 +++\n");
+		return do_tests(0, our_path);
+	}
+
+	ksft_print_msg("==================================================\n");
+
+	if (fork_wait()) {
+		ksft_print_msg("[RUN]\t+++ Tests with uid != 0 +++\n");
+		return do_tests(1, our_path);
+	}
+
+	return nerrs ? 1 : 0;
+}
diff --git a/tools/testing/selftests/capabilities/validate_cap.c b/tools/testing/selftests/capabilities/validate_cap.c
new file mode 100644
index 000000000..cdfc94268
--- /dev/null
+++ b/tools/testing/selftests/capabilities/validate_cap.c
@@ -0,0 +1,80 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <cap-ng.h>
+#include <linux/capability.h>
+#include <stdbool.h>
+#include <string.h>
+#include <stdio.h>
+#include <sys/prctl.h>
+#include <sys/auxv.h>
+
+#include "../kselftest.h"
+
+#ifndef PR_CAP_AMBIENT
+#define PR_CAP_AMBIENT			47
+# define PR_CAP_AMBIENT_IS_SET		1
+# define PR_CAP_AMBIENT_RAISE		2
+# define PR_CAP_AMBIENT_LOWER		3
+# define PR_CAP_AMBIENT_CLEAR_ALL	4
+#endif
+
+#if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 19)
+# define HAVE_GETAUXVAL
+#endif
+
+static bool bool_arg(char **argv, int i)
+{
+	if (!strcmp(argv[i], "0"))
+		return false;
+	else if (!strcmp(argv[i], "1"))
+		return true;
+	else {
+		ksft_exit_fail_msg("wrong argv[%d]\n", i);
+		return false;
+	}
+}
+
+int main(int argc, char **argv)
+{
+	const char *atsec = "";
+
+	/*
+	 * Be careful just in case a setgid or setcapped copy of this
+	 * helper gets out.
+	 */
+
+	if (argc != 5)
+		ksft_exit_fail_msg("wrong argc\n");
+
+#ifdef HAVE_GETAUXVAL
+	if (getauxval(AT_SECURE))
+		atsec = " (AT_SECURE is set)";
+	else
+		atsec = " (AT_SECURE is not set)";
+#endif
+
+	capng_get_caps_process();
+
+	if (capng_have_capability(CAPNG_EFFECTIVE, CAP_NET_BIND_SERVICE) != bool_arg(argv, 1)) {
+		ksft_print_msg("Wrong effective state%s\n", atsec);
+		return 1;
+	}
+
+	if (capng_have_capability(CAPNG_PERMITTED, CAP_NET_BIND_SERVICE) != bool_arg(argv, 2)) {
+		ksft_print_msg("Wrong permitted state%s\n", atsec);
+		return 1;
+	}
+
+	if (capng_have_capability(CAPNG_INHERITABLE, CAP_NET_BIND_SERVICE) != bool_arg(argv, 3)) {
+		ksft_print_msg("Wrong inheritable state%s\n", atsec);
+		return 1;
+	}
+
+	if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_IS_SET, CAP_NET_BIND_SERVICE, 0, 0, 0) != bool_arg(argv, 4)) {
+		ksft_print_msg("Wrong ambient state%s\n", atsec);
+		return 1;
+	}
+
+	ksft_print_msg("%s: Capabilities after execve were correct\n",
+			"validate_cap:");
+	return 0;
+}
diff --git a/tools/testing/selftests/cgroup/.gitignore b/tools/testing/selftests/cgroup/.gitignore
new file mode 100644
index 000000000..adacda50a
--- /dev/null
+++ b/tools/testing/selftests/cgroup/.gitignore
@@ -0,0 +1,2 @@
+test_memcontrol
+test_core
diff --git a/tools/testing/selftests/cgroup/Makefile b/tools/testing/selftests/cgroup/Makefile
new file mode 100644
index 000000000..23fbaa4a9
--- /dev/null
+++ b/tools/testing/selftests/cgroup/Makefile
@@ -0,0 +1,12 @@
+# SPDX-License-Identifier: GPL-2.0
+CFLAGS += -Wall
+
+all:
+
+TEST_GEN_PROGS = test_memcontrol
+TEST_GEN_PROGS += test_core
+
+include ../lib.mk
+
+$(OUTPUT)/test_memcontrol: cgroup_util.c
+$(OUTPUT)/test_core: cgroup_util.c
diff --git a/tools/testing/selftests/cgroup/cgroup_util.c b/tools/testing/selftests/cgroup/cgroup_util.c
new file mode 100644
index 000000000..a516d01f0
--- /dev/null
+++ b/tools/testing/selftests/cgroup/cgroup_util.c
@@ -0,0 +1,370 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#define _GNU_SOURCE
+
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/limits.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "cgroup_util.h"
+
+static ssize_t read_text(const char *path, char *buf, size_t max_len)
+{
+	ssize_t len;
+	int fd;
+
+	fd = open(path, O_RDONLY);
+	if (fd < 0)
+		return fd;
+
+	len = read(fd, buf, max_len - 1);
+	if (len < 0)
+		goto out;
+
+	buf[len] = 0;
+out:
+	close(fd);
+	return len;
+}
+
+static ssize_t write_text(const char *path, char *buf, ssize_t len)
+{
+	int fd;
+
+	fd = open(path, O_WRONLY | O_APPEND);
+	if (fd < 0)
+		return fd;
+
+	len = write(fd, buf, len);
+	if (len < 0) {
+		close(fd);
+		return len;
+	}
+
+	close(fd);
+
+	return len;
+}
+
+char *cg_name(const char *root, const char *name)
+{
+	size_t len = strlen(root) + strlen(name) + 2;
+	char *ret = malloc(len);
+
+	snprintf(ret, len, "%s/%s", root, name);
+
+	return ret;
+}
+
+char *cg_name_indexed(const char *root, const char *name, int index)
+{
+	size_t len = strlen(root) + strlen(name) + 10;
+	char *ret = malloc(len);
+
+	snprintf(ret, len, "%s/%s_%d", root, name, index);
+
+	return ret;
+}
+
+int cg_read(const char *cgroup, const char *control, char *buf, size_t len)
+{
+	char path[PATH_MAX];
+
+	snprintf(path, sizeof(path), "%s/%s", cgroup, control);
+
+	if (read_text(path, buf, len) >= 0)
+		return 0;
+
+	return -1;
+}
+
+int cg_read_strcmp(const char *cgroup, const char *control,
+		   const char *expected)
+{
+	size_t size;
+	char *buf;
+	int ret;
+
+	/* Handle the case of comparing against empty string */
+	if (!expected)
+		return -1;
+	else
+		size = strlen(expected) + 1;
+
+	buf = malloc(size);
+	if (!buf)
+		return -1;
+
+	if (cg_read(cgroup, control, buf, size)) {
+		free(buf);
+		return -1;
+	}
+
+	ret = strcmp(expected, buf);
+	free(buf);
+	return ret;
+}
+
+int cg_read_strstr(const char *cgroup, const char *control, const char *needle)
+{
+	char buf[PAGE_SIZE];
+
+	if (cg_read(cgroup, control, buf, sizeof(buf)))
+		return -1;
+
+	return strstr(buf, needle) ? 0 : -1;
+}
+
+long cg_read_long(const char *cgroup, const char *control)
+{
+	char buf[128];
+
+	if (cg_read(cgroup, control, buf, sizeof(buf)))
+		return -1;
+
+	return atol(buf);
+}
+
+long cg_read_key_long(const char *cgroup, const char *control, const char *key)
+{
+	char buf[PAGE_SIZE];
+	char *ptr;
+
+	if (cg_read(cgroup, control, buf, sizeof(buf)))
+		return -1;
+
+	ptr = strstr(buf, key);
+	if (!ptr)
+		return -1;
+
+	return atol(ptr + strlen(key));
+}
+
+int cg_write(const char *cgroup, const char *control, char *buf)
+{
+	char path[PATH_MAX];
+	ssize_t len = strlen(buf);
+
+	snprintf(path, sizeof(path), "%s/%s", cgroup, control);
+
+	if (write_text(path, buf, len) == len)
+		return 0;
+
+	return -1;
+}
+
+int cg_find_unified_root(char *root, size_t len)
+{
+	char buf[10 * PAGE_SIZE];
+	char *fs, *mount, *type;
+	const char delim[] = "\n\t ";
+
+	if (read_text("/proc/self/mounts", buf, sizeof(buf)) <= 0)
+		return -1;
+
+	/*
+	 * Example:
+	 * cgroup /sys/fs/cgroup cgroup2 rw,seclabel,noexec,relatime 0 0
+	 */
+	for (fs = strtok(buf, delim); fs; fs = strtok(NULL, delim)) {
+		mount = strtok(NULL, delim);
+		type = strtok(NULL, delim);
+		strtok(NULL, delim);
+		strtok(NULL, delim);
+		strtok(NULL, delim);
+
+		if (strcmp(type, "cgroup2") == 0) {
+			strncpy(root, mount, len);
+			return 0;
+		}
+	}
+
+	return -1;
+}
+
+int cg_create(const char *cgroup)
+{
+	return mkdir(cgroup, 0755);
+}
+
+static int cg_killall(const char *cgroup)
+{
+	char buf[PAGE_SIZE];
+	char *ptr = buf;
+
+	if (cg_read(cgroup, "cgroup.procs", buf, sizeof(buf)))
+		return -1;
+
+	while (ptr < buf + sizeof(buf)) {
+		int pid = strtol(ptr, &ptr, 10);
+
+		if (pid == 0)
+			break;
+		if (*ptr)
+			ptr++;
+		else
+			break;
+		if (kill(pid, SIGKILL))
+			return -1;
+	}
+
+	return 0;
+}
+
+int cg_destroy(const char *cgroup)
+{
+	int ret;
+
+retry:
+	ret = rmdir(cgroup);
+	if (ret && errno == EBUSY) {
+		ret = cg_killall(cgroup);
+		if (ret)
+			return ret;
+		usleep(100);
+		goto retry;
+	}
+
+	if (ret && errno == ENOENT)
+		ret = 0;
+
+	return ret;
+}
+
+int cg_enter_current(const char *cgroup)
+{
+	char pidbuf[64];
+
+	snprintf(pidbuf, sizeof(pidbuf), "%d", getpid());
+	return cg_write(cgroup, "cgroup.procs", pidbuf);
+}
+
+int cg_run(const char *cgroup,
+	   int (*fn)(const char *cgroup, void *arg),
+	   void *arg)
+{
+	int pid, retcode;
+
+	pid = fork();
+	if (pid < 0) {
+		return pid;
+	} else if (pid == 0) {
+		char buf[64];
+
+		snprintf(buf, sizeof(buf), "%d", getpid());
+		if (cg_write(cgroup, "cgroup.procs", buf))
+			exit(EXIT_FAILURE);
+		exit(fn(cgroup, arg));
+	} else {
+		waitpid(pid, &retcode, 0);
+		if (WIFEXITED(retcode))
+			return WEXITSTATUS(retcode);
+		else
+			return -1;
+	}
+}
+
+int cg_run_nowait(const char *cgroup,
+		  int (*fn)(const char *cgroup, void *arg),
+		  void *arg)
+{
+	int pid;
+
+	pid = fork();
+	if (pid == 0) {
+		char buf[64];
+
+		snprintf(buf, sizeof(buf), "%d", getpid());
+		if (cg_write(cgroup, "cgroup.procs", buf))
+			exit(EXIT_FAILURE);
+		exit(fn(cgroup, arg));
+	}
+
+	return pid;
+}
+
+int get_temp_fd(void)
+{
+	return open(".", O_TMPFILE | O_RDWR | O_EXCL);
+}
+
+int alloc_pagecache(int fd, size_t size)
+{
+	char buf[PAGE_SIZE];
+	struct stat st;
+	int i;
+
+	if (fstat(fd, &st))
+		goto cleanup;
+
+	size += st.st_size;
+
+	if (ftruncate(fd, size))
+		goto cleanup;
+
+	for (i = 0; i < size; i += sizeof(buf))
+		read(fd, buf, sizeof(buf));
+
+	return 0;
+
+cleanup:
+	return -1;
+}
+
+int alloc_anon(const char *cgroup, void *arg)
+{
+	size_t size = (unsigned long)arg;
+	char *buf, *ptr;
+
+	buf = malloc(size);
+	for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE)
+		*ptr = 0;
+
+	free(buf);
+	return 0;
+}
+
+int is_swap_enabled(void)
+{
+	char buf[PAGE_SIZE];
+	const char delim[] = "\n";
+	int cnt = 0;
+	char *line;
+
+	if (read_text("/proc/swaps", buf, sizeof(buf)) <= 0)
+		return -1;
+
+	for (line = strtok(buf, delim); line; line = strtok(NULL, delim))
+		cnt++;
+
+	return cnt > 1;
+}
+
+int set_oom_adj_score(int pid, int score)
+{
+	char path[PATH_MAX];
+	int fd, len;
+
+	sprintf(path, "/proc/%d/oom_score_adj", pid);
+
+	fd = open(path, O_WRONLY | O_APPEND);
+	if (fd < 0)
+		return fd;
+
+	len = dprintf(fd, "%d", score);
+	if (len < 0) {
+		close(fd);
+		return len;
+	}
+
+	close(fd);
+	return 0;
+}
diff --git a/tools/testing/selftests/cgroup/cgroup_util.h b/tools/testing/selftests/cgroup/cgroup_util.h
new file mode 100644
index 000000000..9ac8b7958
--- /dev/null
+++ b/tools/testing/selftests/cgroup/cgroup_util.h
@@ -0,0 +1,43 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <stdlib.h>
+
+#define PAGE_SIZE 4096
+
+#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
+
+#define MB(x) (x << 20)
+
+/*
+ * Checks if two given values differ by less than err% of their sum.
+ */
+static inline int values_close(long a, long b, int err)
+{
+	return abs(a - b) <= (a + b) / 100 * err;
+}
+
+extern int cg_find_unified_root(char *root, size_t len);
+extern char *cg_name(const char *root, const char *name);
+extern char *cg_name_indexed(const char *root, const char *name, int index);
+extern int cg_create(const char *cgroup);
+extern int cg_destroy(const char *cgroup);
+extern int cg_read(const char *cgroup, const char *control,
+		   char *buf, size_t len);
+extern int cg_read_strcmp(const char *cgroup, const char *control,
+			  const char *expected);
+extern int cg_read_strstr(const char *cgroup, const char *control,
+			  const char *needle);
+extern long cg_read_long(const char *cgroup, const char *control);
+long cg_read_key_long(const char *cgroup, const char *control, const char *key);
+extern int cg_write(const char *cgroup, const char *control, char *buf);
+extern int cg_run(const char *cgroup,
+		  int (*fn)(const char *cgroup, void *arg),
+		  void *arg);
+extern int cg_enter_current(const char *cgroup);
+extern int cg_run_nowait(const char *cgroup,
+			 int (*fn)(const char *cgroup, void *arg),
+			 void *arg);
+extern int get_temp_fd(void);
+extern int alloc_pagecache(int fd, size_t size);
+extern int alloc_anon(const char *cgroup, void *arg);
+extern int is_swap_enabled(void);
+extern int set_oom_adj_score(int pid, int score);
diff --git a/tools/testing/selftests/cgroup/test_core.c b/tools/testing/selftests/cgroup/test_core.c
new file mode 100644
index 000000000..599234c5e
--- /dev/null
+++ b/tools/testing/selftests/cgroup/test_core.c
@@ -0,0 +1,567 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#define _GNU_SOURCE
+#include <linux/limits.h>
+#include <linux/sched.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sched.h>
+#include <stdio.h>
+#include <errno.h>
+
+#include "../kselftest.h"
+#include "cgroup_util.h"
+
+/*
+ * A(0) - B(0) - C(1)
+ *        \ D(0)
+ *
+ * A, B and C's "populated" fields would be 1 while D's 0.
+ * test that after the one process in C is moved to root,
+ * A,B and C's "populated" fields would flip to "0" and file
+ * modified events will be generated on the
+ * "cgroup.events" files of both cgroups.
+ */
+static int test_cgcore_populated(const char *root)
+{
+	int ret = KSFT_FAIL;
+	char *cg_test_a = NULL, *cg_test_b = NULL;
+	char *cg_test_c = NULL, *cg_test_d = NULL;
+
+	cg_test_a = cg_name(root, "cg_test_a");
+	cg_test_b = cg_name(root, "cg_test_a/cg_test_b");
+	cg_test_c = cg_name(root, "cg_test_a/cg_test_b/cg_test_c");
+	cg_test_d = cg_name(root, "cg_test_a/cg_test_b/cg_test_d");
+
+	if (!cg_test_a || !cg_test_b || !cg_test_c || !cg_test_d)
+		goto cleanup;
+
+	if (cg_create(cg_test_a))
+		goto cleanup;
+
+	if (cg_create(cg_test_b))
+		goto cleanup;
+
+	if (cg_create(cg_test_c))
+		goto cleanup;
+
+	if (cg_create(cg_test_d))
+		goto cleanup;
+
+	if (cg_enter_current(cg_test_c))
+		goto cleanup;
+
+	if (cg_read_strcmp(cg_test_a, "cgroup.events", "populated 1\n"))
+		goto cleanup;
+
+	if (cg_read_strcmp(cg_test_b, "cgroup.events", "populated 1\n"))
+		goto cleanup;
+
+	if (cg_read_strcmp(cg_test_c, "cgroup.events", "populated 1\n"))
+		goto cleanup;
+
+	if (cg_read_strcmp(cg_test_d, "cgroup.events", "populated 0\n"))
+		goto cleanup;
+
+	if (cg_enter_current(root))
+		goto cleanup;
+
+	if (cg_read_strcmp(cg_test_a, "cgroup.events", "populated 0\n"))
+		goto cleanup;
+
+	if (cg_read_strcmp(cg_test_b, "cgroup.events", "populated 0\n"))
+		goto cleanup;
+
+	if (cg_read_strcmp(cg_test_c, "cgroup.events", "populated 0\n"))
+		goto cleanup;
+
+	if (cg_read_strcmp(cg_test_d, "cgroup.events", "populated 0\n"))
+		goto cleanup;
+
+	ret = KSFT_PASS;
+
+cleanup:
+	if (cg_test_d)
+		cg_destroy(cg_test_d);
+	if (cg_test_c)
+		cg_destroy(cg_test_c);
+	if (cg_test_b)
+		cg_destroy(cg_test_b);
+	if (cg_test_a)
+		cg_destroy(cg_test_a);
+	free(cg_test_d);
+	free(cg_test_c);
+	free(cg_test_b);
+	free(cg_test_a);
+	return ret;
+}
+
+/*
+ * A (domain threaded) - B (threaded) - C (domain)
+ *
+ * test that C can't be used until it is turned into a
+ * threaded cgroup.  "cgroup.type" file will report "domain (invalid)" in
+ * these cases. Operations which fail due to invalid topology use
+ * EOPNOTSUPP as the errno.
+ */
+static int test_cgcore_invalid_domain(const char *root)
+{
+	int ret = KSFT_FAIL;
+	char *grandparent = NULL, *parent = NULL, *child = NULL;
+
+	grandparent = cg_name(root, "cg_test_grandparent");
+	parent = cg_name(root, "cg_test_grandparent/cg_test_parent");
+	child = cg_name(root, "cg_test_grandparent/cg_test_parent/cg_test_child");
+	if (!parent || !child || !grandparent)
+		goto cleanup;
+
+	if (cg_create(grandparent))
+		goto cleanup;
+
+	if (cg_create(parent))
+		goto cleanup;
+
+	if (cg_create(child))
+		goto cleanup;
+
+	if (cg_write(parent, "cgroup.type", "threaded"))
+		goto cleanup;
+
+	if (cg_read_strcmp(child, "cgroup.type", "domain invalid\n"))
+		goto cleanup;
+
+	if (!cg_enter_current(child))
+		goto cleanup;
+
+	if (errno != EOPNOTSUPP)
+		goto cleanup;
+
+	ret = KSFT_PASS;
+
+cleanup:
+	cg_enter_current(root);
+	if (child)
+		cg_destroy(child);
+	if (parent)
+		cg_destroy(parent);
+	if (grandparent)
+		cg_destroy(grandparent);
+	free(child);
+	free(parent);
+	free(grandparent);
+	return ret;
+}
+
+/*
+ * Test that when a child becomes threaded
+ * the parent type becomes domain threaded.
+ */
+static int test_cgcore_parent_becomes_threaded(const char *root)
+{
+	int ret = KSFT_FAIL;
+	char *parent = NULL, *child = NULL;
+
+	parent = cg_name(root, "cg_test_parent");
+	child = cg_name(root, "cg_test_parent/cg_test_child");
+	if (!parent || !child)
+		goto cleanup;
+
+	if (cg_create(parent))
+		goto cleanup;
+
+	if (cg_create(child))
+		goto cleanup;
+
+	if (cg_write(child, "cgroup.type", "threaded"))
+		goto cleanup;
+
+	if (cg_read_strcmp(parent, "cgroup.type", "domain threaded\n"))
+		goto cleanup;
+
+	ret = KSFT_PASS;
+
+cleanup:
+	if (child)
+		cg_destroy(child);
+	if (parent)
+		cg_destroy(parent);
+	free(child);
+	free(parent);
+	return ret;
+
+}
+
+/*
+ * Test that there's no internal process constrain on threaded cgroups.
+ * You can add threads/processes on a parent with a controller enabled.
+ */
+static int test_cgcore_no_internal_process_constraint_on_threads(const char *root)
+{
+	int ret = KSFT_FAIL;
+	char *parent = NULL, *child = NULL;
+
+	if (cg_read_strstr(root, "cgroup.controllers", "cpu") ||
+	    cg_write(root, "cgroup.subtree_control", "+cpu")) {
+		ret = KSFT_SKIP;
+		goto cleanup;
+	}
+
+	parent = cg_name(root, "cg_test_parent");
+	child = cg_name(root, "cg_test_parent/cg_test_child");
+	if (!parent || !child)
+		goto cleanup;
+
+	if (cg_create(parent))
+		goto cleanup;
+
+	if (cg_create(child))
+		goto cleanup;
+
+	if (cg_write(parent, "cgroup.type", "threaded"))
+		goto cleanup;
+
+	if (cg_write(child, "cgroup.type", "threaded"))
+		goto cleanup;
+
+	if (cg_write(parent, "cgroup.subtree_control", "+cpu"))
+		goto cleanup;
+
+	if (cg_enter_current(parent))
+		goto cleanup;
+
+	ret = KSFT_PASS;
+
+cleanup:
+	cg_enter_current(root);
+	cg_enter_current(root);
+	if (child)
+		cg_destroy(child);
+	if (parent)
+		cg_destroy(parent);
+	free(child);
+	free(parent);
+	return ret;
+}
+
+/*
+ * Test that you can't enable a controller on a child if it's not enabled
+ * on the parent.
+ */
+static int test_cgcore_top_down_constraint_enable(const char *root)
+{
+	int ret = KSFT_FAIL;
+	char *parent = NULL, *child = NULL;
+
+	parent = cg_name(root, "cg_test_parent");
+	child = cg_name(root, "cg_test_parent/cg_test_child");
+	if (!parent || !child)
+		goto cleanup;
+
+	if (cg_create(parent))
+		goto cleanup;
+
+	if (cg_create(child))
+		goto cleanup;
+
+	if (!cg_write(child, "cgroup.subtree_control", "+memory"))
+		goto cleanup;
+
+	ret = KSFT_PASS;
+
+cleanup:
+	if (child)
+		cg_destroy(child);
+	if (parent)
+		cg_destroy(parent);
+	free(child);
+	free(parent);
+	return ret;
+}
+
+/*
+ * Test that you can't disable a controller on a parent
+ * if it's enabled in a child.
+ */
+static int test_cgcore_top_down_constraint_disable(const char *root)
+{
+	int ret = KSFT_FAIL;
+	char *parent = NULL, *child = NULL;
+
+	parent = cg_name(root, "cg_test_parent");
+	child = cg_name(root, "cg_test_parent/cg_test_child");
+	if (!parent || !child)
+		goto cleanup;
+
+	if (cg_create(parent))
+		goto cleanup;
+
+	if (cg_create(child))
+		goto cleanup;
+
+	if (cg_write(parent, "cgroup.subtree_control", "+memory"))
+		goto cleanup;
+
+	if (cg_write(child, "cgroup.subtree_control", "+memory"))
+		goto cleanup;
+
+	if (!cg_write(parent, "cgroup.subtree_control", "-memory"))
+		goto cleanup;
+
+	ret = KSFT_PASS;
+
+cleanup:
+	if (child)
+		cg_destroy(child);
+	if (parent)
+		cg_destroy(parent);
+	free(child);
+	free(parent);
+	return ret;
+}
+
+/*
+ * Test internal process constraint.
+ * You can't add a pid to a domain parent if a controller is enabled.
+ */
+static int test_cgcore_internal_process_constraint(const char *root)
+{
+	int ret = KSFT_FAIL;
+	char *parent = NULL, *child = NULL;
+
+	parent = cg_name(root, "cg_test_parent");
+	child = cg_name(root, "cg_test_parent/cg_test_child");
+	if (!parent || !child)
+		goto cleanup;
+
+	if (cg_create(parent))
+		goto cleanup;
+
+	if (cg_create(child))
+		goto cleanup;
+
+	if (cg_write(parent, "cgroup.subtree_control", "+memory"))
+		goto cleanup;
+
+	if (!cg_enter_current(parent))
+		goto cleanup;
+
+	ret = KSFT_PASS;
+
+cleanup:
+	if (child)
+		cg_destroy(child);
+	if (parent)
+		cg_destroy(parent);
+	free(child);
+	free(parent);
+	return ret;
+}
+
+/*
+ * cgroup migration permission check should be performed based on the
+ * credentials at the time of open instead of write.
+ */
+static int test_cgcore_lesser_euid_open(const char *root)
+{
+	const uid_t test_euid = 65534;	/* usually nobody, any !root is fine */
+	int ret = KSFT_FAIL;
+	char *cg_test_a = NULL, *cg_test_b = NULL;
+	char *cg_test_a_procs = NULL, *cg_test_b_procs = NULL;
+	int cg_test_b_procs_fd = -1;
+	uid_t saved_uid;
+
+	cg_test_a = cg_name(root, "cg_test_a");
+	cg_test_b = cg_name(root, "cg_test_b");
+
+	if (!cg_test_a || !cg_test_b)
+		goto cleanup;
+
+	cg_test_a_procs = cg_name(cg_test_a, "cgroup.procs");
+	cg_test_b_procs = cg_name(cg_test_b, "cgroup.procs");
+
+	if (!cg_test_a_procs || !cg_test_b_procs)
+		goto cleanup;
+
+	if (cg_create(cg_test_a) || cg_create(cg_test_b))
+		goto cleanup;
+
+	if (cg_enter_current(cg_test_a))
+		goto cleanup;
+
+	if (chown(cg_test_a_procs, test_euid, -1) ||
+	    chown(cg_test_b_procs, test_euid, -1))
+		goto cleanup;
+
+	saved_uid = geteuid();
+	if (seteuid(test_euid))
+		goto cleanup;
+
+	cg_test_b_procs_fd = open(cg_test_b_procs, O_RDWR);
+
+	if (seteuid(saved_uid))
+		goto cleanup;
+
+	if (cg_test_b_procs_fd < 0)
+		goto cleanup;
+
+	if (write(cg_test_b_procs_fd, "0", 1) >= 0 || errno != EACCES)
+		goto cleanup;
+
+	ret = KSFT_PASS;
+
+cleanup:
+	cg_enter_current(root);
+	if (cg_test_b_procs_fd >= 0)
+		close(cg_test_b_procs_fd);
+	if (cg_test_b)
+		cg_destroy(cg_test_b);
+	if (cg_test_a)
+		cg_destroy(cg_test_a);
+	free(cg_test_b_procs);
+	free(cg_test_a_procs);
+	free(cg_test_b);
+	free(cg_test_a);
+	return ret;
+}
+
+struct lesser_ns_open_thread_arg {
+	const char	*path;
+	int		fd;
+	int		err;
+};
+
+static int lesser_ns_open_thread_fn(void *arg)
+{
+	struct lesser_ns_open_thread_arg *targ = arg;
+
+	targ->fd = open(targ->path, O_RDWR);
+	targ->err = errno;
+	return 0;
+}
+
+/*
+ * cgroup migration permission check should be performed based on the cgroup
+ * namespace at the time of open instead of write.
+ */
+static int test_cgcore_lesser_ns_open(const char *root)
+{
+	static char stack[65536];
+	const uid_t test_euid = 65534;	/* usually nobody, any !root is fine */
+	int ret = KSFT_FAIL;
+	char *cg_test_a = NULL, *cg_test_b = NULL;
+	char *cg_test_a_procs = NULL, *cg_test_b_procs = NULL;
+	int cg_test_b_procs_fd = -1;
+	struct lesser_ns_open_thread_arg targ = { .fd = -1 };
+	pid_t pid;
+	int status;
+
+	cg_test_a = cg_name(root, "cg_test_a");
+	cg_test_b = cg_name(root, "cg_test_b");
+
+	if (!cg_test_a || !cg_test_b)
+		goto cleanup;
+
+	cg_test_a_procs = cg_name(cg_test_a, "cgroup.procs");
+	cg_test_b_procs = cg_name(cg_test_b, "cgroup.procs");
+
+	if (!cg_test_a_procs || !cg_test_b_procs)
+		goto cleanup;
+
+	if (cg_create(cg_test_a) || cg_create(cg_test_b))
+		goto cleanup;
+
+	if (cg_enter_current(cg_test_b))
+		goto cleanup;
+
+	if (chown(cg_test_a_procs, test_euid, -1) ||
+	    chown(cg_test_b_procs, test_euid, -1))
+		goto cleanup;
+
+	targ.path = cg_test_b_procs;
+	pid = clone(lesser_ns_open_thread_fn, stack + sizeof(stack),
+		    CLONE_NEWCGROUP | CLONE_FILES | CLONE_VM | SIGCHLD,
+		    &targ);
+	if (pid < 0)
+		goto cleanup;
+
+	if (waitpid(pid, &status, 0) < 0)
+		goto cleanup;
+
+	if (!WIFEXITED(status))
+		goto cleanup;
+
+	cg_test_b_procs_fd = targ.fd;
+	if (cg_test_b_procs_fd < 0)
+		goto cleanup;
+
+	if (cg_enter_current(cg_test_a))
+		goto cleanup;
+
+	if ((status = write(cg_test_b_procs_fd, "0", 1)) >= 0 || errno != ENOENT)
+		goto cleanup;
+
+	ret = KSFT_PASS;
+
+cleanup:
+	cg_enter_current(root);
+	if (cg_test_b_procs_fd >= 0)
+		close(cg_test_b_procs_fd);
+	if (cg_test_b)
+		cg_destroy(cg_test_b);
+	if (cg_test_a)
+		cg_destroy(cg_test_a);
+	free(cg_test_b_procs);
+	free(cg_test_a_procs);
+	free(cg_test_b);
+	free(cg_test_a);
+	return ret;
+}
+
+#define T(x) { x, #x }
+struct corecg_test {
+	int (*fn)(const char *root);
+	const char *name;
+} tests[] = {
+	T(test_cgcore_internal_process_constraint),
+	T(test_cgcore_top_down_constraint_enable),
+	T(test_cgcore_top_down_constraint_disable),
+	T(test_cgcore_no_internal_process_constraint_on_threads),
+	T(test_cgcore_parent_becomes_threaded),
+	T(test_cgcore_invalid_domain),
+	T(test_cgcore_populated),
+	T(test_cgcore_lesser_euid_open),
+	T(test_cgcore_lesser_ns_open),
+};
+#undef T
+
+int main(int argc, char *argv[])
+{
+	char root[PATH_MAX];
+	int i, ret = EXIT_SUCCESS;
+
+	if (cg_find_unified_root(root, sizeof(root)))
+		ksft_exit_skip("cgroup v2 isn't mounted\n");
+
+	if (cg_read_strstr(root, "cgroup.subtree_control", "memory"))
+		if (cg_write(root, "cgroup.subtree_control", "+memory"))
+			ksft_exit_skip("Failed to set memory controller\n");
+
+	for (i = 0; i < ARRAY_SIZE(tests); i++) {
+		switch (tests[i].fn(root)) {
+		case KSFT_PASS:
+			ksft_test_result_pass("%s\n", tests[i].name);
+			break;
+		case KSFT_SKIP:
+			ksft_test_result_skip("%s\n", tests[i].name);
+			break;
+		default:
+			ret = EXIT_FAILURE;
+			ksft_test_result_fail("%s\n", tests[i].name);
+			break;
+		}
+	}
+
+	return ret;
+}
diff --git a/tools/testing/selftests/cgroup/test_memcontrol.c b/tools/testing/selftests/cgroup/test_memcontrol.c
new file mode 100644
index 000000000..c19a97dd0
--- /dev/null
+++ b/tools/testing/selftests/cgroup/test_memcontrol.c
@@ -0,0 +1,1228 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#define _GNU_SOURCE
+
+#include <linux/limits.h>
+#include <linux/oom.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <sys/socket.h>
+#include <sys/wait.h>
+#include <arpa/inet.h>
+#include <netinet/in.h>
+#include <netdb.h>
+#include <errno.h>
+
+#include "../kselftest.h"
+#include "cgroup_util.h"
+
+/*
+ * This test creates two nested cgroups with and without enabling
+ * the memory controller.
+ */
+static int test_memcg_subtree_control(const char *root)
+{
+	char *parent, *child, *parent2 = NULL, *child2 = NULL;
+	int ret = KSFT_FAIL;
+	char buf[PAGE_SIZE];
+
+	/* Create two nested cgroups with the memory controller enabled */
+	parent = cg_name(root, "memcg_test_0");
+	child = cg_name(root, "memcg_test_0/memcg_test_1");
+	if (!parent || !child)
+		goto cleanup_free;
+
+	if (cg_create(parent))
+		goto cleanup_free;
+
+	if (cg_write(parent, "cgroup.subtree_control", "+memory"))
+		goto cleanup_parent;
+
+	if (cg_create(child))
+		goto cleanup_parent;
+
+	if (cg_read_strstr(child, "cgroup.controllers", "memory"))
+		goto cleanup_child;
+
+	/* Create two nested cgroups without enabling memory controller */
+	parent2 = cg_name(root, "memcg_test_1");
+	child2 = cg_name(root, "memcg_test_1/memcg_test_1");
+	if (!parent2 || !child2)
+		goto cleanup_free2;
+
+	if (cg_create(parent2))
+		goto cleanup_free2;
+
+	if (cg_create(child2))
+		goto cleanup_parent2;
+
+	if (cg_read(child2, "cgroup.controllers", buf, sizeof(buf)))
+		goto cleanup_all;
+
+	if (!cg_read_strstr(child2, "cgroup.controllers", "memory"))
+		goto cleanup_all;
+
+	ret = KSFT_PASS;
+
+cleanup_all:
+	cg_destroy(child2);
+cleanup_parent2:
+	cg_destroy(parent2);
+cleanup_free2:
+	free(parent2);
+	free(child2);
+cleanup_child:
+	cg_destroy(child);
+cleanup_parent:
+	cg_destroy(parent);
+cleanup_free:
+	free(parent);
+	free(child);
+
+	return ret;
+}
+
+static int alloc_anon_50M_check(const char *cgroup, void *arg)
+{
+	size_t size = MB(50);
+	char *buf, *ptr;
+	long anon, current;
+	int ret = -1;
+
+	buf = malloc(size);
+	for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE)
+		*ptr = 0;
+
+	current = cg_read_long(cgroup, "memory.current");
+	if (current < size)
+		goto cleanup;
+
+	if (!values_close(size, current, 3))
+		goto cleanup;
+
+	anon = cg_read_key_long(cgroup, "memory.stat", "anon ");
+	if (anon < 0)
+		goto cleanup;
+
+	if (!values_close(anon, current, 3))
+		goto cleanup;
+
+	ret = 0;
+cleanup:
+	free(buf);
+	return ret;
+}
+
+static int alloc_pagecache_50M_check(const char *cgroup, void *arg)
+{
+	size_t size = MB(50);
+	int ret = -1;
+	long current, file;
+	int fd;
+
+	fd = get_temp_fd();
+	if (fd < 0)
+		return -1;
+
+	if (alloc_pagecache(fd, size))
+		goto cleanup;
+
+	current = cg_read_long(cgroup, "memory.current");
+	if (current < size)
+		goto cleanup;
+
+	file = cg_read_key_long(cgroup, "memory.stat", "file ");
+	if (file < 0)
+		goto cleanup;
+
+	if (!values_close(file, current, 10))
+		goto cleanup;
+
+	ret = 0;
+
+cleanup:
+	close(fd);
+	return ret;
+}
+
+/*
+ * This test create a memory cgroup, allocates
+ * some anonymous memory and some pagecache
+ * and check memory.current and some memory.stat values.
+ */
+static int test_memcg_current(const char *root)
+{
+	int ret = KSFT_FAIL;
+	long current;
+	char *memcg;
+
+	memcg = cg_name(root, "memcg_test");
+	if (!memcg)
+		goto cleanup;
+
+	if (cg_create(memcg))
+		goto cleanup;
+
+	current = cg_read_long(memcg, "memory.current");
+	if (current != 0)
+		goto cleanup;
+
+	if (cg_run(memcg, alloc_anon_50M_check, NULL))
+		goto cleanup;
+
+	if (cg_run(memcg, alloc_pagecache_50M_check, NULL))
+		goto cleanup;
+
+	ret = KSFT_PASS;
+
+cleanup:
+	cg_destroy(memcg);
+	free(memcg);
+
+	return ret;
+}
+
+static int alloc_pagecache_50M(const char *cgroup, void *arg)
+{
+	int fd = (long)arg;
+
+	return alloc_pagecache(fd, MB(50));
+}
+
+static int alloc_pagecache_50M_noexit(const char *cgroup, void *arg)
+{
+	int fd = (long)arg;
+	int ppid = getppid();
+
+	if (alloc_pagecache(fd, MB(50)))
+		return -1;
+
+	while (getppid() == ppid)
+		sleep(1);
+
+	return 0;
+}
+
+static int alloc_anon_noexit(const char *cgroup, void *arg)
+{
+	int ppid = getppid();
+
+	if (alloc_anon(cgroup, arg))
+		return -1;
+
+	while (getppid() == ppid)
+		sleep(1);
+
+	return 0;
+}
+
+/*
+ * Wait until processes are killed asynchronously by the OOM killer
+ * If we exceed a timeout, fail.
+ */
+static int cg_test_proc_killed(const char *cgroup)
+{
+	int limit;
+
+	for (limit = 10; limit > 0; limit--) {
+		if (cg_read_strcmp(cgroup, "cgroup.procs", "") == 0)
+			return 0;
+
+		usleep(100000);
+	}
+	return -1;
+}
+
+/*
+ * First, this test creates the following hierarchy:
+ * A       memory.min = 50M,  memory.max = 200M
+ * A/B     memory.min = 50M,  memory.current = 50M
+ * A/B/C   memory.min = 75M,  memory.current = 50M
+ * A/B/D   memory.min = 25M,  memory.current = 50M
+ * A/B/E   memory.min = 500M, memory.current = 0
+ * A/B/F   memory.min = 0,    memory.current = 50M
+ *
+ * Usages are pagecache, but the test keeps a running
+ * process in every leaf cgroup.
+ * Then it creates A/G and creates a significant
+ * memory pressure in it.
+ *
+ * A/B    memory.current ~= 50M
+ * A/B/C  memory.current ~= 33M
+ * A/B/D  memory.current ~= 17M
+ * A/B/E  memory.current ~= 0
+ *
+ * After that it tries to allocate more than there is
+ * unprotected memory in A available, and checks
+ * checks that memory.min protects pagecache even
+ * in this case.
+ */
+static int test_memcg_min(const char *root)
+{
+	int ret = KSFT_FAIL;
+	char *parent[3] = {NULL};
+	char *children[4] = {NULL};
+	long c[4];
+	int i, attempts;
+	int fd;
+
+	fd = get_temp_fd();
+	if (fd < 0)
+		goto cleanup;
+
+	parent[0] = cg_name(root, "memcg_test_0");
+	if (!parent[0])
+		goto cleanup;
+
+	parent[1] = cg_name(parent[0], "memcg_test_1");
+	if (!parent[1])
+		goto cleanup;
+
+	parent[2] = cg_name(parent[0], "memcg_test_2");
+	if (!parent[2])
+		goto cleanup;
+
+	if (cg_create(parent[0]))
+		goto cleanup;
+
+	if (cg_read_long(parent[0], "memory.min")) {
+		ret = KSFT_SKIP;
+		goto cleanup;
+	}
+
+	if (cg_write(parent[0], "cgroup.subtree_control", "+memory"))
+		goto cleanup;
+
+	if (cg_write(parent[0], "memory.max", "200M"))
+		goto cleanup;
+
+	if (cg_write(parent[0], "memory.swap.max", "0"))
+		goto cleanup;
+
+	if (cg_create(parent[1]))
+		goto cleanup;
+
+	if (cg_write(parent[1], "cgroup.subtree_control", "+memory"))
+		goto cleanup;
+
+	if (cg_create(parent[2]))
+		goto cleanup;
+
+	for (i = 0; i < ARRAY_SIZE(children); i++) {
+		children[i] = cg_name_indexed(parent[1], "child_memcg", i);
+		if (!children[i])
+			goto cleanup;
+
+		if (cg_create(children[i]))
+			goto cleanup;
+
+		if (i == 2)
+			continue;
+
+		cg_run_nowait(children[i], alloc_pagecache_50M_noexit,
+			      (void *)(long)fd);
+	}
+
+	if (cg_write(parent[0], "memory.min", "50M"))
+		goto cleanup;
+	if (cg_write(parent[1], "memory.min", "50M"))
+		goto cleanup;
+	if (cg_write(children[0], "memory.min", "75M"))
+		goto cleanup;
+	if (cg_write(children[1], "memory.min", "25M"))
+		goto cleanup;
+	if (cg_write(children[2], "memory.min", "500M"))
+		goto cleanup;
+	if (cg_write(children[3], "memory.min", "0"))
+		goto cleanup;
+
+	attempts = 0;
+	while (!values_close(cg_read_long(parent[1], "memory.current"),
+			     MB(150), 3)) {
+		if (attempts++ > 5)
+			break;
+		sleep(1);
+	}
+
+	if (cg_run(parent[2], alloc_anon, (void *)MB(148)))
+		goto cleanup;
+
+	if (!values_close(cg_read_long(parent[1], "memory.current"), MB(50), 3))
+		goto cleanup;
+
+	for (i = 0; i < ARRAY_SIZE(children); i++)
+		c[i] = cg_read_long(children[i], "memory.current");
+
+	if (!values_close(c[0], MB(33), 10))
+		goto cleanup;
+
+	if (!values_close(c[1], MB(17), 10))
+		goto cleanup;
+
+	if (!values_close(c[2], 0, 1))
+		goto cleanup;
+
+	if (!cg_run(parent[2], alloc_anon, (void *)MB(170)))
+		goto cleanup;
+
+	if (!values_close(cg_read_long(parent[1], "memory.current"), MB(50), 3))
+		goto cleanup;
+
+	ret = KSFT_PASS;
+
+cleanup:
+	for (i = ARRAY_SIZE(children) - 1; i >= 0; i--) {
+		if (!children[i])
+			continue;
+
+		cg_destroy(children[i]);
+		free(children[i]);
+	}
+
+	for (i = ARRAY_SIZE(parent) - 1; i >= 0; i--) {
+		if (!parent[i])
+			continue;
+
+		cg_destroy(parent[i]);
+		free(parent[i]);
+	}
+	close(fd);
+	return ret;
+}
+
+/*
+ * First, this test creates the following hierarchy:
+ * A       memory.low = 50M,  memory.max = 200M
+ * A/B     memory.low = 50M,  memory.current = 50M
+ * A/B/C   memory.low = 75M,  memory.current = 50M
+ * A/B/D   memory.low = 25M,  memory.current = 50M
+ * A/B/E   memory.low = 500M, memory.current = 0
+ * A/B/F   memory.low = 0,    memory.current = 50M
+ *
+ * Usages are pagecache.
+ * Then it creates A/G an creates a significant
+ * memory pressure in it.
+ *
+ * Then it checks actual memory usages and expects that:
+ * A/B    memory.current ~= 50M
+ * A/B/   memory.current ~= 33M
+ * A/B/D  memory.current ~= 17M
+ * A/B/E  memory.current ~= 0
+ *
+ * After that it tries to allocate more than there is
+ * unprotected memory in A available,
+ * and checks low and oom events in memory.events.
+ */
+static int test_memcg_low(const char *root)
+{
+	int ret = KSFT_FAIL;
+	char *parent[3] = {NULL};
+	char *children[4] = {NULL};
+	long low, oom;
+	long c[4];
+	int i;
+	int fd;
+
+	fd = get_temp_fd();
+	if (fd < 0)
+		goto cleanup;
+
+	parent[0] = cg_name(root, "memcg_test_0");
+	if (!parent[0])
+		goto cleanup;
+
+	parent[1] = cg_name(parent[0], "memcg_test_1");
+	if (!parent[1])
+		goto cleanup;
+
+	parent[2] = cg_name(parent[0], "memcg_test_2");
+	if (!parent[2])
+		goto cleanup;
+
+	if (cg_create(parent[0]))
+		goto cleanup;
+
+	if (cg_read_long(parent[0], "memory.low"))
+		goto cleanup;
+
+	if (cg_write(parent[0], "cgroup.subtree_control", "+memory"))
+		goto cleanup;
+
+	if (cg_write(parent[0], "memory.max", "200M"))
+		goto cleanup;
+
+	if (cg_write(parent[0], "memory.swap.max", "0"))
+		goto cleanup;
+
+	if (cg_create(parent[1]))
+		goto cleanup;
+
+	if (cg_write(parent[1], "cgroup.subtree_control", "+memory"))
+		goto cleanup;
+
+	if (cg_create(parent[2]))
+		goto cleanup;
+
+	for (i = 0; i < ARRAY_SIZE(children); i++) {
+		children[i] = cg_name_indexed(parent[1], "child_memcg", i);
+		if (!children[i])
+			goto cleanup;
+
+		if (cg_create(children[i]))
+			goto cleanup;
+
+		if (i == 2)
+			continue;
+
+		if (cg_run(children[i], alloc_pagecache_50M, (void *)(long)fd))
+			goto cleanup;
+	}
+
+	if (cg_write(parent[0], "memory.low", "50M"))
+		goto cleanup;
+	if (cg_write(parent[1], "memory.low", "50M"))
+		goto cleanup;
+	if (cg_write(children[0], "memory.low", "75M"))
+		goto cleanup;
+	if (cg_write(children[1], "memory.low", "25M"))
+		goto cleanup;
+	if (cg_write(children[2], "memory.low", "500M"))
+		goto cleanup;
+	if (cg_write(children[3], "memory.low", "0"))
+		goto cleanup;
+
+	if (cg_run(parent[2], alloc_anon, (void *)MB(148)))
+		goto cleanup;
+
+	if (!values_close(cg_read_long(parent[1], "memory.current"), MB(50), 3))
+		goto cleanup;
+
+	for (i = 0; i < ARRAY_SIZE(children); i++)
+		c[i] = cg_read_long(children[i], "memory.current");
+
+	if (!values_close(c[0], MB(33), 10))
+		goto cleanup;
+
+	if (!values_close(c[1], MB(17), 10))
+		goto cleanup;
+
+	if (!values_close(c[2], 0, 1))
+		goto cleanup;
+
+	if (cg_run(parent[2], alloc_anon, (void *)MB(166))) {
+		fprintf(stderr,
+			"memory.low prevents from allocating anon memory\n");
+		goto cleanup;
+	}
+
+	for (i = 0; i < ARRAY_SIZE(children); i++) {
+		oom = cg_read_key_long(children[i], "memory.events", "oom ");
+		low = cg_read_key_long(children[i], "memory.events", "low ");
+
+		if (oom)
+			goto cleanup;
+		if (i < 2 && low <= 0)
+			goto cleanup;
+		if (i >= 2 && low)
+			goto cleanup;
+	}
+
+	ret = KSFT_PASS;
+
+cleanup:
+	for (i = ARRAY_SIZE(children) - 1; i >= 0; i--) {
+		if (!children[i])
+			continue;
+
+		cg_destroy(children[i]);
+		free(children[i]);
+	}
+
+	for (i = ARRAY_SIZE(parent) - 1; i >= 0; i--) {
+		if (!parent[i])
+			continue;
+
+		cg_destroy(parent[i]);
+		free(parent[i]);
+	}
+	close(fd);
+	return ret;
+}
+
+static int alloc_pagecache_max_30M(const char *cgroup, void *arg)
+{
+	size_t size = MB(50);
+	int ret = -1;
+	long current;
+	int fd;
+
+	fd = get_temp_fd();
+	if (fd < 0)
+		return -1;
+
+	if (alloc_pagecache(fd, size))
+		goto cleanup;
+
+	current = cg_read_long(cgroup, "memory.current");
+	if (current <= MB(29) || current > MB(30))
+		goto cleanup;
+
+	ret = 0;
+
+cleanup:
+	close(fd);
+	return ret;
+
+}
+
+/*
+ * This test checks that memory.high limits the amount of
+ * memory which can be consumed by either anonymous memory
+ * or pagecache.
+ */
+static int test_memcg_high(const char *root)
+{
+	int ret = KSFT_FAIL;
+	char *memcg;
+	long high;
+
+	memcg = cg_name(root, "memcg_test");
+	if (!memcg)
+		goto cleanup;
+
+	if (cg_create(memcg))
+		goto cleanup;
+
+	if (cg_read_strcmp(memcg, "memory.high", "max\n"))
+		goto cleanup;
+
+	if (cg_write(memcg, "memory.swap.max", "0"))
+		goto cleanup;
+
+	if (cg_write(memcg, "memory.high", "30M"))
+		goto cleanup;
+
+	if (cg_run(memcg, alloc_anon, (void *)MB(100)))
+		goto cleanup;
+
+	if (!cg_run(memcg, alloc_pagecache_50M_check, NULL))
+		goto cleanup;
+
+	if (cg_run(memcg, alloc_pagecache_max_30M, NULL))
+		goto cleanup;
+
+	high = cg_read_key_long(memcg, "memory.events", "high ");
+	if (high <= 0)
+		goto cleanup;
+
+	ret = KSFT_PASS;
+
+cleanup:
+	cg_destroy(memcg);
+	free(memcg);
+
+	return ret;
+}
+
+/*
+ * This test checks that memory.max limits the amount of
+ * memory which can be consumed by either anonymous memory
+ * or pagecache.
+ */
+static int test_memcg_max(const char *root)
+{
+	int ret = KSFT_FAIL;
+	char *memcg;
+	long current, max;
+
+	memcg = cg_name(root, "memcg_test");
+	if (!memcg)
+		goto cleanup;
+
+	if (cg_create(memcg))
+		goto cleanup;
+
+	if (cg_read_strcmp(memcg, "memory.max", "max\n"))
+		goto cleanup;
+
+	if (cg_write(memcg, "memory.swap.max", "0"))
+		goto cleanup;
+
+	if (cg_write(memcg, "memory.max", "30M"))
+		goto cleanup;
+
+	/* Should be killed by OOM killer */
+	if (!cg_run(memcg, alloc_anon, (void *)MB(100)))
+		goto cleanup;
+
+	if (cg_run(memcg, alloc_pagecache_max_30M, NULL))
+		goto cleanup;
+
+	current = cg_read_long(memcg, "memory.current");
+	if (current > MB(30) || !current)
+		goto cleanup;
+
+	max = cg_read_key_long(memcg, "memory.events", "max ");
+	if (max <= 0)
+		goto cleanup;
+
+	ret = KSFT_PASS;
+
+cleanup:
+	cg_destroy(memcg);
+	free(memcg);
+
+	return ret;
+}
+
+static int alloc_anon_50M_check_swap(const char *cgroup, void *arg)
+{
+	long mem_max = (long)arg;
+	size_t size = MB(50);
+	char *buf, *ptr;
+	long mem_current, swap_current;
+	int ret = -1;
+
+	buf = malloc(size);
+	for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE)
+		*ptr = 0;
+
+	mem_current = cg_read_long(cgroup, "memory.current");
+	if (!mem_current || !values_close(mem_current, mem_max, 3))
+		goto cleanup;
+
+	swap_current = cg_read_long(cgroup, "memory.swap.current");
+	if (!swap_current ||
+	    !values_close(mem_current + swap_current, size, 3))
+		goto cleanup;
+
+	ret = 0;
+cleanup:
+	free(buf);
+	return ret;
+}
+
+/*
+ * This test checks that memory.swap.max limits the amount of
+ * anonymous memory which can be swapped out.
+ */
+static int test_memcg_swap_max(const char *root)
+{
+	int ret = KSFT_FAIL;
+	char *memcg;
+	long max;
+
+	if (!is_swap_enabled())
+		return KSFT_SKIP;
+
+	memcg = cg_name(root, "memcg_test");
+	if (!memcg)
+		goto cleanup;
+
+	if (cg_create(memcg))
+		goto cleanup;
+
+	if (cg_read_long(memcg, "memory.swap.current")) {
+		ret = KSFT_SKIP;
+		goto cleanup;
+	}
+
+	if (cg_read_strcmp(memcg, "memory.max", "max\n"))
+		goto cleanup;
+
+	if (cg_read_strcmp(memcg, "memory.swap.max", "max\n"))
+		goto cleanup;
+
+	if (cg_write(memcg, "memory.swap.max", "30M"))
+		goto cleanup;
+
+	if (cg_write(memcg, "memory.max", "30M"))
+		goto cleanup;
+
+	/* Should be killed by OOM killer */
+	if (!cg_run(memcg, alloc_anon, (void *)MB(100)))
+		goto cleanup;
+
+	if (cg_read_key_long(memcg, "memory.events", "oom ") != 1)
+		goto cleanup;
+
+	if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 1)
+		goto cleanup;
+
+	if (cg_run(memcg, alloc_anon_50M_check_swap, (void *)MB(30)))
+		goto cleanup;
+
+	max = cg_read_key_long(memcg, "memory.events", "max ");
+	if (max <= 0)
+		goto cleanup;
+
+	ret = KSFT_PASS;
+
+cleanup:
+	cg_destroy(memcg);
+	free(memcg);
+
+	return ret;
+}
+
+/*
+ * This test disables swapping and tries to allocate anonymous memory
+ * up to OOM. Then it checks for oom and oom_kill events in
+ * memory.events.
+ */
+static int test_memcg_oom_events(const char *root)
+{
+	int ret = KSFT_FAIL;
+	char *memcg;
+
+	memcg = cg_name(root, "memcg_test");
+	if (!memcg)
+		goto cleanup;
+
+	if (cg_create(memcg))
+		goto cleanup;
+
+	if (cg_write(memcg, "memory.max", "30M"))
+		goto cleanup;
+
+	if (cg_write(memcg, "memory.swap.max", "0"))
+		goto cleanup;
+
+	if (!cg_run(memcg, alloc_anon, (void *)MB(100)))
+		goto cleanup;
+
+	if (cg_read_strcmp(memcg, "cgroup.procs", ""))
+		goto cleanup;
+
+	if (cg_read_key_long(memcg, "memory.events", "oom ") != 1)
+		goto cleanup;
+
+	if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 1)
+		goto cleanup;
+
+	ret = KSFT_PASS;
+
+cleanup:
+	cg_destroy(memcg);
+	free(memcg);
+
+	return ret;
+}
+
+struct tcp_server_args {
+	unsigned short port;
+	int ctl[2];
+};
+
+static int tcp_server(const char *cgroup, void *arg)
+{
+	struct tcp_server_args *srv_args = arg;
+	struct sockaddr_in6 saddr = { 0 };
+	socklen_t slen = sizeof(saddr);
+	int sk, client_sk, ctl_fd, yes = 1, ret = -1;
+
+	close(srv_args->ctl[0]);
+	ctl_fd = srv_args->ctl[1];
+
+	saddr.sin6_family = AF_INET6;
+	saddr.sin6_addr = in6addr_any;
+	saddr.sin6_port = htons(srv_args->port);
+
+	sk = socket(AF_INET6, SOCK_STREAM, 0);
+	if (sk < 0)
+		return ret;
+
+	if (setsockopt(sk, SOL_SOCKET, SO_REUSEADDR, &yes, sizeof(yes)) < 0)
+		goto cleanup;
+
+	if (bind(sk, (struct sockaddr *)&saddr, slen)) {
+		write(ctl_fd, &errno, sizeof(errno));
+		goto cleanup;
+	}
+
+	if (listen(sk, 1))
+		goto cleanup;
+
+	ret = 0;
+	if (write(ctl_fd, &ret, sizeof(ret)) != sizeof(ret)) {
+		ret = -1;
+		goto cleanup;
+	}
+
+	client_sk = accept(sk, NULL, NULL);
+	if (client_sk < 0)
+		goto cleanup;
+
+	ret = -1;
+	for (;;) {
+		uint8_t buf[0x100000];
+
+		if (write(client_sk, buf, sizeof(buf)) <= 0) {
+			if (errno == ECONNRESET)
+				ret = 0;
+			break;
+		}
+	}
+
+	close(client_sk);
+
+cleanup:
+	close(sk);
+	return ret;
+}
+
+static int tcp_client(const char *cgroup, unsigned short port)
+{
+	const char server[] = "localhost";
+	struct addrinfo *ai;
+	char servport[6];
+	int retries = 0x10; /* nice round number */
+	int sk, ret;
+
+	snprintf(servport, sizeof(servport), "%hd", port);
+	ret = getaddrinfo(server, servport, NULL, &ai);
+	if (ret)
+		return ret;
+
+	sk = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol);
+	if (sk < 0)
+		goto free_ainfo;
+
+	ret = connect(sk, ai->ai_addr, ai->ai_addrlen);
+	if (ret < 0)
+		goto close_sk;
+
+	ret = KSFT_FAIL;
+	while (retries--) {
+		uint8_t buf[0x100000];
+		long current, sock;
+
+		if (read(sk, buf, sizeof(buf)) <= 0)
+			goto close_sk;
+
+		current = cg_read_long(cgroup, "memory.current");
+		sock = cg_read_key_long(cgroup, "memory.stat", "sock ");
+
+		if (current < 0 || sock < 0)
+			goto close_sk;
+
+		if (current < sock)
+			goto close_sk;
+
+		if (values_close(current, sock, 10)) {
+			ret = KSFT_PASS;
+			break;
+		}
+	}
+
+close_sk:
+	close(sk);
+free_ainfo:
+	freeaddrinfo(ai);
+	return ret;
+}
+
+/*
+ * This test checks socket memory accounting.
+ * The test forks a TCP server listens on a random port between 1000
+ * and 61000. Once it gets a client connection, it starts writing to
+ * its socket.
+ * The TCP client interleaves reads from the socket with check whether
+ * memory.current and memory.stat.sock are similar.
+ */
+static int test_memcg_sock(const char *root)
+{
+	int bind_retries = 5, ret = KSFT_FAIL, pid, err;
+	unsigned short port;
+	char *memcg;
+
+	memcg = cg_name(root, "memcg_test");
+	if (!memcg)
+		goto cleanup;
+
+	if (cg_create(memcg))
+		goto cleanup;
+
+	while (bind_retries--) {
+		struct tcp_server_args args;
+
+		if (pipe(args.ctl))
+			goto cleanup;
+
+		port = args.port = 1000 + rand() % 60000;
+
+		pid = cg_run_nowait(memcg, tcp_server, &args);
+		if (pid < 0)
+			goto cleanup;
+
+		close(args.ctl[1]);
+		if (read(args.ctl[0], &err, sizeof(err)) != sizeof(err))
+			goto cleanup;
+		close(args.ctl[0]);
+
+		if (!err)
+			break;
+		if (err != EADDRINUSE)
+			goto cleanup;
+
+		waitpid(pid, NULL, 0);
+	}
+
+	if (err == EADDRINUSE) {
+		ret = KSFT_SKIP;
+		goto cleanup;
+	}
+
+	if (tcp_client(memcg, port) != KSFT_PASS)
+		goto cleanup;
+
+	waitpid(pid, &err, 0);
+	if (WEXITSTATUS(err))
+		goto cleanup;
+
+	if (cg_read_long(memcg, "memory.current") < 0)
+		goto cleanup;
+
+	if (cg_read_key_long(memcg, "memory.stat", "sock "))
+		goto cleanup;
+
+	ret = KSFT_PASS;
+
+cleanup:
+	cg_destroy(memcg);
+	free(memcg);
+
+	return ret;
+}
+
+/*
+ * This test disables swapping and tries to allocate anonymous memory
+ * up to OOM with memory.group.oom set. Then it checks that all
+ * processes in the leaf (but not the parent) were killed.
+ */
+static int test_memcg_oom_group_leaf_events(const char *root)
+{
+	int ret = KSFT_FAIL;
+	char *parent, *child;
+
+	parent = cg_name(root, "memcg_test_0");
+	child = cg_name(root, "memcg_test_0/memcg_test_1");
+
+	if (!parent || !child)
+		goto cleanup;
+
+	if (cg_create(parent))
+		goto cleanup;
+
+	if (cg_create(child))
+		goto cleanup;
+
+	if (cg_write(parent, "cgroup.subtree_control", "+memory"))
+		goto cleanup;
+
+	if (cg_write(child, "memory.max", "50M"))
+		goto cleanup;
+
+	if (cg_write(child, "memory.swap.max", "0"))
+		goto cleanup;
+
+	if (cg_write(child, "memory.oom.group", "1"))
+		goto cleanup;
+
+	cg_run_nowait(parent, alloc_anon_noexit, (void *) MB(60));
+	cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1));
+	cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1));
+	if (!cg_run(child, alloc_anon, (void *)MB(100)))
+		goto cleanup;
+
+	if (cg_test_proc_killed(child))
+		goto cleanup;
+
+	if (cg_read_key_long(child, "memory.events", "oom_kill ") <= 0)
+		goto cleanup;
+
+	if (cg_read_key_long(parent, "memory.events", "oom_kill ") != 0)
+		goto cleanup;
+
+	ret = KSFT_PASS;
+
+cleanup:
+	if (child)
+		cg_destroy(child);
+	if (parent)
+		cg_destroy(parent);
+	free(child);
+	free(parent);
+
+	return ret;
+}
+
+/*
+ * This test disables swapping and tries to allocate anonymous memory
+ * up to OOM with memory.group.oom set. Then it checks that all
+ * processes in the parent and leaf were killed.
+ */
+static int test_memcg_oom_group_parent_events(const char *root)
+{
+	int ret = KSFT_FAIL;
+	char *parent, *child;
+
+	parent = cg_name(root, "memcg_test_0");
+	child = cg_name(root, "memcg_test_0/memcg_test_1");
+
+	if (!parent || !child)
+		goto cleanup;
+
+	if (cg_create(parent))
+		goto cleanup;
+
+	if (cg_create(child))
+		goto cleanup;
+
+	if (cg_write(parent, "memory.max", "80M"))
+		goto cleanup;
+
+	if (cg_write(parent, "memory.swap.max", "0"))
+		goto cleanup;
+
+	if (cg_write(parent, "memory.oom.group", "1"))
+		goto cleanup;
+
+	cg_run_nowait(parent, alloc_anon_noexit, (void *) MB(60));
+	cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1));
+	cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1));
+
+	if (!cg_run(child, alloc_anon, (void *)MB(100)))
+		goto cleanup;
+
+	if (cg_test_proc_killed(child))
+		goto cleanup;
+	if (cg_test_proc_killed(parent))
+		goto cleanup;
+
+	ret = KSFT_PASS;
+
+cleanup:
+	if (child)
+		cg_destroy(child);
+	if (parent)
+		cg_destroy(parent);
+	free(child);
+	free(parent);
+
+	return ret;
+}
+
+/*
+ * This test disables swapping and tries to allocate anonymous memory
+ * up to OOM with memory.group.oom set. Then it checks that all
+ * processes were killed except those set with OOM_SCORE_ADJ_MIN
+ */
+static int test_memcg_oom_group_score_events(const char *root)
+{
+	int ret = KSFT_FAIL;
+	char *memcg;
+	int safe_pid;
+
+	memcg = cg_name(root, "memcg_test_0");
+
+	if (!memcg)
+		goto cleanup;
+
+	if (cg_create(memcg))
+		goto cleanup;
+
+	if (cg_write(memcg, "memory.max", "50M"))
+		goto cleanup;
+
+	if (cg_write(memcg, "memory.swap.max", "0"))
+		goto cleanup;
+
+	if (cg_write(memcg, "memory.oom.group", "1"))
+		goto cleanup;
+
+	safe_pid = cg_run_nowait(memcg, alloc_anon_noexit, (void *) MB(1));
+	if (set_oom_adj_score(safe_pid, OOM_SCORE_ADJ_MIN))
+		goto cleanup;
+
+	cg_run_nowait(memcg, alloc_anon_noexit, (void *) MB(1));
+	if (!cg_run(memcg, alloc_anon, (void *)MB(100)))
+		goto cleanup;
+
+	if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 3)
+		goto cleanup;
+
+	if (kill(safe_pid, SIGKILL))
+		goto cleanup;
+
+	ret = KSFT_PASS;
+
+cleanup:
+	if (memcg)
+		cg_destroy(memcg);
+	free(memcg);
+
+	return ret;
+}
+
+
+#define T(x) { x, #x }
+struct memcg_test {
+	int (*fn)(const char *root);
+	const char *name;
+} tests[] = {
+	T(test_memcg_subtree_control),
+	T(test_memcg_current),
+	T(test_memcg_min),
+	T(test_memcg_low),
+	T(test_memcg_high),
+	T(test_memcg_max),
+	T(test_memcg_oom_events),
+	T(test_memcg_swap_max),
+	T(test_memcg_sock),
+	T(test_memcg_oom_group_leaf_events),
+	T(test_memcg_oom_group_parent_events),
+	T(test_memcg_oom_group_score_events),
+};
+#undef T
+
+int main(int argc, char **argv)
+{
+	char root[PATH_MAX];
+	int i, ret = EXIT_SUCCESS;
+
+	if (cg_find_unified_root(root, sizeof(root)))
+		ksft_exit_skip("cgroup v2 isn't mounted\n");
+
+	/*
+	 * Check that memory controller is available:
+	 * memory is listed in cgroup.controllers
+	 */
+	if (cg_read_strstr(root, "cgroup.controllers", "memory"))
+		ksft_exit_skip("memory controller isn't available\n");
+
+	if (cg_read_strstr(root, "cgroup.subtree_control", "memory"))
+		if (cg_write(root, "cgroup.subtree_control", "+memory"))
+			ksft_exit_skip("Failed to set memory controller\n");
+
+	for (i = 0; i < ARRAY_SIZE(tests); i++) {
+		switch (tests[i].fn(root)) {
+		case KSFT_PASS:
+			ksft_test_result_pass("%s\n", tests[i].name);
+			break;
+		case KSFT_SKIP:
+			ksft_test_result_skip("%s\n", tests[i].name);
+			break;
+		default:
+			ret = EXIT_FAILURE;
+			ksft_test_result_fail("%s\n", tests[i].name);
+			break;
+		}
+	}
+
+	return ret;
+}
diff --git a/tools/testing/selftests/cpu-hotplug/Makefile b/tools/testing/selftests/cpu-hotplug/Makefile
new file mode 100644
index 000000000..d8be047ee
--- /dev/null
+++ b/tools/testing/selftests/cpu-hotplug/Makefile
@@ -0,0 +1,11 @@
+# SPDX-License-Identifier: GPL-2.0
+all:
+
+TEST_PROGS := cpu-on-off-test.sh
+
+include ../lib.mk
+
+run_full_test:
+	@/bin/bash ./cpu-on-off-test.sh -a || echo "cpu-hotplug selftests: [FAIL]"
+
+clean:
diff --git a/tools/testing/selftests/cpu-hotplug/config b/tools/testing/selftests/cpu-hotplug/config
new file mode 100644
index 000000000..d4aca2ad5
--- /dev/null
+++ b/tools/testing/selftests/cpu-hotplug/config
@@ -0,0 +1 @@
+CONFIG_NOTIFIER_ERROR_INJECTION=y
diff --git a/tools/testing/selftests/cpu-hotplug/cpu-on-off-test.sh b/tools/testing/selftests/cpu-hotplug/cpu-on-off-test.sh
new file mode 100755
index 000000000..0d26b5e3f
--- /dev/null
+++ b/tools/testing/selftests/cpu-hotplug/cpu-on-off-test.sh
@@ -0,0 +1,293 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+SYSFS=
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+prerequisite()
+{
+	msg="skip all tests:"
+
+	if [ $UID != 0 ]; then
+		echo $msg must be run as root >&2
+		exit $ksft_skip
+	fi
+
+	taskset -p 01 $$
+
+	SYSFS=`mount -t sysfs | head -1 | awk '{ print $3 }'`
+
+	if [ ! -d "$SYSFS" ]; then
+		echo $msg sysfs is not mounted >&2
+		exit $ksft_skip
+	fi
+
+	if ! ls $SYSFS/devices/system/cpu/cpu* > /dev/null 2>&1; then
+		echo $msg cpu hotplug is not supported >&2
+		exit $ksft_skip
+	fi
+
+	echo "CPU online/offline summary:"
+	online_cpus=`cat $SYSFS/devices/system/cpu/online`
+	online_max=${online_cpus##*-}
+
+	if [[ "$online_cpus" = "$online_max" ]]; then
+		echo "$msg: since there is only one cpu: $online_cpus"
+		exit $ksft_skip
+	fi
+
+	present_cpus=`cat $SYSFS/devices/system/cpu/present`
+	present_max=${present_cpus##*-}
+	echo "present_cpus = $present_cpus present_max = $present_max"
+
+	echo -e "\t Cpus in online state: $online_cpus"
+
+	offline_cpus=`cat $SYSFS/devices/system/cpu/offline`
+	if [[ "a$offline_cpus" = "a" ]]; then
+		offline_cpus=0
+	else
+		offline_max=${offline_cpus##*-}
+	fi
+	echo -e "\t Cpus in offline state: $offline_cpus"
+}
+
+#
+# list all hot-pluggable CPUs
+#
+hotpluggable_cpus()
+{
+	local state=${1:-.\*}
+
+	for cpu in $SYSFS/devices/system/cpu/cpu*; do
+		if [ -f $cpu/online ] && grep -q $state $cpu/online; then
+			echo ${cpu##/*/cpu}
+		fi
+	done
+}
+
+hotplaggable_offline_cpus()
+{
+	hotpluggable_cpus 0
+}
+
+hotpluggable_online_cpus()
+{
+	hotpluggable_cpus 1
+}
+
+cpu_is_online()
+{
+	grep -q 1 $SYSFS/devices/system/cpu/cpu$1/online
+}
+
+cpu_is_offline()
+{
+	grep -q 0 $SYSFS/devices/system/cpu/cpu$1/online
+}
+
+online_cpu()
+{
+	echo 1 > $SYSFS/devices/system/cpu/cpu$1/online
+}
+
+offline_cpu()
+{
+	echo 0 > $SYSFS/devices/system/cpu/cpu$1/online
+}
+
+online_cpu_expect_success()
+{
+	local cpu=$1
+
+	if ! online_cpu $cpu; then
+		echo $FUNCNAME $cpu: unexpected fail >&2
+		exit 1
+	elif ! cpu_is_online $cpu; then
+		echo $FUNCNAME $cpu: unexpected offline >&2
+		exit 1
+	fi
+}
+
+online_cpu_expect_fail()
+{
+	local cpu=$1
+
+	if online_cpu $cpu 2> /dev/null; then
+		echo $FUNCNAME $cpu: unexpected success >&2
+		exit 1
+	elif ! cpu_is_offline $cpu; then
+		echo $FUNCNAME $cpu: unexpected online >&2
+		exit 1
+	fi
+}
+
+offline_cpu_expect_success()
+{
+	local cpu=$1
+
+	if ! offline_cpu $cpu; then
+		echo $FUNCNAME $cpu: unexpected fail >&2
+		exit 1
+	elif ! cpu_is_offline $cpu; then
+		echo $FUNCNAME $cpu: unexpected offline >&2
+		exit 1
+	fi
+}
+
+offline_cpu_expect_fail()
+{
+	local cpu=$1
+
+	if offline_cpu $cpu 2> /dev/null; then
+		echo $FUNCNAME $cpu: unexpected success >&2
+		exit 1
+	elif ! cpu_is_online $cpu; then
+		echo $FUNCNAME $cpu: unexpected offline >&2
+		exit 1
+	fi
+}
+
+error=-12
+allcpus=0
+priority=0
+online_cpus=0
+online_max=0
+offline_cpus=0
+offline_max=0
+present_cpus=0
+present_max=0
+
+while getopts e:ahp: opt; do
+	case $opt in
+	e)
+		error=$OPTARG
+		;;
+	a)
+		allcpus=1
+		;;
+	h)
+		echo "Usage $0 [ -a ] [ -e errno ] [ -p notifier-priority ]"
+		echo -e "\t default offline one cpu"
+		echo -e "\t run with -a option to offline all cpus"
+		exit
+		;;
+	p)
+		priority=$OPTARG
+		;;
+	esac
+done
+
+if ! [ "$error" -ge -4095 -a "$error" -lt 0 ]; then
+	echo "error code must be -4095 <= errno < 0" >&2
+	exit 1
+fi
+
+prerequisite
+
+#
+# Safe test (default) - offline and online one cpu
+#
+if [ $allcpus -eq 0 ]; then
+	echo "Limited scope test: one hotplug cpu"
+	echo -e "\t (leaves cpu in the original state):"
+	echo -e "\t online to offline to online: cpu $online_max"
+	offline_cpu_expect_success $online_max
+	online_cpu_expect_success $online_max
+
+	if [[ $offline_cpus -gt 0 ]]; then
+		echo -e "\t offline to online to offline: cpu $present_max"
+		online_cpu_expect_success $present_max
+		offline_cpu_expect_success $present_max
+		online_cpu $present_max
+	fi
+	exit 0
+else
+	echo "Full scope test: all hotplug cpus"
+	echo -e "\t online all offline cpus"
+	echo -e "\t offline all online cpus"
+	echo -e "\t online all offline cpus"
+fi
+
+#
+# Online all hot-pluggable CPUs
+#
+for cpu in `hotplaggable_offline_cpus`; do
+	online_cpu_expect_success $cpu
+done
+
+#
+# Offline all hot-pluggable CPUs
+#
+for cpu in `hotpluggable_online_cpus`; do
+	offline_cpu_expect_success $cpu
+done
+
+#
+# Online all hot-pluggable CPUs again
+#
+for cpu in `hotplaggable_offline_cpus`; do
+	online_cpu_expect_success $cpu
+done
+
+#
+# Test with cpu notifier error injection
+#
+
+DEBUGFS=`mount -t debugfs | head -1 | awk '{ print $3 }'`
+NOTIFIER_ERR_INJECT_DIR=$DEBUGFS/notifier-error-inject/cpu
+
+prerequisite_extra()
+{
+	msg="skip extra tests:"
+
+	/sbin/modprobe -q -r cpu-notifier-error-inject
+	/sbin/modprobe -q cpu-notifier-error-inject priority=$priority
+
+	if [ ! -d "$DEBUGFS" ]; then
+		echo $msg debugfs is not mounted >&2
+		exit $ksft_skip
+	fi
+
+	if [ ! -d $NOTIFIER_ERR_INJECT_DIR ]; then
+		echo $msg cpu-notifier-error-inject module is not available >&2
+		exit $ksft_skip
+	fi
+}
+
+prerequisite_extra
+
+#
+# Offline all hot-pluggable CPUs
+#
+echo 0 > $NOTIFIER_ERR_INJECT_DIR/actions/CPU_DOWN_PREPARE/error
+for cpu in `hotpluggable_online_cpus`; do
+	offline_cpu_expect_success $cpu
+done
+
+#
+# Test CPU hot-add error handling (offline => online)
+#
+echo $error > $NOTIFIER_ERR_INJECT_DIR/actions/CPU_UP_PREPARE/error
+for cpu in `hotplaggable_offline_cpus`; do
+	online_cpu_expect_fail $cpu
+done
+
+#
+# Online all hot-pluggable CPUs
+#
+echo 0 > $NOTIFIER_ERR_INJECT_DIR/actions/CPU_UP_PREPARE/error
+for cpu in `hotplaggable_offline_cpus`; do
+	online_cpu_expect_success $cpu
+done
+
+#
+# Test CPU hot-remove error handling (online => offline)
+#
+echo $error > $NOTIFIER_ERR_INJECT_DIR/actions/CPU_DOWN_PREPARE/error
+for cpu in `hotpluggable_online_cpus`; do
+	offline_cpu_expect_fail $cpu
+done
+
+echo 0 > $NOTIFIER_ERR_INJECT_DIR/actions/CPU_DOWN_PREPARE/error
+/sbin/modprobe -q -r cpu-notifier-error-inject
diff --git a/tools/testing/selftests/cpufreq/Makefile b/tools/testing/selftests/cpufreq/Makefile
new file mode 100644
index 000000000..c86ca8342
--- /dev/null
+++ b/tools/testing/selftests/cpufreq/Makefile
@@ -0,0 +1,9 @@
+# SPDX-License-Identifier: GPL-2.0
+all:
+
+TEST_PROGS := main.sh
+TEST_FILES := cpu.sh cpufreq.sh governor.sh module.sh special-tests.sh
+
+include ../lib.mk
+
+clean:
diff --git a/tools/testing/selftests/cpufreq/config b/tools/testing/selftests/cpufreq/config
new file mode 100644
index 000000000..27ff72ebd
--- /dev/null
+++ b/tools/testing/selftests/cpufreq/config
@@ -0,0 +1,15 @@
+CONFIG_CPU_FREQ=y
+CONFIG_CPU_FREQ_STAT=y
+CONFIG_CPU_FREQ_GOV_POWERSAVE=y
+CONFIG_CPU_FREQ_GOV_USERSPACE=y
+CONFIG_CPU_FREQ_GOV_ONDEMAND=y
+CONFIG_CPU_FREQ_GOV_CONSERVATIVE=y
+CONFIG_CPU_FREQ_GOV_SCHEDUTIL=y
+CONFIG_DEBUG_RT_MUTEXES=y
+CONFIG_DEBUG_PI_LIST=y
+CONFIG_DEBUG_SPINLOCK=y
+CONFIG_DEBUG_MUTEXES=y
+CONFIG_DEBUG_LOCK_ALLOC=y
+CONFIG_PROVE_LOCKING=y
+CONFIG_LOCKDEP=y
+CONFIG_DEBUG_ATOMIC_SLEEP=y
diff --git a/tools/testing/selftests/cpufreq/cpu.sh b/tools/testing/selftests/cpufreq/cpu.sh
new file mode 100755
index 000000000..39fdcdfb8
--- /dev/null
+++ b/tools/testing/selftests/cpufreq/cpu.sh
@@ -0,0 +1,85 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# CPU helpers
+
+# protect against multiple inclusion
+if [ $FILE_CPU ]; then
+	return 0
+else
+	FILE_CPU=DONE
+fi
+
+source cpufreq.sh
+
+for_each_cpu()
+{
+	cpus=$(ls $CPUROOT | grep "cpu[0-9].*")
+	for cpu in $cpus; do
+		$@ $cpu
+	done
+}
+
+for_each_non_boot_cpu()
+{
+	cpus=$(ls $CPUROOT | grep "cpu[1-9].*")
+	for cpu in $cpus; do
+		$@ $cpu
+	done
+}
+
+#$1: cpu
+offline_cpu()
+{
+	printf "Offline $1\n"
+	echo 0 > $CPUROOT/$1/online
+}
+
+#$1: cpu
+online_cpu()
+{
+	printf "Online $1\n"
+	echo 1 > $CPUROOT/$1/online
+}
+
+#$1: cpu
+reboot_cpu()
+{
+	offline_cpu $1
+	online_cpu $1
+}
+
+# Reboot CPUs
+# param: number of times we want to run the loop
+reboot_cpus()
+{
+	printf "** Test: Running ${FUNCNAME[0]} for $1 loops **\n\n"
+
+	for i in `seq 1 $1`; do
+		for_each_non_boot_cpu offline_cpu
+		for_each_non_boot_cpu online_cpu
+		printf "\n"
+	done
+
+	printf "\n%s\n\n" "------------------------------------------------"
+}
+
+# Prints warning for all CPUs with missing cpufreq directory
+print_unmanaged_cpus()
+{
+	for_each_cpu cpu_should_have_cpufreq_directory
+}
+
+# Counts CPUs with cpufreq directories
+count_cpufreq_managed_cpus()
+{
+	count=0;
+
+	for cpu in `ls $CPUROOT | grep "cpu[0-9].*"`; do
+		if [ -d $CPUROOT/$cpu/cpufreq ]; then
+			let count=count+1;
+		fi
+	done
+
+	echo $count;
+}
diff --git a/tools/testing/selftests/cpufreq/cpufreq.sh b/tools/testing/selftests/cpufreq/cpufreq.sh
new file mode 100755
index 000000000..b583a2fb4
--- /dev/null
+++ b/tools/testing/selftests/cpufreq/cpufreq.sh
@@ -0,0 +1,242 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# protect against multiple inclusion
+if [ $FILE_CPUFREQ ]; then
+	return 0
+else
+	FILE_CPUFREQ=DONE
+fi
+
+source cpu.sh
+
+
+# $1: cpu
+cpu_should_have_cpufreq_directory()
+{
+	if [ ! -d $CPUROOT/$1/cpufreq ]; then
+		printf "Warning: No cpufreq directory present for $1\n"
+	fi
+}
+
+cpu_should_not_have_cpufreq_directory()
+{
+	if [ -d $CPUROOT/$1/cpufreq ]; then
+		printf "Warning: cpufreq directory present for $1\n"
+	fi
+}
+
+for_each_policy()
+{
+	policies=$(ls $CPUFREQROOT| grep "policy[0-9].*")
+	for policy in $policies; do
+		$@ $policy
+	done
+}
+
+for_each_policy_concurrent()
+{
+	policies=$(ls $CPUFREQROOT| grep "policy[0-9].*")
+	for policy in $policies; do
+		$@ $policy &
+	done
+}
+
+# $1: Path
+read_cpufreq_files_in_dir()
+{
+	local files=`ls $1`
+
+	printf "Printing directory: $1\n\n"
+
+	for file in $files; do
+		if [ -f $1/$file ]; then
+			printf "$file:"
+			cat $1/$file
+		else
+			printf "\n"
+			read_cpufreq_files_in_dir "$1/$file"
+		fi
+	done
+	printf "\n"
+}
+
+
+read_all_cpufreq_files()
+{
+	printf "** Test: Running ${FUNCNAME[0]} **\n\n"
+
+	read_cpufreq_files_in_dir $CPUFREQROOT
+
+	printf "%s\n\n" "------------------------------------------------"
+}
+
+
+# UPDATE CPUFREQ FILES
+
+# $1: directory path
+update_cpufreq_files_in_dir()
+{
+	local files=`ls $1`
+
+	printf "Updating directory: $1\n\n"
+
+	for file in $files; do
+		if [ -f $1/$file ]; then
+			# file is writable ?
+			local wfile=$(ls -l $1/$file | awk '$1 ~ /^.*w.*/ { print $NF; }')
+
+			if [ ! -z $wfile ]; then
+				# scaling_setspeed is a special file and we
+				# should skip updating it
+				if [ $file != "scaling_setspeed" ]; then
+					local val=$(cat $1/$file)
+					printf "Writing $val to: $file\n"
+					echo $val > $1/$file
+				fi
+			fi
+		else
+			printf "\n"
+			update_cpufreq_files_in_dir "$1/$file"
+		fi
+	done
+
+	printf "\n"
+}
+
+# Update all writable files with their existing values
+update_all_cpufreq_files()
+{
+	printf "** Test: Running ${FUNCNAME[0]} **\n\n"
+
+	update_cpufreq_files_in_dir $CPUFREQROOT
+
+	printf "%s\n\n" "------------------------------------------------"
+}
+
+
+# CHANGE CPU FREQUENCIES
+
+# $1: policy
+find_current_freq()
+{
+	cat $CPUFREQROOT/$1/scaling_cur_freq
+}
+
+# $1: policy
+# $2: frequency
+set_cpu_frequency()
+{
+	printf "Change frequency for $1 to $2\n"
+	echo $2 > $CPUFREQROOT/$1/scaling_setspeed
+}
+
+# $1: policy
+test_all_frequencies()
+{
+	local filepath="$CPUFREQROOT/$1"
+
+	backup_governor $1
+
+	local found=$(switch_governor $1 "userspace")
+	if [ $found = 1 ]; then
+		printf "${FUNCNAME[0]}: userspace governor not available for: $1\n"
+		return;
+	fi
+
+	printf "Switched governor for $1 to userspace\n\n"
+
+	local freqs=$(cat $filepath/scaling_available_frequencies)
+	printf "Available frequencies for $1: $freqs\n\n"
+
+	# Set all frequencies one-by-one
+	for freq in $freqs; do
+		set_cpu_frequency $1 $freq
+	done
+
+	printf "\n"
+
+	restore_governor $1
+}
+
+# $1: loop count
+shuffle_frequency_for_all_cpus()
+{
+	printf "** Test: Running ${FUNCNAME[0]} for $1 loops **\n\n"
+
+	for i in `seq 1 $1`; do
+		for_each_policy test_all_frequencies
+	done
+	printf "\n%s\n\n" "------------------------------------------------"
+}
+
+# Basic cpufreq tests
+cpufreq_basic_tests()
+{
+	printf "*** RUNNING CPUFREQ SANITY TESTS ***\n"
+	printf "====================================\n\n"
+
+	count=$(count_cpufreq_managed_cpus)
+	if [ $count = 0 ]; then
+		printf "No cpu is managed by cpufreq core, exiting\n"
+		exit;
+	else
+		printf "CPUFreq manages: $count CPUs\n\n"
+	fi
+
+	# Detect & print which CPUs are not managed by cpufreq
+	print_unmanaged_cpus
+
+	# read/update all cpufreq files
+	read_all_cpufreq_files
+	update_all_cpufreq_files
+
+	# hotplug cpus
+	reboot_cpus 5
+
+	# Test all frequencies
+	shuffle_frequency_for_all_cpus 2
+
+	# Test all governors
+	shuffle_governors_for_all_cpus 1
+}
+
+# Suspend/resume
+# $1: "suspend" or "hibernate", $2: loop count
+do_suspend()
+{
+	printf "** Test: Running ${FUNCNAME[0]}: Trying $1 for $2 loops **\n\n"
+
+	# Is the directory available
+	if [ ! -d $SYSFS/power/ -o ! -f $SYSFS/power/state ]; then
+		printf "$SYSFS/power/state not available\n"
+		return 1
+	fi
+
+	if [ $1 = "suspend" ]; then
+		filename="mem"
+	elif [ $1 = "hibernate" ]; then
+		filename="disk"
+	else
+		printf "$1 is not a valid option\n"
+		return 1
+	fi
+
+	if [ -n $filename ]; then
+		present=$(cat $SYSFS/power/state | grep $filename)
+
+		if [ -z "$present" ]; then
+			printf "Tried to $1 but $filename isn't present in $SYSFS/power/state\n"
+			return 1;
+		fi
+
+		for i in `seq 1 $2`; do
+			printf "Starting $1\n"
+			echo $filename > $SYSFS/power/state
+			printf "Came out of $1\n"
+
+			printf "Do basic tests after finishing $1 to verify cpufreq state\n\n"
+			cpufreq_basic_tests
+		done
+	fi
+}
diff --git a/tools/testing/selftests/cpufreq/governor.sh b/tools/testing/selftests/cpufreq/governor.sh
new file mode 100755
index 000000000..fe37df79c
--- /dev/null
+++ b/tools/testing/selftests/cpufreq/governor.sh
@@ -0,0 +1,154 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test governors
+
+# protect against multiple inclusion
+if [ $FILE_GOVERNOR ]; then
+	return 0
+else
+	FILE_GOVERNOR=DONE
+fi
+
+source cpu.sh
+source cpufreq.sh
+
+CUR_GOV=
+CUR_FREQ=
+
+# Find governor's directory path
+# $1: policy, $2: governor
+find_gov_directory()
+{
+	if [ -d $CPUFREQROOT/$2 ]; then
+		printf "$CPUFREQROOT/$2\n"
+	elif [ -d $CPUFREQROOT/$1/$2 ]; then
+		printf "$CPUFREQROOT/$1/$2\n"
+	else
+		printf "INVALID\n"
+	fi
+}
+
+# $1: policy
+find_current_governor()
+{
+	cat $CPUFREQROOT/$1/scaling_governor
+}
+
+# $1: policy
+backup_governor()
+{
+	CUR_GOV=$(find_current_governor $1)
+
+	printf "Governor backup done for $1: $CUR_GOV\n"
+
+	if [ $CUR_GOV == "userspace" ]; then
+		CUR_FREQ=$(find_current_freq $1)
+		printf "Governor frequency backup done for $1: $CUR_FREQ\n"
+	fi
+
+	printf "\n"
+}
+
+# $1: policy
+restore_governor()
+{
+	__switch_governor $1 $CUR_GOV
+
+	printf "Governor restored for $1 to $CUR_GOV\n"
+
+	if [ $CUR_GOV == "userspace" ]; then
+		set_cpu_frequency $1 $CUR_FREQ
+		printf "Governor frequency restored for $1: $CUR_FREQ\n"
+	fi
+
+	printf "\n"
+}
+
+# param:
+# $1: policy, $2: governor
+__switch_governor()
+{
+	echo $2 > $CPUFREQROOT/$1/scaling_governor
+}
+
+# param:
+# $1: cpu, $2: governor
+__switch_governor_for_cpu()
+{
+	echo $2 > $CPUROOT/$1/cpufreq/scaling_governor
+}
+
+# SWITCH GOVERNORS
+
+# $1: cpu, $2: governor
+switch_governor()
+{
+	local filepath=$CPUFREQROOT/$1/scaling_available_governors
+
+	# check if governor is available
+	local found=$(cat $filepath | grep $2 | wc -l)
+	if [ $found = 0 ]; then
+		echo 1;
+		return
+	fi
+
+	__switch_governor $1 $2
+	echo 0;
+}
+
+# $1: policy, $2: governor
+switch_show_governor()
+{
+	cur_gov=find_current_governor
+	if [ $cur_gov == "userspace" ]; then
+		cur_freq=find_current_freq
+	fi
+
+	# switch governor
+	__switch_governor $1 $2
+
+	printf "\nSwitched governor for $1 to $2\n\n"
+
+	if [ $2 == "userspace" -o $2 == "powersave" -o $2 == "performance" ]; then
+		printf "No files to read for $2 governor\n\n"
+		return
+	fi
+
+	# show governor files
+	local govpath=$(find_gov_directory $1 $2)
+	read_cpufreq_files_in_dir $govpath
+}
+
+# $1: function to be called, $2: policy
+call_for_each_governor()
+{
+	local filepath=$CPUFREQROOT/$2/scaling_available_governors
+
+	# Exit if cpu isn't managed by cpufreq core
+	if [ ! -f $filepath ]; then
+		return;
+	fi
+
+	backup_governor $2
+
+	local governors=$(cat $filepath)
+	printf "Available governors for $2: $governors\n"
+
+	for governor in $governors; do
+		$1 $2 $governor
+	done
+
+	restore_governor $2
+}
+
+# $1: loop count
+shuffle_governors_for_all_cpus()
+{
+	printf "** Test: Running ${FUNCNAME[0]} for $1 loops **\n\n"
+
+	for i in `seq 1 $1`; do
+		for_each_policy call_for_each_governor switch_show_governor
+	done
+	printf "%s\n\n" "------------------------------------------------"
+}
diff --git a/tools/testing/selftests/cpufreq/main.sh b/tools/testing/selftests/cpufreq/main.sh
new file mode 100755
index 000000000..31f8c9a76
--- /dev/null
+++ b/tools/testing/selftests/cpufreq/main.sh
@@ -0,0 +1,198 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source cpu.sh
+source cpufreq.sh
+source governor.sh
+source module.sh
+source special-tests.sh
+
+FUNC=basic	# do basic tests by default
+OUTFILE=cpufreq_selftest
+SYSFS=
+CPUROOT=
+CPUFREQROOT=
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+helpme()
+{
+	printf "Usage: $0 [-h] [-todg args]
+	[-h <help>]
+	[-o <output-file-for-dump>]
+	[-t <basic: Basic cpufreq testing
+	     suspend: suspend/resume,
+	     hibernate: hibernate/resume,
+	     modtest: test driver or governor modules. Only to be used with -d or -g options,
+	     sptest1: Simple governor switch to produce lockdep.
+	     sptest2: Concurrent governor switch to produce lockdep.
+	     sptest3: Governor races, shuffle between governors quickly.
+	     sptest4: CPU hotplugs with updates to cpufreq files.>]
+	[-d <driver's module name: only with \"-t modtest>\"]
+	[-g <governor's module name: only with \"-t modtest>\"]
+	\n"
+	exit 2
+}
+
+prerequisite()
+{
+	msg="skip all tests:"
+
+	if [ $UID != 0 ]; then
+		echo $msg must be run as root >&2
+		exit $ksft_skip
+	fi
+
+	taskset -p 01 $$
+
+	SYSFS=`mount -t sysfs | head -1 | awk '{ print $3 }'`
+
+	if [ ! -d "$SYSFS" ]; then
+		echo $msg sysfs is not mounted >&2
+		exit 2
+	fi
+
+	CPUROOT=$SYSFS/devices/system/cpu
+	CPUFREQROOT="$CPUROOT/cpufreq"
+
+	if ! ls $CPUROOT/cpu* > /dev/null 2>&1; then
+		echo $msg cpus not available in sysfs >&2
+		exit 2
+	fi
+
+	if ! ls $CPUROOT/cpufreq > /dev/null 2>&1; then
+		echo $msg cpufreq directory not available in sysfs >&2
+		exit 2
+	fi
+}
+
+parse_arguments()
+{
+	while getopts ht:o:d:g: arg
+	do
+		case $arg in
+			h) # --help
+				helpme
+				;;
+
+			t) # --func_type (Function to perform: basic, suspend, hibernate, modtest, sptest1/2/3/4 (default: basic))
+				FUNC=$OPTARG
+				;;
+
+			o) # --output-file (Output file to store dumps)
+				OUTFILE=$OPTARG
+				;;
+
+			d) # --driver-mod-name (Name of the driver module)
+				DRIVER_MOD=$OPTARG
+				;;
+
+			g) # --governor-mod-name (Name of the governor module)
+				GOVERNOR_MOD=$OPTARG
+				;;
+
+			\?)
+				helpme
+				;;
+		esac
+	done
+}
+
+do_test()
+{
+	# Check if CPUs are managed by cpufreq or not
+	count=$(count_cpufreq_managed_cpus)
+
+	if [ $count = 0 -a $FUNC != "modtest" ]; then
+		echo "No cpu is managed by cpufreq core, exiting"
+		exit 2;
+	fi
+
+	case "$FUNC" in
+		"basic")
+		cpufreq_basic_tests
+		;;
+
+		"suspend")
+		do_suspend "suspend" 1
+		;;
+
+		"hibernate")
+		do_suspend "hibernate" 1
+		;;
+
+		"modtest")
+		# Do we have modules in place?
+		if [ -z $DRIVER_MOD ] && [ -z $GOVERNOR_MOD ]; then
+			echo "No driver or governor module passed with -d or -g"
+			exit 2;
+		fi
+
+		if [ $DRIVER_MOD ]; then
+			if [ $GOVERNOR_MOD ]; then
+				module_test $DRIVER_MOD $GOVERNOR_MOD
+			else
+				module_driver_test $DRIVER_MOD
+			fi
+		else
+			if [ $count = 0 ]; then
+				echo "No cpu is managed by cpufreq core, exiting"
+				exit 2;
+			fi
+
+			module_governor_test $GOVERNOR_MOD
+		fi
+		;;
+
+		"sptest1")
+		simple_lockdep
+		;;
+
+		"sptest2")
+		concurrent_lockdep
+		;;
+
+		"sptest3")
+		governor_race
+		;;
+
+		"sptest4")
+		hotplug_with_updates
+		;;
+
+		*)
+		echo "Invalid [-f] function type"
+		helpme
+		;;
+	esac
+}
+
+# clear dumps
+# $1: file name
+clear_dumps()
+{
+	echo "" > $1.txt
+	echo "" > $1.dmesg_cpufreq.txt
+	echo "" > $1.dmesg_full.txt
+}
+
+# $1: output file name
+dmesg_dumps()
+{
+	dmesg | grep cpufreq >> $1.dmesg_cpufreq.txt
+
+	# We may need the full logs as well
+	dmesg >> $1.dmesg_full.txt
+}
+
+# Parse arguments
+parse_arguments $@
+
+# Make sure all requirements are met
+prerequisite
+
+# Run requested functions
+clear_dumps $OUTFILE
+do_test >> $OUTFILE.txt
+dmesg_dumps $OUTFILE
diff --git a/tools/testing/selftests/cpufreq/module.sh b/tools/testing/selftests/cpufreq/module.sh
new file mode 100755
index 000000000..22563cd12
--- /dev/null
+++ b/tools/testing/selftests/cpufreq/module.sh
@@ -0,0 +1,244 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Modules specific tests cases
+
+# protect against multiple inclusion
+if [ $FILE_MODULE ]; then
+	return 0
+else
+	FILE_MODULE=DONE
+fi
+
+source cpu.sh
+source cpufreq.sh
+source governor.sh
+
+# Check basic insmod/rmmod
+# $1: module
+test_basic_insmod_rmmod()
+{
+	printf "** Test: Running ${FUNCNAME[0]} **\n\n"
+
+	printf "Inserting $1 module\n"
+	# insert module
+	insmod $1
+	if [ $? != 0 ]; then
+		printf "Insmod $1 failed\n"
+		exit;
+	fi
+
+	printf "Removing $1 module\n"
+	# remove module
+	rmmod $1
+	if [ $? != 0 ]; then
+		printf "rmmod $1 failed\n"
+		exit;
+	fi
+
+	printf "\n"
+}
+
+# Insert cpufreq driver module and perform basic tests
+# $1: cpufreq-driver module to insert
+# $2: If we want to play with CPUs (1) or not (0)
+module_driver_test_single()
+{
+	printf "** Test: Running ${FUNCNAME[0]} for driver $1 and cpus_hotplug=$2 **\n\n"
+
+	if [ $2 -eq 1 ]; then
+		# offline all non-boot CPUs
+		for_each_non_boot_cpu offline_cpu
+		printf "\n"
+	fi
+
+	# insert module
+	printf "Inserting $1 module\n\n"
+	insmod $1
+	if [ $? != 0 ]; then
+		printf "Insmod $1 failed\n"
+		return;
+	fi
+
+	if [ $2 -eq 1 ]; then
+		# online all non-boot CPUs
+		for_each_non_boot_cpu online_cpu
+		printf "\n"
+	fi
+
+	# run basic tests
+	cpufreq_basic_tests
+
+	# remove module
+	printf "Removing $1 module\n\n"
+	rmmod $1
+	if [ $? != 0 ]; then
+		printf "rmmod $1 failed\n"
+		return;
+	fi
+
+	# There shouldn't be any cpufreq directories now.
+	for_each_cpu cpu_should_not_have_cpufreq_directory
+	printf "\n"
+}
+
+# $1: cpufreq-driver module to insert
+module_driver_test()
+{
+	printf "** Test: Running ${FUNCNAME[0]} **\n\n"
+
+	# check if module is present or not
+	ls $1 > /dev/null
+	if [ $? != 0 ]; then
+		printf "$1: not present in `pwd` folder\n"
+		return;
+	fi
+
+	# test basic module tests
+	test_basic_insmod_rmmod $1
+
+	# Do simple module test
+	module_driver_test_single $1 0
+
+	# Remove CPUs before inserting module and then bring them back
+	module_driver_test_single $1 1
+	printf "\n"
+}
+
+# find governor name based on governor module name
+# $1: governor module name
+find_gov_name()
+{
+	if [ $1 = "cpufreq_ondemand.ko" ]; then
+		printf "ondemand"
+	elif [ $1 = "cpufreq_conservative.ko" ]; then
+		printf "conservative"
+	elif [ $1 = "cpufreq_userspace.ko" ]; then
+		printf "userspace"
+	elif [ $1 = "cpufreq_performance.ko" ]; then
+		printf "performance"
+	elif [ $1 = "cpufreq_powersave.ko" ]; then
+		printf "powersave"
+	elif [ $1 = "cpufreq_schedutil.ko" ]; then
+		printf "schedutil"
+	fi
+}
+
+# $1: governor string, $2: governor module, $3: policy
+# example: module_governor_test_single "ondemand" "cpufreq_ondemand.ko" 2
+module_governor_test_single()
+{
+	printf "** Test: Running ${FUNCNAME[0]} for $3 **\n\n"
+
+	backup_governor $3
+
+	# switch to new governor
+	printf "Switch from $CUR_GOV to $1\n"
+	switch_show_governor $3 $1
+
+	# try removing module, it should fail as governor is used
+	printf "Removing $2 module\n\n"
+	rmmod $2
+	if [ $? = 0 ]; then
+		printf "WARN: rmmod $2 succeeded even if governor is used\n"
+		insmod $2
+	else
+		printf "Pass: unable to remove $2 while it is being used\n\n"
+	fi
+
+	# switch back to old governor
+	printf "Switchback to $CUR_GOV from $1\n"
+	restore_governor $3
+	printf "\n"
+}
+
+# Insert cpufreq governor module and perform basic tests
+# $1: cpufreq-governor module to insert
+module_governor_test()
+{
+	printf "** Test: Running ${FUNCNAME[0]} **\n\n"
+
+	# check if module is present or not
+	ls $1 > /dev/null
+	if [ $? != 0 ]; then
+		printf "$1: not present in `pwd` folder\n"
+		return;
+	fi
+
+	# test basic module tests
+	test_basic_insmod_rmmod $1
+
+	# insert module
+	printf "Inserting $1 module\n\n"
+	insmod $1
+	if [ $? != 0 ]; then
+		printf "Insmod $1 failed\n"
+		return;
+	fi
+
+	# switch to new governor for each cpu
+	for_each_policy module_governor_test_single $(find_gov_name $1) $1
+
+	# remove module
+	printf "Removing $1 module\n\n"
+	rmmod $1
+	if [ $? != 0 ]; then
+		printf "rmmod $1 failed\n"
+		return;
+	fi
+	printf "\n"
+}
+
+# test modules: driver and governor
+# $1: driver module, $2: governor module
+module_test()
+{
+	printf "** Test: Running ${FUNCNAME[0]} **\n\n"
+
+	# check if modules are present or not
+	ls $1 $2 > /dev/null
+	if [ $? != 0 ]; then
+		printf "$1 or $2: is not present in `pwd` folder\n"
+		return;
+	fi
+
+	# TEST1: Insert gov after driver
+	# insert driver module
+	printf "Inserting $1 module\n\n"
+	insmod $1
+	if [ $? != 0 ]; then
+		printf "Insmod $1 failed\n"
+		return;
+	fi
+
+	# run governor tests
+	module_governor_test $2
+
+	# remove driver module
+	printf "Removing $1 module\n\n"
+	rmmod $1
+	if [ $? != 0 ]; then
+		printf "rmmod $1 failed\n"
+		return;
+	fi
+
+	# TEST2: Insert driver after governor
+	# insert governor module
+	printf "Inserting $2 module\n\n"
+	insmod $2
+	if [ $? != 0 ]; then
+		printf "Insmod $2 failed\n"
+		return;
+	fi
+
+	# run governor tests
+	module_driver_test $1
+
+	# remove driver module
+	printf "Removing $2 module\n\n"
+	rmmod $2
+	if [ $? != 0 ]; then
+		printf "rmmod $2 failed\n"
+		return;
+	fi
+}
diff --git a/tools/testing/selftests/cpufreq/special-tests.sh b/tools/testing/selftests/cpufreq/special-tests.sh
new file mode 100755
index 000000000..8d40505dc
--- /dev/null
+++ b/tools/testing/selftests/cpufreq/special-tests.sh
@@ -0,0 +1,116 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Special test cases reported by people
+
+# Testcase 1: Reported here: http://marc.info/?l=linux-pm&m=140618592709858&w=2
+
+# protect against multiple inclusion
+if [ $FILE_SPECIAL ]; then
+	return 0
+else
+	FILE_SPECIAL=DONE
+fi
+
+source cpu.sh
+source cpufreq.sh
+source governor.sh
+
+# Test 1
+# $1: policy
+__simple_lockdep()
+{
+	# switch to ondemand
+	__switch_governor $1 "ondemand"
+
+	# cat ondemand files
+	local ondir=$(find_gov_directory $1 "ondemand")
+	if [ -z $ondir ]; then
+		printf "${FUNCNAME[0]}Ondemand directory not created, quit"
+		return
+	fi
+
+	cat $ondir/*
+
+	# switch to conservative
+	__switch_governor $1 "conservative"
+}
+
+simple_lockdep()
+{
+	printf "** Test: Running ${FUNCNAME[0]} **\n"
+
+	for_each_policy __simple_lockdep
+}
+
+# Test 2
+# $1: policy
+__concurrent_lockdep()
+{
+	for i in `seq 0 100`; do
+		__simple_lockdep $1
+	done
+}
+
+concurrent_lockdep()
+{
+	printf "** Test: Running ${FUNCNAME[0]} **\n"
+
+	for_each_policy_concurrent __concurrent_lockdep
+}
+
+# Test 3
+quick_shuffle()
+{
+	# this is called concurrently from governor_race
+	for I in `seq 1000`
+	do
+		echo ondemand | sudo tee $CPUFREQROOT/policy*/scaling_governor &
+		echo userspace | sudo tee $CPUFREQROOT/policy*/scaling_governor &
+	done
+}
+
+governor_race()
+{
+	printf "** Test: Running ${FUNCNAME[0]} **\n"
+
+	# run 8 concurrent instances
+	for I in `seq 8`
+	do
+		quick_shuffle &
+	done
+}
+
+# Test 4
+# $1: cpu
+hotplug_with_updates_cpu()
+{
+	local filepath="$CPUROOT/$1/cpufreq"
+
+	# switch to ondemand
+	__switch_governor_for_cpu $1 "ondemand"
+
+	for i in `seq 1 5000`
+	do
+		reboot_cpu $1
+	done &
+
+	local freqs=$(cat $filepath/scaling_available_frequencies)
+	local oldfreq=$(cat $filepath/scaling_min_freq)
+
+	for j in `seq 1 5000`
+	do
+		# Set all frequencies one-by-one
+		for freq in $freqs; do
+			echo $freq > $filepath/scaling_min_freq
+		done
+	done
+
+	# restore old freq
+	echo $oldfreq > $filepath/scaling_min_freq
+}
+
+hotplug_with_updates()
+{
+	for_each_non_boot_cpu hotplug_with_updates_cpu
+}
diff --git a/tools/testing/selftests/drivers/gpu/drm_mm.sh b/tools/testing/selftests/drivers/gpu/drm_mm.sh
new file mode 100755
index 000000000..b789dc825
--- /dev/null
+++ b/tools/testing/selftests/drivers/gpu/drm_mm.sh
@@ -0,0 +1,16 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# Runs API tests for struct drm_mm (DRM range manager)
+
+if ! /sbin/modprobe -n -q test-drm_mm; then
+       echo "drivers/gpu/drm_mm: [skip]"
+       exit 77
+fi
+
+if /sbin/modprobe -q test-drm_mm; then
+       /sbin/modprobe -q -r test-drm_mm
+       echo "drivers/gpu/drm_mm: ok"
+else
+       echo "drivers/gpu/drm_mm: [FAIL]"
+       exit 1
+fi
diff --git a/tools/testing/selftests/drivers/gpu/i915.sh b/tools/testing/selftests/drivers/gpu/i915.sh
new file mode 100755
index 000000000..d3895bc71
--- /dev/null
+++ b/tools/testing/selftests/drivers/gpu/i915.sh
@@ -0,0 +1,16 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# Runs hardware independent tests for i915 (drivers/gpu/drm/i915)
+
+if ! /sbin/modprobe -q -r i915; then
+	echo "drivers/gpu/i915: [SKIP]"
+	exit 77
+fi
+
+if /sbin/modprobe -q i915 mock_selftests=-1; then
+	/sbin/modprobe -q -r i915
+	echo "drivers/gpu/i915: ok"
+else
+	echo "drivers/gpu/i915: [FAIL]"
+	exit 1
+fi
diff --git a/tools/testing/selftests/drivers/net/mlxsw/mirror_gre.sh b/tools/testing/selftests/drivers/net/mlxsw/mirror_gre.sh
new file mode 100755
index 000000000..76f1ab489
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/mirror_gre.sh
@@ -0,0 +1,217 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test uses standard topology for testing gretap. See
+# ../../../net/forwarding/mirror_gre_topo_lib.sh for more details.
+#
+# Test offloading various features of offloading gretap mirrors specific to
+# mlxsw.
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+NUM_NETIFS=6
+source $lib_dir/lib.sh
+source $lib_dir/mirror_lib.sh
+source $lib_dir/mirror_gre_lib.sh
+source $lib_dir/mirror_gre_topo_lib.sh
+
+setup_keyful()
+{
+	tunnel_create gt6-key ip6gretap 2001:db8:3::1 2001:db8:3::2 \
+		      ttl 100 tos inherit allow-localremote \
+		      key 1234
+
+	tunnel_create h3-gt6-key ip6gretap 2001:db8:3::2 2001:db8:3::1 \
+		      key 1234
+	ip link set h3-gt6-key vrf v$h3
+	matchall_sink_create h3-gt6-key
+
+	ip address add dev $swp3 2001:db8:3::1/64
+	ip address add dev $h3 2001:db8:3::2/64
+}
+
+cleanup_keyful()
+{
+	ip address del dev $h3 2001:db8:3::2/64
+	ip address del dev $swp3 2001:db8:3::1/64
+
+	tunnel_destroy h3-gt6-key
+	tunnel_destroy gt6-key
+}
+
+setup_soft()
+{
+	# Set up a topology for testing underlay routes that point at an
+	# unsupported soft device.
+
+	tunnel_create gt6-soft ip6gretap 2001:db8:4::1 2001:db8:4::2 \
+		      ttl 100 tos inherit allow-localremote
+
+	tunnel_create h3-gt6-soft ip6gretap 2001:db8:4::2 2001:db8:4::1
+	ip link set h3-gt6-soft vrf v$h3
+	matchall_sink_create h3-gt6-soft
+
+	ip link add name v1 type veth peer name v2
+	ip link set dev v1 up
+	ip address add dev v1 2001:db8:4::1/64
+
+	ip link set dev v2 vrf v$h3
+	ip link set dev v2 up
+	ip address add dev v2 2001:db8:4::2/64
+}
+
+cleanup_soft()
+{
+	ip link del dev v1
+
+	tunnel_destroy h3-gt6-soft
+	tunnel_destroy gt6-soft
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	swp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	swp3=${NETIFS[p5]}
+	h3=${NETIFS[p6]}
+
+	vrf_prepare
+	mirror_gre_topo_create
+
+	ip address add dev $swp3 2001:db8:2::1/64
+	ip address add dev $h3 2001:db8:2::2/64
+
+	ip address add dev $swp3 192.0.2.129/28
+	ip address add dev $h3 192.0.2.130/28
+
+	setup_keyful
+	setup_soft
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	cleanup_soft
+	cleanup_keyful
+
+	ip address del dev $h3 2001:db8:2::2/64
+	ip address del dev $swp3 2001:db8:2::1/64
+
+	ip address del dev $h3 192.0.2.130/28
+	ip address del dev $swp3 192.0.2.129/28
+
+	mirror_gre_topo_destroy
+	vrf_cleanup
+}
+
+test_span_gre_ttl_inherit()
+{
+	local tundev=$1; shift
+	local type=$1; shift
+	local what=$1; shift
+
+	RET=0
+
+	ip link set dev $tundev type $type ttl inherit
+	mirror_install $swp1 ingress $tundev "matchall $tcflags"
+	fail_test_span_gre_dir $tundev ingress
+
+	ip link set dev $tundev type $type ttl 100
+
+	quick_test_span_gre_dir $tundev ingress
+	mirror_uninstall $swp1 ingress
+
+	log_test "$what: no offload on TTL of inherit ($tcflags)"
+}
+
+test_span_gre_tos_fixed()
+{
+	local tundev=$1; shift
+	local type=$1; shift
+	local what=$1; shift
+
+	RET=0
+
+	ip link set dev $tundev type $type tos 0x10
+	mirror_install $swp1 ingress $tundev "matchall $tcflags"
+	fail_test_span_gre_dir $tundev ingress
+
+	ip link set dev $tundev type $type tos inherit
+	quick_test_span_gre_dir $tundev ingress
+	mirror_uninstall $swp1 ingress
+
+	log_test "$what: no offload on a fixed TOS ($tcflags)"
+}
+
+test_span_failable()
+{
+	local should_fail=$1; shift
+	local tundev=$1; shift
+	local what=$1; shift
+
+	RET=0
+
+	mirror_install $swp1 ingress $tundev "matchall $tcflags"
+	if ((should_fail)); then
+	    fail_test_span_gre_dir  $tundev ingress
+	else
+	    quick_test_span_gre_dir $tundev ingress
+	fi
+	mirror_uninstall $swp1 ingress
+
+	log_test "$what: should_fail=$should_fail ($tcflags)"
+}
+
+test_failable()
+{
+	local should_fail=$1; shift
+
+	test_span_failable $should_fail gt6-key "mirror to keyful gretap"
+	test_span_failable $should_fail gt6-soft "mirror to gretap w/ soft underlay"
+}
+
+test_sw()
+{
+	slow_path_trap_install $swp1 ingress
+	slow_path_trap_install $swp1 egress
+
+	test_failable 0
+
+	slow_path_trap_uninstall $swp1 egress
+	slow_path_trap_uninstall $swp1 ingress
+}
+
+test_hw()
+{
+	test_failable 1
+
+	test_span_gre_tos_fixed gt4 gretap "mirror to gretap"
+	test_span_gre_tos_fixed gt6 ip6gretap "mirror to ip6gretap"
+
+	test_span_gre_ttl_inherit gt4 gretap "mirror to gretap"
+	test_span_gre_ttl_inherit gt6 ip6gretap "mirror to ip6gretap"
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+if ! tc_offload_check; then
+    check_err 1 "Could not test offloaded functionality"
+    log_test "mlxsw-specific tests for mirror to gretap"
+    exit
+fi
+
+tcflags="skip_hw"
+test_sw
+
+tcflags="skip_sw"
+test_hw
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/mirror_gre_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/mirror_gre_scale.sh
new file mode 100644
index 000000000..6f3a70df6
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/mirror_gre_scale.sh
@@ -0,0 +1,197 @@
+# SPDX-License-Identifier: GPL-2.0
+
+# Test offloading a number of mirrors-to-gretap. The test creates a number of
+# tunnels. Then it adds one flower mirror for each of the tunnels, matching a
+# given host IP. Then it generates traffic at each of the host IPs and checks
+# that the traffic has been mirrored at the appropriate tunnel.
+#
+#   +--------------------------+                   +--------------------------+
+#   | H1                       |                   |                       H2 |
+#   |     + $h1                |                   |                $h2 +     |
+#   |     | 2001:db8:1:X::1/64 |                   | 2001:db8:1:X::2/64 |     |
+#   +-----|--------------------+                   +--------------------|-----+
+#         |                                                             |
+#   +-----|-------------------------------------------------------------|-----+
+#   | SW  o--> mirrors                                                  |     |
+#   | +---|-------------------------------------------------------------|---+ |
+#   | |   + $swp1                    BR                           $swp2 +   | |
+#   | +---------------------------------------------------------------------+ |
+#   |                                                                         |
+#   |     + $swp3                          + gt6-<X> (ip6gretap)              |
+#   |     | 2001:db8:2:X::1/64             : loc=2001:db8:2:X::1              |
+#   |     |                                : rem=2001:db8:2:X::2              |
+#   |     |                                : ttl=100                          |
+#   |     |                                : tos=inherit                      |
+#   |     |                                :                                  |
+#   +-----|--------------------------------:----------------------------------+
+#         |                                :
+#   +-----|--------------------------------:----------------------------------+
+#   | H3  + $h3                            + h3-gt6-<X> (ip6gretap)           |
+#   |       2001:db8:2:X::2/64               loc=2001:db8:2:X::2              |
+#   |                                        rem=2001:db8:2:X::1              |
+#   |                                        ttl=100                          |
+#   |                                        tos=inherit                      |
+#   |                                                                         |
+#   +-------------------------------------------------------------------------+
+
+source ../../../../net/forwarding/mirror_lib.sh
+
+MIRROR_NUM_NETIFS=6
+
+mirror_gre_ipv6_addr()
+{
+	local net=$1; shift
+	local num=$1; shift
+
+	printf "2001:db8:%x:%x" $net $num
+}
+
+mirror_gre_tunnels_create()
+{
+	local count=$1; shift
+	local should_fail=$1; shift
+
+	MIRROR_GRE_BATCH_FILE="$(mktemp)"
+	for ((i=0; i < count; ++i)); do
+		local match_dip=$(mirror_gre_ipv6_addr 1 $i)::2
+		local htun=h3-gt6-$i
+		local tun=gt6-$i
+
+		((mirror_gre_tunnels++))
+
+		ip address add dev $h1 $(mirror_gre_ipv6_addr 1 $i)::1/64
+		ip address add dev $h2 $(mirror_gre_ipv6_addr 1 $i)::2/64
+
+		ip address add dev $swp3 $(mirror_gre_ipv6_addr 2 $i)::1/64
+		ip address add dev $h3 $(mirror_gre_ipv6_addr 2 $i)::2/64
+
+		tunnel_create $tun ip6gretap \
+			      $(mirror_gre_ipv6_addr 2 $i)::1 \
+			      $(mirror_gre_ipv6_addr 2 $i)::2 \
+			      ttl 100 tos inherit allow-localremote
+
+		tunnel_create $htun ip6gretap \
+			      $(mirror_gre_ipv6_addr 2 $i)::2 \
+			      $(mirror_gre_ipv6_addr 2 $i)::1
+		ip link set $htun vrf v$h3
+		matchall_sink_create $htun
+
+		cat >> $MIRROR_GRE_BATCH_FILE <<-EOF
+			filter add dev $swp1 ingress pref 1000 \
+				protocol ipv6 \
+				flower $tcflags dst_ip $match_dip \
+				action mirred egress mirror dev $tun
+		EOF
+	done
+
+	tc -b $MIRROR_GRE_BATCH_FILE
+	check_err_fail $should_fail $? "Mirror rule insertion"
+}
+
+mirror_gre_tunnels_destroy()
+{
+	local count=$1; shift
+
+	for ((i=0; i < count; ++i)); do
+		local htun=h3-gt6-$i
+		local tun=gt6-$i
+
+		ip address del dev $h3 $(mirror_gre_ipv6_addr 2 $i)::2/64
+		ip address del dev $swp3 $(mirror_gre_ipv6_addr 2 $i)::1/64
+
+		ip address del dev $h2 $(mirror_gre_ipv6_addr 1 $i)::2/64
+		ip address del dev $h1 $(mirror_gre_ipv6_addr 1 $i)::1/64
+
+		tunnel_destroy $htun
+		tunnel_destroy $tun
+	done
+}
+
+__mirror_gre_test()
+{
+	local count=$1; shift
+	local should_fail=$1; shift
+
+	mirror_gre_tunnels_create $count $should_fail
+	if ((should_fail)); then
+	    return
+	fi
+
+	sleep 5
+
+	for ((i = 0; i < count; ++i)); do
+		local dip=$(mirror_gre_ipv6_addr 1 $i)::2
+		local htun=h3-gt6-$i
+		local message
+
+		icmp6_capture_install $htun
+		mirror_test v$h1 "" $dip $htun 100 10
+		icmp6_capture_uninstall $htun
+	done
+}
+
+mirror_gre_test()
+{
+	local count=$1; shift
+	local should_fail=$1; shift
+
+	if ! tc_offload_check $TC_FLOWER_NUM_NETIFS; then
+		check_err 1 "Could not test offloaded functionality"
+		return
+	fi
+
+	tcflags="skip_sw"
+	__mirror_gre_test $count $should_fail
+}
+
+mirror_gre_setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	swp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	swp3=${NETIFS[p5]}
+	h3=${NETIFS[p6]}
+
+	mirror_gre_tunnels=0
+
+	vrf_prepare
+
+	simple_if_init $h1
+	simple_if_init $h2
+	simple_if_init $h3
+
+	ip link add name br1 type bridge vlan_filtering 1
+	ip link set dev br1 up
+
+	ip link set dev $swp1 master br1
+	ip link set dev $swp1 up
+	tc qdisc add dev $swp1 clsact
+
+	ip link set dev $swp2 master br1
+	ip link set dev $swp2 up
+
+	ip link set dev $swp3 up
+}
+
+mirror_gre_cleanup()
+{
+	mirror_gre_tunnels_destroy $mirror_gre_tunnels
+
+	ip link set dev $swp3 down
+
+	ip link set dev $swp2 down
+
+	tc qdisc del dev $swp1 clsact
+	ip link set dev $swp1 down
+
+	ip link del dev br1
+
+	simple_if_fini $h3
+	simple_if_fini $h2
+	simple_if_fini $h1
+
+	vrf_cleanup
+}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_bridge.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_bridge.sh
new file mode 100755
index 000000000..1ca631d5a
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_bridge.sh
@@ -0,0 +1,189 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test for DSCP prioritization and rewrite. Packets ingress $swp1 with a DSCP
+# tag and are prioritized according to the map at $swp1. They egress $swp2 and
+# the DSCP value is updated to match the map at that interface. The updated DSCP
+# tag is verified at $h2.
+#
+# ICMP responses are produced with the same DSCP tag that arrived at $h2. They
+# go through prioritization at $swp2 and DSCP retagging at $swp1. The tag is
+# verified at $h1--it should match the original tag.
+#
+# +----------------------+                             +----------------------+
+# | H1                   |                             |                   H2 |
+# |    + $h1             |                             |            $h2 +     |
+# |    | 192.0.2.1/28    |                             |   192.0.2.2/28 |     |
+# +----|-----------------+                             +----------------|-----+
+#      |                                                                |
+# +----|----------------------------------------------------------------|-----+
+# | SW |                                                                |     |
+# |  +-|----------------------------------------------------------------|-+   |
+# |  | + $swp1                       BR                           $swp2 + |   |
+# |  |   APP=0,5,10 .. 7,5,17                      APP=0,5,20 .. 7,5,27   |   |
+# |  +--------------------------------------------------------------------+   |
+# +---------------------------------------------------------------------------+
+
+ALL_TESTS="
+	ping_ipv4
+	test_dscp
+"
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+NUM_NETIFS=4
+source $lib_dir/lib.sh
+
+h1_create()
+{
+	local dscp;
+
+	simple_if_init $h1 192.0.2.1/28
+	tc qdisc add dev $h1 clsact
+	dscp_capture_install $h1 10
+}
+
+h1_destroy()
+{
+	dscp_capture_uninstall $h1 10
+	tc qdisc del dev $h1 clsact
+	simple_if_fini $h1 192.0.2.1/28
+}
+
+h2_create()
+{
+	simple_if_init $h2 192.0.2.2/28
+	tc qdisc add dev $h2 clsact
+	dscp_capture_install $h2 20
+}
+
+h2_destroy()
+{
+	dscp_capture_uninstall $h2 20
+	tc qdisc del dev $h2 clsact
+	simple_if_fini $h2 192.0.2.2/28
+}
+
+dscp_map()
+{
+	local base=$1; shift
+
+	for prio in {0..7}; do
+		echo app=$prio,5,$((base + prio))
+	done
+}
+
+switch_create()
+{
+	ip link add name br1 type bridge vlan_filtering 1
+	ip link set dev br1 up
+	ip link set dev $swp1 master br1
+	ip link set dev $swp1 up
+	ip link set dev $swp2 master br1
+	ip link set dev $swp2 up
+
+	lldptool -T -i $swp1 -V APP $(dscp_map 10) >/dev/null
+	lldptool -T -i $swp2 -V APP $(dscp_map 20) >/dev/null
+	lldpad_app_wait_set $swp1
+	lldpad_app_wait_set $swp2
+}
+
+switch_destroy()
+{
+	lldptool -T -i $swp2 -V APP -d $(dscp_map 20) >/dev/null
+	lldptool -T -i $swp1 -V APP -d $(dscp_map 10) >/dev/null
+	lldpad_app_wait_del
+
+	ip link set dev $swp2 nomaster
+	ip link set dev $swp1 nomaster
+	ip link del dev br1
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	swp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	vrf_prepare
+
+	h1_create
+	h2_create
+	switch_create
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	switch_destroy
+	h2_destroy
+	h1_destroy
+
+	vrf_cleanup
+}
+
+ping_ipv4()
+{
+	ping_test $h1 192.0.2.2
+}
+
+dscp_ping_test()
+{
+	local vrf_name=$1; shift
+	local sip=$1; shift
+	local dip=$1; shift
+	local prio=$1; shift
+	local dev_10=$1; shift
+	local dev_20=$1; shift
+
+	local dscp_10=$(((prio + 10) << 2))
+	local dscp_20=$(((prio + 20) << 2))
+
+	RET=0
+
+	local -A t0s
+	eval "t0s=($(dscp_fetch_stats $dev_10 10)
+		   $(dscp_fetch_stats $dev_20 20))"
+
+	ip vrf exec $vrf_name \
+	   ${PING} -Q $dscp_10 ${sip:+-I $sip} $dip \
+		   -c 10 -i 0.1 -w 2 &> /dev/null
+
+	local -A t1s
+	eval "t1s=($(dscp_fetch_stats $dev_10 10)
+		   $(dscp_fetch_stats $dev_20 20))"
+
+	for key in ${!t0s[@]}; do
+		local expect
+		if ((key == prio+10 || key == prio+20)); then
+			expect=10
+		else
+			expect=0
+		fi
+
+		local delta=$((t1s[$key] - t0s[$key]))
+		((expect == delta))
+		check_err $? "DSCP $key: Expected to capture $expect packets, got $delta."
+	done
+
+	log_test "DSCP rewrite: $dscp_10-(prio $prio)-$dscp_20"
+}
+
+test_dscp()
+{
+	for prio in {0..7}; do
+		dscp_ping_test v$h1 192.0.2.1 192.0.2.2 $prio $h1 $h2
+	done
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_router.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_router.sh
new file mode 100755
index 000000000..281d90766
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_router.sh
@@ -0,0 +1,233 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test for DSCP prioritization in the router.
+#
+# With ip_forward_update_priority disabled, the packets are expected to keep
+# their DSCP (which in this test uses only values 0..7) intact as they are
+# forwarded by the switch. That is verified at $h2. ICMP responses are formed
+# with the same DSCP as the requests, and likewise pass through the switch
+# intact, which is verified at $h1.
+#
+# With ip_forward_update_priority enabled, router reprioritizes the packets
+# according to the table in reprioritize(). Thus, say, DSCP 7 maps to priority
+# 4, which on egress maps back to DSCP 4. The response packet then gets
+# reprioritized to 6, getting DSCP 6 on egress.
+#
+# +----------------------+                             +----------------------+
+# | H1                   |                             |                   H2 |
+# |    + $h1             |                             |            $h2 +     |
+# |    | 192.0.2.1/28    |                             |  192.0.2.18/28 |     |
+# +----|-----------------+                             +----------------|-----+
+#      |                                                                |
+# +----|----------------------------------------------------------------|-----+
+# | SW |                                                                |     |
+# |    + $swp1                                                    $swp2 +     |
+# |      192.0.2.2/28                                     192.0.2.17/28       |
+# |      APP=0,5,0 .. 7,5,7                          APP=0,5,0 .. 7,5,7       |
+# +---------------------------------------------------------------------------+
+
+ALL_TESTS="
+	ping_ipv4
+	test_update
+	test_no_update
+"
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+NUM_NETIFS=4
+source $lib_dir/lib.sh
+
+reprioritize()
+{
+	local in=$1; shift
+
+	# This is based on rt_tos2priority in include/net/route.h. Assuming 1:1
+	# mapping between priorities and TOS, it yields a new priority for a
+	# packet with ingress priority of $in.
+	local -a reprio=(0 0 2 2 6 6 4 4)
+
+	echo ${reprio[$in]}
+}
+
+h1_create()
+{
+	local dscp;
+
+	simple_if_init $h1 192.0.2.1/28
+	tc qdisc add dev $h1 clsact
+	dscp_capture_install $h1 0
+	ip route add vrf v$h1 192.0.2.16/28 via 192.0.2.2
+}
+
+h1_destroy()
+{
+	ip route del vrf v$h1 192.0.2.16/28 via 192.0.2.2
+	dscp_capture_uninstall $h1 0
+	tc qdisc del dev $h1 clsact
+	simple_if_fini $h1 192.0.2.1/28
+}
+
+h2_create()
+{
+	simple_if_init $h2 192.0.2.18/28
+	tc qdisc add dev $h2 clsact
+	dscp_capture_install $h2 0
+	ip route add vrf v$h2 192.0.2.0/28 via 192.0.2.17
+}
+
+h2_destroy()
+{
+	ip route del vrf v$h2 192.0.2.0/28 via 192.0.2.17
+	dscp_capture_uninstall $h2 0
+	tc qdisc del dev $h2 clsact
+	simple_if_fini $h2 192.0.2.18/28
+}
+
+dscp_map()
+{
+	local base=$1; shift
+
+	for prio in {0..7}; do
+		echo app=$prio,5,$((base + prio))
+	done
+}
+
+switch_create()
+{
+	simple_if_init $swp1 192.0.2.2/28
+	__simple_if_init $swp2 v$swp1 192.0.2.17/28
+
+	lldptool -T -i $swp1 -V APP $(dscp_map 0) >/dev/null
+	lldptool -T -i $swp2 -V APP $(dscp_map 0) >/dev/null
+	lldpad_app_wait_set $swp1
+	lldpad_app_wait_set $swp2
+}
+
+switch_destroy()
+{
+	lldptool -T -i $swp2 -V APP -d $(dscp_map 0) >/dev/null
+	lldptool -T -i $swp1 -V APP -d $(dscp_map 0) >/dev/null
+	lldpad_app_wait_del
+
+	__simple_if_fini $swp2 192.0.2.17/28
+	simple_if_fini $swp1 192.0.2.2/28
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	swp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	vrf_prepare
+
+	sysctl_set net.ipv4.ip_forward_update_priority 1
+	h1_create
+	h2_create
+	switch_create
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	switch_destroy
+	h2_destroy
+	h1_destroy
+	sysctl_restore net.ipv4.ip_forward_update_priority
+
+	vrf_cleanup
+}
+
+ping_ipv4()
+{
+	ping_test $h1 192.0.2.18
+}
+
+dscp_ping_test()
+{
+	local vrf_name=$1; shift
+	local sip=$1; shift
+	local dip=$1; shift
+	local prio=$1; shift
+	local reprio=$1; shift
+	local dev1=$1; shift
+	local dev2=$1; shift
+
+	local prio2=$($reprio $prio)   # ICMP Request egress prio
+	local prio3=$($reprio $prio2)  # ICMP Response egress prio
+
+	local dscp=$((prio << 2))     # ICMP Request ingress DSCP
+	local dscp2=$((prio2 << 2))   # ICMP Request egress DSCP
+	local dscp3=$((prio3 << 2))   # ICMP Response egress DSCP
+
+	RET=0
+
+	eval "local -A dev1_t0s=($(dscp_fetch_stats $dev1 0))"
+	eval "local -A dev2_t0s=($(dscp_fetch_stats $dev2 0))"
+
+	ip vrf exec $vrf_name \
+	   ${PING} -Q $dscp ${sip:+-I $sip} $dip \
+		   -c 10 -i 0.1 -w 2 &> /dev/null
+
+	eval "local -A dev1_t1s=($(dscp_fetch_stats $dev1 0))"
+	eval "local -A dev2_t1s=($(dscp_fetch_stats $dev2 0))"
+
+	for i in {0..7}; do
+		local dscpi=$((i << 2))
+		local expect2=0
+		local expect3=0
+
+		if ((i == prio2)); then
+			expect2=10
+		fi
+		if ((i == prio3)); then
+			expect3=10
+		fi
+
+		local delta=$((dev2_t1s[$i] - dev2_t0s[$i]))
+		((expect2 == delta))
+		check_err $? "DSCP $dscpi@$dev2: Expected to capture $expect2 packets, got $delta."
+
+		delta=$((dev1_t1s[$i] - dev1_t0s[$i]))
+		((expect3 == delta))
+		check_err $? "DSCP $dscpi@$dev1: Expected to capture $expect3 packets, got $delta."
+	done
+
+	log_test "DSCP rewrite: $dscp-(prio $prio2)-$dscp2-(prio $prio3)-$dscp3"
+}
+
+__test_update()
+{
+	local update=$1; shift
+	local reprio=$1; shift
+
+	sysctl_restore net.ipv4.ip_forward_update_priority
+	sysctl_set net.ipv4.ip_forward_update_priority $update
+
+	for prio in {0..7}; do
+		dscp_ping_test v$h1 192.0.2.1 192.0.2.18 $prio $reprio $h1 $h2
+	done
+}
+
+test_update()
+{
+	__test_update 1 reprioritize
+}
+
+test_no_update()
+{
+	__test_update 0 echo
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/router_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/router_scale.sh
new file mode 100644
index 000000000..d231649b4
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/router_scale.sh
@@ -0,0 +1,167 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ROUTER_NUM_NETIFS=4
+
+router_h1_create()
+{
+	simple_if_init $h1 192.0.1.1/24
+	ip route add 193.0.0.0/8 via 192.0.1.2 dev $h1
+}
+
+router_h1_destroy()
+{
+	ip route del 193.0.0.0/8 via 192.0.1.2 dev $h1
+	simple_if_fini $h1 192.0.1.1/24
+}
+
+router_h2_create()
+{
+	simple_if_init $h2 192.0.2.1/24
+	tc qdisc add dev $h2 handle ffff: ingress
+}
+
+router_h2_destroy()
+{
+	tc qdisc del dev $h2 handle ffff: ingress
+	simple_if_fini $h2 192.0.2.1/24
+}
+
+router_create()
+{
+	ip link set dev $rp1 up
+	ip link set dev $rp2 up
+
+	ip address add 192.0.1.2/24 dev $rp1
+	ip address add 192.0.2.2/24 dev $rp2
+}
+
+router_destroy()
+{
+	ip address del 192.0.2.2/24 dev $rp2
+	ip address del 192.0.1.2/24 dev $rp1
+
+	ip link set dev $rp2 down
+	ip link set dev $rp1 down
+}
+
+router_setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	rp1=${NETIFS[p2]}
+
+	rp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	h1mac=$(mac_get $h1)
+	rp1mac=$(mac_get $rp1)
+
+	vrf_prepare
+
+	router_h1_create
+	router_h2_create
+
+	router_create
+}
+
+router_offload_validate()
+{
+	local route_count=$1
+	local offloaded_count
+
+	offloaded_count=$(ip route | grep -o 'offload' | wc -l)
+	[[ $offloaded_count -ge $route_count ]]
+}
+
+router_routes_create()
+{
+	local route_count=$1
+	local count=0
+
+	ROUTE_FILE="$(mktemp)"
+
+	for i in {0..255}
+	do
+		for j in {0..255}
+		do
+			for k in {0..255}
+			do
+				if [[ $count -eq $route_count ]]; then
+					break 3
+				fi
+
+				echo route add 193.${i}.${j}.${k}/32 via \
+				       192.0.2.1 dev $rp2  >> $ROUTE_FILE
+				((count++))
+			done
+		done
+	done
+
+	ip -b $ROUTE_FILE &> /dev/null
+}
+
+router_routes_destroy()
+{
+	if [[ -v ROUTE_FILE ]]; then
+		rm -f $ROUTE_FILE
+	fi
+}
+
+router_test()
+{
+	local route_count=$1
+	local should_fail=$2
+	local count=0
+
+	RET=0
+
+	router_routes_create $route_count
+
+	router_offload_validate $route_count
+	check_err_fail $should_fail $? "Offload of $route_count routes"
+	if [[ $RET -ne 0 ]] || [[ $should_fail -eq 1 ]]; then
+		return
+	fi
+
+	tc filter add dev $h2 ingress protocol ip pref 1 flower \
+		skip_sw dst_ip 193.0.0.0/8 action drop
+
+	for i in {0..255}
+	do
+		for j in {0..255}
+		do
+			for k in {0..255}
+			do
+				if [[ $count -eq $route_count ]]; then
+					break 3
+				fi
+
+				$MZ $h1 -c 1 -p 64 -a $h1mac -b $rp1mac \
+					-A 192.0.1.1 -B 193.${i}.${j}.${k} \
+					-t ip -q
+				((count++))
+			done
+		done
+	done
+
+	tc_check_packets "dev $h2 ingress" 1 $route_count
+	check_err $? "Offload mismatch"
+
+	tc filter del dev $h2 ingress protocol ip pref 1 flower \
+		skip_sw dst_ip 193.0.0.0/8 action drop
+
+	router_routes_destroy
+}
+
+router_cleanup()
+{
+	pre_cleanup
+
+	router_routes_destroy
+	router_destroy
+
+	router_h2_destroy
+	router_h1_destroy
+
+	vrf_cleanup
+}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh
new file mode 100755
index 000000000..3b75180f4
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh
@@ -0,0 +1,366 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test is for checking the A-TCAM and C-TCAM operation in Spectrum-2.
+# It tries to exercise as many code paths in the eRP state machine as
+# possible.
+
+lib_dir=$(dirname $0)/../../../../net/forwarding
+
+ALL_TESTS="single_mask_test identical_filters_test two_masks_test \
+	multiple_masks_test ctcam_edge_cases_test"
+NUM_NETIFS=2
+source $lib_dir/tc_common.sh
+source $lib_dir/lib.sh
+
+tcflags="skip_hw"
+
+h1_create()
+{
+	simple_if_init $h1 192.0.2.1/24 198.51.100.1/24
+}
+
+h1_destroy()
+{
+	simple_if_fini $h1 192.0.2.1/24 198.51.100.1/24
+}
+
+h2_create()
+{
+	simple_if_init $h2 192.0.2.2/24 198.51.100.2/24
+	tc qdisc add dev $h2 clsact
+}
+
+h2_destroy()
+{
+	tc qdisc del dev $h2 clsact
+	simple_if_fini $h2 192.0.2.2/24 198.51.100.2/24
+}
+
+single_mask_test()
+{
+	# When only a single mask is required, the device uses the master
+	# mask and not the eRP table. Verify that under this mode the right
+	# filter is matched
+
+	RET=0
+
+	tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+		$tcflags dst_ip 192.0.2.2 action drop
+
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip -q
+
+	tc_check_packets "dev $h2 ingress" 101 1
+	check_err $? "Single filter - did not match"
+
+	tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \
+		$tcflags dst_ip 198.51.100.2 action drop
+
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip -q
+
+	tc_check_packets "dev $h2 ingress" 101 2
+	check_err $? "Two filters - did not match highest priority"
+
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 198.51.100.1 -B 198.51.100.2 \
+		-t ip -q
+
+	tc_check_packets "dev $h2 ingress" 102 1
+	check_err $? "Two filters - did not match lowest priority"
+
+	tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 198.51.100.1 -B 198.51.100.2 \
+		-t ip -q
+
+	tc_check_packets "dev $h2 ingress" 102 2
+	check_err $? "Single filter - did not match after delete"
+
+	tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
+
+	log_test "single mask test ($tcflags)"
+}
+
+identical_filters_test()
+{
+	# When two filters that only differ in their priority are used,
+	# one needs to be inserted into the C-TCAM. This test verifies
+	# that filters are correctly spilled to C-TCAM and that the right
+	# filter is matched
+
+	RET=0
+
+	tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+		$tcflags dst_ip 192.0.2.2 action drop
+	tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \
+		$tcflags dst_ip 192.0.2.2 action drop
+
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip -q
+
+	tc_check_packets "dev $h2 ingress" 101 1
+	check_err $? "Did not match A-TCAM filter"
+
+	tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip -q
+
+	tc_check_packets "dev $h2 ingress" 102 1
+	check_err $? "Did not match C-TCAM filter after A-TCAM delete"
+
+	tc filter add dev $h2 ingress protocol ip pref 3 handle 103 flower \
+		$tcflags dst_ip 192.0.2.2 action drop
+
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip -q
+
+	tc_check_packets "dev $h2 ingress" 102 2
+	check_err $? "Did not match C-TCAM filter after A-TCAM add"
+
+	tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
+
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip -q
+
+	tc_check_packets "dev $h2 ingress" 103 1
+	check_err $? "Did not match A-TCAM filter after C-TCAM delete"
+
+	tc filter del dev $h2 ingress protocol ip pref 3 handle 103 flower
+
+	log_test "identical filters test ($tcflags)"
+}
+
+two_masks_test()
+{
+	# When more than one mask is required, the eRP table is used. This
+	# test verifies that the eRP table is correctly allocated and used
+
+	RET=0
+
+	tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+		$tcflags dst_ip 192.0.2.2 action drop
+	tc filter add dev $h2 ingress protocol ip pref 3 handle 103 flower \
+		$tcflags dst_ip 192.0.0.0/16 action drop
+
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip -q
+
+	tc_check_packets "dev $h2 ingress" 101 1
+	check_err $? "Two filters - did not match highest priority"
+
+	tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip -q
+
+	tc_check_packets "dev $h2 ingress" 103 1
+	check_err $? "Single filter - did not match"
+
+	tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \
+		$tcflags dst_ip 192.0.2.0/24 action drop
+
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip -q
+
+	tc_check_packets "dev $h2 ingress" 102 1
+	check_err $? "Two filters - did not match highest priority after add"
+
+	tc filter del dev $h2 ingress protocol ip pref 3 handle 103 flower
+	tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
+
+	log_test "two masks test ($tcflags)"
+}
+
+multiple_masks_test()
+{
+	# The number of masks in a region is limited. Once the maximum
+	# number of masks has been reached filters that require new
+	# masks are spilled to the C-TCAM. This test verifies that
+	# spillage is performed correctly and that the right filter is
+	# matched
+
+	local index
+
+	RET=0
+
+	NUM_MASKS=32
+	BASE_INDEX=100
+
+	for i in $(eval echo {1..$NUM_MASKS}); do
+		index=$((BASE_INDEX - i))
+
+		tc filter add dev $h2 ingress protocol ip pref $index \
+			handle $index \
+			flower $tcflags dst_ip 192.0.2.2/${i} src_ip 192.0.2.1 \
+			action drop
+
+		$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 \
+			-B 192.0.2.2 -t ip -q
+
+		tc_check_packets "dev $h2 ingress" $index 1
+		check_err $? "$i filters - did not match highest priority (add)"
+	done
+
+	for i in $(eval echo {$NUM_MASKS..1}); do
+		index=$((BASE_INDEX - i))
+
+		$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 \
+			-B 192.0.2.2 -t ip -q
+
+		tc_check_packets "dev $h2 ingress" $index 2
+		check_err $? "$i filters - did not match highest priority (del)"
+
+		tc filter del dev $h2 ingress protocol ip pref $index \
+			handle $index flower
+	done
+
+	log_test "multiple masks test ($tcflags)"
+}
+
+ctcam_two_atcam_masks_test()
+{
+	RET=0
+
+	# First case: C-TCAM is disabled when there are two A-TCAM masks.
+	# We push a filter into the C-TCAM by using two identical filters
+	# as in identical_filters_test()
+
+	# Filter goes into A-TCAM
+	tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+		$tcflags dst_ip 192.0.2.2 action drop
+	# Filter goes into C-TCAM
+	tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \
+		$tcflags dst_ip 192.0.2.2 action drop
+	# Filter goes into A-TCAM
+	tc filter add dev $h2 ingress protocol ip pref 3 handle 103 flower \
+		$tcflags dst_ip 192.0.2.0/24 action drop
+
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip -q
+
+	tc_check_packets "dev $h2 ingress" 101 1
+	check_err $? "Did not match A-TCAM filter"
+
+	# Delete both A-TCAM and C-TCAM filters and make sure the remaining
+	# A-TCAM filter still works
+	tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
+	tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip -q
+
+	tc_check_packets "dev $h2 ingress" 103 1
+	check_err $? "Did not match A-TCAM filter"
+
+	tc filter del dev $h2 ingress protocol ip pref 3 handle 103 flower
+
+	log_test "ctcam with two atcam masks test ($tcflags)"
+}
+
+ctcam_one_atcam_mask_test()
+{
+	RET=0
+
+	# Second case: C-TCAM is disabled when there is one A-TCAM mask.
+	# The test is similar to identical_filters_test()
+
+	# Filter goes into A-TCAM
+	tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \
+		$tcflags dst_ip 192.0.2.2 action drop
+	# Filter goes into C-TCAM
+	tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+		$tcflags dst_ip 192.0.2.2 action drop
+
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip -q
+
+	tc_check_packets "dev $h2 ingress" 101 1
+	check_err $? "Did not match C-TCAM filter"
+
+	tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip -q
+
+	tc_check_packets "dev $h2 ingress" 102 1
+	check_err $? "Did not match A-TCAM filter"
+
+	tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
+
+	log_test "ctcam with one atcam mask test ($tcflags)"
+}
+
+ctcam_no_atcam_masks_test()
+{
+	RET=0
+
+	# Third case: C-TCAM is disabled when there are no A-TCAM masks
+	# This test exercises the code path that transitions the eRP table
+	# to its initial state after deleting the last C-TCAM mask
+
+	# Filter goes into A-TCAM
+	tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+		$tcflags dst_ip 192.0.2.2 action drop
+	# Filter goes into C-TCAM
+	tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \
+		$tcflags dst_ip 192.0.2.2 action drop
+
+	tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+	tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
+
+	log_test "ctcam with no atcam masks test ($tcflags)"
+}
+
+ctcam_edge_cases_test()
+{
+	# When the C-TCAM is disabled after deleting the last C-TCAM
+	# mask, we want to make sure the eRP state machine is put in
+	# the correct state
+
+	ctcam_two_atcam_masks_test
+	ctcam_one_atcam_mask_test
+	ctcam_no_atcam_masks_test
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	h2=${NETIFS[p2]}
+	h1mac=$(mac_get $h1)
+	h2mac=$(mac_get $h2)
+
+	vrf_prepare
+
+	h1_create
+	h2_create
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	h2_destroy
+	h1_destroy
+
+	vrf_cleanup
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+if ! tc_offload_check; then
+	check_err 1 "Could not test offloaded functionality"
+	log_test "mlxsw-specific tests for tc flower"
+	exit
+else
+	tcflags="skip_sw"
+	tests_run
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/devlink_lib_spectrum.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/devlink_lib_spectrum.sh
new file mode 100644
index 000000000..73035e250
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/devlink_lib_spectrum.sh
@@ -0,0 +1,119 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source "../../../../net/forwarding/devlink_lib.sh"
+
+if [ "$DEVLINK_VIDDID" != "15b3:cb84" ]; then
+	echo "SKIP: test is tailored for Mellanox Spectrum"
+	exit 1
+fi
+
+# Needed for returning to default
+declare -A KVD_DEFAULTS
+
+KVD_CHILDREN="linear hash_single hash_double"
+KVDL_CHILDREN="singles chunks large_chunks"
+
+devlink_sp_resource_minimize()
+{
+	local size
+	local i
+
+	for i in $KVD_CHILDREN; do
+		size=$(devlink_resource_get kvd "$i" | jq '.["size_min"]')
+		devlink_resource_size_set "$size" kvd "$i"
+	done
+
+	for i in $KVDL_CHILDREN; do
+		size=$(devlink_resource_get kvd linear "$i" | \
+		       jq '.["size_min"]')
+		devlink_resource_size_set "$size" kvd linear "$i"
+	done
+}
+
+devlink_sp_size_kvd_to_default()
+{
+	local need_reload=0
+	local i
+
+	for i in $KVD_CHILDREN; do
+		local size=$(echo "${KVD_DEFAULTS[kvd_$i]}" | jq '.["size"]')
+		current_size=$(devlink_resource_size_get kvd "$i")
+
+		if [ "$size" -ne "$current_size" ]; then
+			devlink_resource_size_set "$size" kvd "$i"
+			need_reload=1
+		fi
+	done
+
+	for i in $KVDL_CHILDREN; do
+		local size=$(echo "${KVD_DEFAULTS[kvd_linear_$i]}" | \
+			     jq '.["size"]')
+		current_size=$(devlink_resource_size_get kvd linear "$i")
+
+		if [ "$size" -ne "$current_size" ]; then
+			devlink_resource_size_set "$size" kvd linear "$i"
+			need_reload=1
+		fi
+	done
+
+	if [ "$need_reload" -ne "0" ]; then
+		devlink_reload
+	fi
+}
+
+devlink_sp_read_kvd_defaults()
+{
+	local key
+	local i
+
+	KVD_DEFAULTS[kvd]=$(devlink_resource_get "kvd")
+	for i in $KVD_CHILDREN; do
+		key=kvd_$i
+		KVD_DEFAULTS[$key]=$(devlink_resource_get kvd "$i")
+	done
+
+	for i in $KVDL_CHILDREN; do
+		key=kvd_linear_$i
+		KVD_DEFAULTS[$key]=$(devlink_resource_get kvd linear "$i")
+	done
+}
+
+KVD_PROFILES="default scale ipv4_max"
+
+devlink_sp_resource_kvd_profile_set()
+{
+	local profile=$1
+
+	case "$profile" in
+	scale)
+		devlink_resource_size_set 64000 kvd linear
+		devlink_resource_size_set 15616 kvd linear singles
+		devlink_resource_size_set 32000 kvd linear chunks
+		devlink_resource_size_set 16384 kvd linear large_chunks
+		devlink_resource_size_set 128000 kvd hash_single
+		devlink_resource_size_set 48000 kvd hash_double
+		devlink_reload
+		;;
+	ipv4_max)
+		devlink_resource_size_set 64000 kvd linear
+		devlink_resource_size_set 15616 kvd linear singles
+		devlink_resource_size_set 32000 kvd linear chunks
+		devlink_resource_size_set 16384 kvd linear large_chunks
+		devlink_resource_size_set 144000 kvd hash_single
+		devlink_resource_size_set 32768 kvd hash_double
+		devlink_reload
+		;;
+	default)
+		devlink_resource_size_set 98304 kvd linear
+		devlink_resource_size_set 16384 kvd linear singles
+		devlink_resource_size_set 49152 kvd linear chunks
+		devlink_resource_size_set 32768 kvd linear large_chunks
+		devlink_resource_size_set 87040 kvd hash_single
+		devlink_resource_size_set 60416 kvd hash_double
+		devlink_reload
+		;;
+	*)
+		check_err 1 "Unknown profile $profile"
+	esac
+}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/devlink_resources.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/devlink_resources.sh
new file mode 100755
index 000000000..b1fe960e3
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/devlink_resources.sh
@@ -0,0 +1,117 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+NUM_NETIFS=1
+source devlink_lib_spectrum.sh
+
+setup_prepare()
+{
+	devlink_sp_read_kvd_defaults
+}
+
+cleanup()
+{
+	pre_cleanup
+	devlink_sp_size_kvd_to_default
+}
+
+trap cleanup EXIT
+
+setup_prepare
+
+profiles_test()
+{
+	local i
+
+	log_info "Running profile tests"
+
+	for i in $KVD_PROFILES; do
+		RET=0
+		devlink_sp_resource_kvd_profile_set $i
+		log_test "'$i' profile"
+	done
+
+	# Default is explicitly tested at end to ensure it's actually applied
+	RET=0
+	devlink_sp_resource_kvd_profile_set "default"
+	log_test "'default' profile"
+}
+
+resources_min_test()
+{
+	local size
+	local i
+	local j
+
+	log_info "Running KVD-minimum tests"
+
+	for i in $KVD_CHILDREN; do
+		RET=0
+		size=$(devlink_resource_get kvd "$i" | jq '.["size_min"]')
+		devlink_resource_size_set "$size" kvd "$i"
+
+		# In case of linear, need to minimize sub-resources as well
+		if [[ "$i" == "linear" ]]; then
+			for j in $KVDL_CHILDREN; do
+				devlink_resource_size_set 0 kvd linear "$j"
+			done
+		fi
+
+		devlink_reload
+		devlink_sp_size_kvd_to_default
+		log_test "'$i' minimize [$size]"
+	done
+}
+
+resources_max_test()
+{
+	local min_size
+	local size
+	local i
+	local j
+
+	log_info "Running KVD-maximum tests"
+	for i in $KVD_CHILDREN; do
+		RET=0
+		devlink_sp_resource_minimize
+
+		# Calculate the maximum possible size for the given partition
+		size=$(devlink_resource_size_get kvd)
+		for j in $KVD_CHILDREN; do
+			if [ "$i" != "$j" ]; then
+				min_size=$(devlink_resource_get kvd "$j" | \
+					   jq '.["size_min"]')
+				size=$((size - min_size))
+			fi
+		done
+
+		# Test almost maximum size
+		devlink_resource_size_set "$((size - 128))" kvd "$i"
+		devlink_reload
+		log_test "'$i' almost maximize [$((size - 128))]"
+
+		# Test above maximum size
+		devlink resource set "$DEVLINK_DEV" \
+			path "kvd/$i" size $((size + 128)) &> /dev/null
+		check_fail $? "Set kvd/$i to size $((size + 128)) should fail"
+		log_test "'$i' Overflow rejection [$((size + 128))]"
+
+		# Test maximum size
+		if [ "$i" == "hash_single" ] || [ "$i" == "hash_double" ]; then
+			echo "SKIP: Observed problem with exact max $i"
+			continue
+		fi
+
+		devlink_resource_size_set "$size" kvd "$i"
+		devlink_reload
+		log_test "'$i' maximize [$size]"
+
+		devlink_sp_size_kvd_to_default
+	done
+}
+
+profiles_test
+resources_min_test
+resources_max_test
+
+exit "$RET"
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/mirror_gre_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/mirror_gre_scale.sh
new file mode 100644
index 000000000..8d2186c7c
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/mirror_gre_scale.sh
@@ -0,0 +1,13 @@
+# SPDX-License-Identifier: GPL-2.0
+source ../mirror_gre_scale.sh
+
+mirror_gre_get_target()
+{
+	local should_fail=$1; shift
+
+	if ((! should_fail)); then
+		echo 3
+	else
+		echo 4
+	fi
+}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh
new file mode 100755
index 000000000..a0a80e1a6
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh
@@ -0,0 +1,55 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+NUM_NETIFS=6
+source ../../../../net/forwarding/lib.sh
+source ../../../../net/forwarding/tc_common.sh
+source devlink_lib_spectrum.sh
+
+current_test=""
+
+cleanup()
+{
+	pre_cleanup
+	if [ ! -z $current_test ]; then
+		${current_test}_cleanup
+	fi
+	devlink_sp_size_kvd_to_default
+}
+
+devlink_sp_read_kvd_defaults
+trap cleanup EXIT
+
+ALL_TESTS="router tc_flower mirror_gre"
+for current_test in ${TESTS:-$ALL_TESTS}; do
+	source ${current_test}_scale.sh
+
+	num_netifs_var=${current_test^^}_NUM_NETIFS
+	num_netifs=${!num_netifs_var:-$NUM_NETIFS}
+
+	for profile in $KVD_PROFILES; do
+		RET=0
+		devlink_sp_resource_kvd_profile_set $profile
+		if [[ $RET -gt 0 ]]; then
+			log_test "'$current_test' [$profile] setting"
+			continue
+		fi
+
+		for should_fail in 0 1; do
+			RET=0
+			target=$(${current_test}_get_target "$should_fail")
+			${current_test}_setup_prepare
+			setup_wait $num_netifs
+			${current_test}_test "$target" "$should_fail"
+			${current_test}_cleanup
+			if [[ "$should_fail" -eq 0 ]]; then
+				log_test "'$current_test' [$profile] $target"
+			else
+				log_test "'$current_test' [$profile] overflow $target"
+			fi
+		done
+	done
+done
+current_test=""
+
+exit "$RET"
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/router_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/router_scale.sh
new file mode 100644
index 000000000..21c4697d5
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/router_scale.sh
@@ -0,0 +1,18 @@
+# SPDX-License-Identifier: GPL-2.0
+source ../router_scale.sh
+
+router_get_target()
+{
+	local should_fail=$1
+	local target
+
+	target=$(devlink_resource_size_get kvd hash_single)
+
+	if [[ $should_fail -eq 0 ]]; then
+		target=$((target * 85 / 100))
+	else
+		target=$((target + 1))
+	fi
+
+	echo $target
+}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/tc_flower_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/tc_flower_scale.sh
new file mode 100644
index 000000000..f9bfd8937
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/tc_flower_scale.sh
@@ -0,0 +1,19 @@
+# SPDX-License-Identifier: GPL-2.0
+source ../tc_flower_scale.sh
+
+tc_flower_get_target()
+{
+	local should_fail=$1; shift
+
+	# 6144 (6x1024) is the theoretical maximum.
+	# One bank of 512 rules is taken by the 18-byte MC router rule.
+	# One rule is the ACL catch-all.
+	# 6144 - 512 - 1 = 5631
+	local target=5631
+
+	if ((! should_fail)); then
+		echo $target
+	else
+		echo $((target + 1))
+	fi
+}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/tc_flower_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/tc_flower_scale.sh
new file mode 100644
index 000000000..a6d733d2a
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/tc_flower_scale.sh
@@ -0,0 +1,134 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test for resource limit of offloaded flower rules. The test adds a given
+# number of flower matches for different IPv6 addresses, then generates traffic,
+# and ensures each was hit exactly once. This file contains functions to set up
+# a testing topology and run the test, and is meant to be sourced from a test
+# script that calls the testing routine with a given number of rules.
+
+TC_FLOWER_NUM_NETIFS=2
+
+tc_flower_h1_create()
+{
+	simple_if_init $h1
+	tc qdisc add dev $h1 clsact
+}
+
+tc_flower_h1_destroy()
+{
+	tc qdisc del dev $h1 clsact
+	simple_if_fini $h1
+}
+
+tc_flower_h2_create()
+{
+	simple_if_init $h2
+	tc qdisc add dev $h2 clsact
+}
+
+tc_flower_h2_destroy()
+{
+	tc qdisc del dev $h2 clsact
+	simple_if_fini $h2
+}
+
+tc_flower_setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	h2=${NETIFS[p2]}
+
+	vrf_prepare
+
+	tc_flower_h1_create
+	tc_flower_h2_create
+}
+
+tc_flower_cleanup()
+{
+	pre_cleanup
+
+	tc_flower_h2_destroy
+	tc_flower_h1_destroy
+
+	vrf_cleanup
+
+	if [[ -v TC_FLOWER_BATCH_FILE ]]; then
+		rm -f $TC_FLOWER_BATCH_FILE
+	fi
+}
+
+tc_flower_addr()
+{
+	local num=$1; shift
+
+	printf "2001:db8:1::%x" $num
+}
+
+tc_flower_rules_create()
+{
+	local count=$1; shift
+	local should_fail=$1; shift
+
+	TC_FLOWER_BATCH_FILE="$(mktemp)"
+
+	for ((i = 0; i < count; ++i)); do
+		cat >> $TC_FLOWER_BATCH_FILE <<-EOF
+			filter add dev $h2 ingress \
+				prot ipv6 \
+				pref 1000 \
+				flower $tcflags dst_ip $(tc_flower_addr $i) \
+				action drop
+		EOF
+	done
+
+	tc -b $TC_FLOWER_BATCH_FILE
+	check_err_fail $should_fail $? "Rule insertion"
+}
+
+__tc_flower_test()
+{
+	local count=$1; shift
+	local should_fail=$1; shift
+	local last=$((count - 1))
+
+	tc_flower_rules_create $count $should_fail
+
+	for ((i = 0; i < count; ++i)); do
+		$MZ $h1 -q -c 1 -t ip -p 20 -b bc -6 \
+			-A 2001:db8:2::1 \
+			-B $(tc_flower_addr $i)
+	done
+
+	MISMATCHES=$(
+		tc -j -s filter show dev $h2 ingress |
+		jq -r '[ .[] | select(.kind == "flower") | .options |
+		         values as $rule | .actions[].stats.packets |
+		         select(. != 1) | "\(.) on \($rule.keys.dst_ip)" ] |
+		       join(", ")'
+	)
+
+	test -z "$MISMATCHES"
+	check_err $? "Expected to capture 1 packet for each IP, but got $MISMATCHES"
+}
+
+tc_flower_test()
+{
+	local count=$1; shift
+	local should_fail=$1; shift
+
+	# We use lower 16 bits of IPv6 address for match. Also there are only 16
+	# bits of rule priority space.
+	if ((count > 65536)); then
+		check_err 1 "Invalid count of $count. At most 65536 rules supported"
+		return
+	fi
+
+	if ! tc_offload_check $TC_FLOWER_NUM_NETIFS; then
+		check_err 1 "Could not test offloaded functionality"
+		return
+	fi
+
+	tcflags="skip_sw"
+	__tc_flower_test $count $should_fail
+}
diff --git a/tools/testing/selftests/drivers/usb/usbip/usbip_test.sh b/tools/testing/selftests/drivers/usb/usbip/usbip_test.sh
new file mode 100755
index 000000000..128f0ab24
--- /dev/null
+++ b/tools/testing/selftests/drivers/usb/usbip/usbip_test.sh
@@ -0,0 +1,200 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+usage() { echo "usbip_test.sh -b <busid> -p <usbip tools path>"; exit 1; }
+
+while getopts "h:b:p:" arg; do
+    case "${arg}" in
+	h)
+	    usage
+	    ;;
+	b)
+	    busid=${OPTARG}
+	    ;;
+	p)
+	    tools_path=${OPTARG}
+	    ;;
+	*)
+	    usage
+	    ;;
+    esac
+done
+shift $((OPTIND-1))
+
+if [ -z "${busid}" ]; then
+	usage
+fi
+
+echo "Running USB over IP Testing on $busid";
+
+test_end_msg="End of USB over IP Testing on $busid"
+
+if [ $UID != 0 ]; then
+	echo "Please run usbip_test as root [SKIP]"
+	echo $test_end_msg
+	exit $ksft_skip
+fi
+
+echo "Load usbip_host module"
+if ! /sbin/modprobe -q -n usbip_host; then
+	echo "usbip_test: module usbip_host is not found [SKIP]"
+	echo $test_end_msg
+	exit $ksft_skip
+fi
+
+if /sbin/modprobe -q usbip_host; then
+	echo "usbip_test: module usbip_host is loaded [OK]"
+else
+	echo "usbip_test: module usbip_host failed to load [FAIL]"
+	echo $test_end_msg
+	exit 1
+fi
+
+echo "Load vhci_hcd module"
+if /sbin/modprobe -q vhci_hcd; then
+	echo "usbip_test: module vhci_hcd is loaded [OK]"
+else
+	echo "usbip_test: module vhci_hcd failed to load [FAIL]"
+	echo $test_end_msg
+	exit 1
+fi
+echo "=============================================================="
+
+cd $tools_path;
+
+if [ ! -f src/usbip ]; then
+	echo "Please build usbip tools"
+	echo $test_end_msg
+	exit $ksft_skip
+fi
+
+echo "Expect to see export-able devices";
+src/usbip list -l;
+echo "=============================================================="
+
+echo "Run lsusb to see all usb devices"
+lsusb -t;
+echo "=============================================================="
+
+src/usbipd -D;
+
+echo "Get exported devices from localhost - expect to see none";
+src/usbip list -r localhost;
+echo "=============================================================="
+
+echo "bind devices";
+src/usbip bind -b $busid;
+echo "=============================================================="
+
+echo "Run lsusb - bound devices should be under usbip_host control"
+lsusb -t;
+echo "=============================================================="
+
+echo "bind devices - expect already bound messages"
+src/usbip bind -b $busid;
+echo "=============================================================="
+
+echo "Get exported devices from localhost - expect to see exported devices";
+src/usbip list -r localhost;
+echo "=============================================================="
+
+echo "unbind devices";
+src/usbip unbind -b $busid;
+echo "=============================================================="
+
+echo "Run lsusb - bound devices should be rebound to original drivers"
+lsusb -t;
+echo "=============================================================="
+
+echo "unbind devices - expect no devices bound message";
+src/usbip unbind -b $busid;
+echo "=============================================================="
+
+echo "Get exported devices from localhost - expect to see none";
+src/usbip list -r localhost;
+echo "=============================================================="
+
+echo "List imported devices - expect to see none";
+src/usbip port;
+echo "=============================================================="
+
+echo "Import devices from localhost - should fail with no devices"
+src/usbip attach -r localhost -b $busid;
+echo "=============================================================="
+
+echo "bind devices";
+src/usbip bind -b $busid;
+echo "=============================================================="
+
+echo "List imported devices - expect to see exported devices";
+src/usbip list -r localhost;
+echo "=============================================================="
+
+echo "List imported devices - expect to see none";
+src/usbip port;
+echo "=============================================================="
+
+echo "Import devices from localhost - should work"
+src/usbip attach -r localhost -b $busid;
+echo "=============================================================="
+
+# Wait for sysfs file to be updated. Without this sleep, usbip port
+# shows no imported devices.
+sleep 3;
+
+echo "List imported devices - expect to see imported devices";
+src/usbip port;
+echo "=============================================================="
+
+echo "Import devices from localhost - expect already imported messages"
+src/usbip attach -r localhost -b $busid;
+echo "=============================================================="
+
+echo "Un-import devices";
+src/usbip detach -p 00;
+src/usbip detach -p 01;
+echo "=============================================================="
+
+echo "List imported devices - expect to see none";
+src/usbip port;
+echo "=============================================================="
+
+echo "Un-import devices - expect no devices to detach messages";
+src/usbip detach -p 00;
+src/usbip detach -p 01;
+echo "=============================================================="
+
+echo "Detach invalid port tests - expect invalid port error message";
+src/usbip detach -p 100;
+echo "=============================================================="
+
+echo "Expect to see export-able devices";
+src/usbip list -l;
+echo "=============================================================="
+
+echo "Remove usbip_host module";
+rmmod usbip_host;
+
+echo "Run lsusb - bound devices should be rebound to original drivers"
+lsusb -t;
+echo "=============================================================="
+
+echo "Run bind without usbip_host - expect fail"
+src/usbip bind -b $busid;
+echo "=============================================================="
+
+echo "Run lsusb - devices that failed to bind aren't bound to any driver"
+lsusb -t;
+echo "=============================================================="
+
+echo "modprobe usbip_host - does it work?"
+/sbin/modprobe usbip_host
+echo "Should see -busid- is not in match_busid table... skip! dmesg"
+echo "=============================================================="
+dmesg | grep "is not in match_busid table"
+echo "=============================================================="
+
+echo $test_end_msg
diff --git a/tools/testing/selftests/efivarfs/.gitignore b/tools/testing/selftests/efivarfs/.gitignore
new file mode 100644
index 000000000..336184935
--- /dev/null
+++ b/tools/testing/selftests/efivarfs/.gitignore
@@ -0,0 +1,2 @@
+create-read
+open-unlink
diff --git a/tools/testing/selftests/efivarfs/Makefile b/tools/testing/selftests/efivarfs/Makefile
new file mode 100644
index 000000000..c49dcea69
--- /dev/null
+++ b/tools/testing/selftests/efivarfs/Makefile
@@ -0,0 +1,7 @@
+CFLAGS = -Wall
+
+TEST_GEN_FILES := open-unlink create-read
+TEST_PROGS := efivarfs.sh
+
+include ../lib.mk
+
diff --git a/tools/testing/selftests/efivarfs/config b/tools/testing/selftests/efivarfs/config
new file mode 100644
index 000000000..4e151f100
--- /dev/null
+++ b/tools/testing/selftests/efivarfs/config
@@ -0,0 +1 @@
+CONFIG_EFIVAR_FS=y
diff --git a/tools/testing/selftests/efivarfs/create-read.c b/tools/testing/selftests/efivarfs/create-read.c
new file mode 100644
index 000000000..9674a1939
--- /dev/null
+++ b/tools/testing/selftests/efivarfs/create-read.c
@@ -0,0 +1,39 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <string.h>
+
+int main(int argc, char **argv)
+{
+	const char *path;
+	char buf[4];
+	int fd, rc;
+
+	if (argc < 2) {
+		fprintf(stderr, "usage: %s <path>\n", argv[0]);
+		return EXIT_FAILURE;
+	}
+
+	path = argv[1];
+
+	/* create a test variable */
+	fd = open(path, O_RDWR | O_CREAT, 0600);
+	if (fd < 0) {
+		perror("open(O_WRONLY)");
+		return EXIT_FAILURE;
+	}
+
+	rc = read(fd, buf, sizeof(buf));
+	if (rc != 0) {
+		fprintf(stderr, "Reading a new var should return EOF\n");
+		return EXIT_FAILURE;
+	}
+
+	return EXIT_SUCCESS;
+}
diff --git a/tools/testing/selftests/efivarfs/efivarfs.sh b/tools/testing/selftests/efivarfs/efivarfs.sh
new file mode 100755
index 000000000..a47029a79
--- /dev/null
+++ b/tools/testing/selftests/efivarfs/efivarfs.sh
@@ -0,0 +1,215 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+efivarfs_mount=/sys/firmware/efi/efivars
+test_guid=210be57c-9849-4fc7-a635-e6382d1aec27
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+check_prereqs()
+{
+	local msg="skip all tests:"
+
+	if [ $UID != 0 ]; then
+		echo $msg must be run as root >&2
+		exit $ksft_skip
+	fi
+
+	if ! grep -q "^\S\+ $efivarfs_mount efivarfs" /proc/mounts; then
+		echo $msg efivarfs is not mounted on $efivarfs_mount >&2
+		exit $ksft_skip
+	fi
+}
+
+run_test()
+{
+	local test="$1"
+
+	echo "--------------------"
+	echo "running $test"
+	echo "--------------------"
+
+	if [ "$(type -t $test)" = 'function' ]; then
+		( $test )
+	else
+		( ./$test )
+	fi
+
+	if [ $? -ne 0 ]; then
+		echo "  [FAIL]"
+		rc=1
+	else
+		echo "  [PASS]"
+	fi
+}
+
+test_create()
+{
+	local attrs='\x07\x00\x00\x00'
+	local file=$efivarfs_mount/$FUNCNAME-$test_guid
+
+	printf "$attrs\x00" > $file
+
+	if [ ! -e $file ]; then
+		echo "$file couldn't be created" >&2
+		exit 1
+	fi
+
+	if [ $(stat -c %s $file) -ne 5 ]; then
+		echo "$file has invalid size" >&2
+		exit 1
+	fi
+}
+
+test_create_empty()
+{
+	local file=$efivarfs_mount/$FUNCNAME-$test_guid
+
+	: > $file
+
+	if [ ! -e $file ]; then
+		echo "$file can not be created without writing" >&2
+		exit 1
+	fi
+}
+
+test_create_read()
+{
+	local file=$efivarfs_mount/$FUNCNAME-$test_guid
+	./create-read $file
+}
+
+test_delete()
+{
+	local attrs='\x07\x00\x00\x00'
+	local file=$efivarfs_mount/$FUNCNAME-$test_guid
+
+	printf "$attrs\x00" > $file
+
+	if [ ! -e $file ]; then
+		echo "$file couldn't be created" >&2
+		exit 1
+	fi
+
+	rm $file 2>/dev/null
+	if [ $? -ne 0 ]; then
+		chattr -i $file
+		rm $file
+	fi
+
+	if [ -e $file ]; then
+		echo "$file couldn't be deleted" >&2
+		exit 1
+	fi
+
+}
+
+# test that we can remove a variable by issuing a write with only
+# attributes specified
+test_zero_size_delete()
+{
+	local attrs='\x07\x00\x00\x00'
+	local file=$efivarfs_mount/$FUNCNAME-$test_guid
+
+	printf "$attrs\x00" > $file
+
+	if [ ! -e $file ]; then
+		echo "$file does not exist" >&2
+		exit 1
+	fi
+
+	chattr -i $file
+	printf "$attrs" > $file
+
+	if [ -e $file ]; then
+		echo "$file should have been deleted" >&2
+		exit 1
+	fi
+}
+
+test_open_unlink()
+{
+	local file=$efivarfs_mount/$FUNCNAME-$test_guid
+	./open-unlink $file
+}
+
+# test that we can create a range of filenames
+test_valid_filenames()
+{
+	local attrs='\x07\x00\x00\x00'
+	local ret=0
+
+	local file_list="abc dump-type0-11-1-1362436005 1234 -"
+	for f in $file_list; do
+		local file=$efivarfs_mount/$f-$test_guid
+
+		printf "$attrs\x00" > $file
+
+		if [ ! -e $file ]; then
+			echo "$file could not be created" >&2
+			ret=1
+		else
+			rm $file 2>/dev/null
+			if [ $? -ne 0 ]; then
+				chattr -i $file
+				rm $file
+			fi
+		fi
+	done
+
+	exit $ret
+}
+
+test_invalid_filenames()
+{
+	local attrs='\x07\x00\x00\x00'
+	local ret=0
+
+	local file_list="
+		-1234-1234-1234-123456789abc
+		foo
+		foo-bar
+		-foo-
+		foo-barbazba-foob-foob-foob-foobarbazfoo
+		foo-------------------------------------
+		-12345678-1234-1234-1234-123456789abc
+		a-12345678=1234-1234-1234-123456789abc
+		a-12345678-1234=1234-1234-123456789abc
+		a-12345678-1234-1234=1234-123456789abc
+		a-12345678-1234-1234-1234=123456789abc
+		1112345678-1234-1234-1234-123456789abc"
+
+	for f in $file_list; do
+		local file=$efivarfs_mount/$f
+
+		printf "$attrs\x00" 2>/dev/null > $file
+
+		if [ -e $file ]; then
+			echo "Creating $file should have failed" >&2
+			rm $file 2>/dev/null
+			if [ $? -ne 0 ]; then
+				chattr -i $file
+				rm $file
+			fi
+			ret=1
+		fi
+	done
+
+	exit $ret
+}
+
+check_prereqs
+
+rc=0
+
+run_test test_create
+run_test test_create_empty
+run_test test_create_read
+run_test test_delete
+run_test test_zero_size_delete
+run_test test_open_unlink
+run_test test_valid_filenames
+run_test test_invalid_filenames
+
+exit $rc
diff --git a/tools/testing/selftests/efivarfs/open-unlink.c b/tools/testing/selftests/efivarfs/open-unlink.c
new file mode 100644
index 000000000..562742d44
--- /dev/null
+++ b/tools/testing/selftests/efivarfs/open-unlink.c
@@ -0,0 +1,134 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <errno.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/ioctl.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <linux/fs.h>
+
+static int set_immutable(const char *path, int immutable)
+{
+	unsigned int flags;
+	int fd;
+	int rc;
+	int error;
+
+	fd = open(path, O_RDONLY);
+	if (fd < 0)
+		return fd;
+
+	rc = ioctl(fd, FS_IOC_GETFLAGS, &flags);
+	if (rc < 0) {
+		error = errno;
+		close(fd);
+		errno = error;
+		return rc;
+	}
+
+	if (immutable)
+		flags |= FS_IMMUTABLE_FL;
+	else
+		flags &= ~FS_IMMUTABLE_FL;
+
+	rc = ioctl(fd, FS_IOC_SETFLAGS, &flags);
+	error = errno;
+	close(fd);
+	errno = error;
+	return rc;
+}
+
+static int get_immutable(const char *path)
+{
+	unsigned int flags;
+	int fd;
+	int rc;
+	int error;
+
+	fd = open(path, O_RDONLY);
+	if (fd < 0)
+		return fd;
+
+	rc = ioctl(fd, FS_IOC_GETFLAGS, &flags);
+	if (rc < 0) {
+		error = errno;
+		close(fd);
+		errno = error;
+		return rc;
+	}
+	close(fd);
+	if (flags & FS_IMMUTABLE_FL)
+		return 1;
+	return 0;
+}
+
+int main(int argc, char **argv)
+{
+	const char *path;
+	char buf[5];
+	int fd, rc;
+
+	if (argc < 2) {
+		fprintf(stderr, "usage: %s <path>\n", argv[0]);
+		return EXIT_FAILURE;
+	}
+
+	path = argv[1];
+
+	/* attributes: EFI_VARIABLE_NON_VOLATILE |
+	 *		EFI_VARIABLE_BOOTSERVICE_ACCESS |
+	 *		EFI_VARIABLE_RUNTIME_ACCESS
+	 */
+	*(uint32_t *)buf = 0x7;
+	buf[4] = 0;
+
+	/* create a test variable */
+	fd = open(path, O_WRONLY | O_CREAT, 0600);
+	if (fd < 0) {
+		perror("open(O_WRONLY)");
+		return EXIT_FAILURE;
+	}
+
+	rc = write(fd, buf, sizeof(buf));
+	if (rc != sizeof(buf)) {
+		perror("write");
+		return EXIT_FAILURE;
+	}
+
+	close(fd);
+
+	rc = get_immutable(path);
+	if (rc < 0) {
+		perror("ioctl(FS_IOC_GETFLAGS)");
+		return EXIT_FAILURE;
+	} else if (rc) {
+		rc = set_immutable(path, 0);
+		if (rc < 0) {
+			perror("ioctl(FS_IOC_SETFLAGS)");
+			return EXIT_FAILURE;
+		}
+	}
+
+	fd = open(path, O_RDONLY);
+	if (fd < 0) {
+		perror("open");
+		return EXIT_FAILURE;
+	}
+
+	if (unlink(path) < 0) {
+		perror("unlink");
+		return EXIT_FAILURE;
+	}
+
+	rc = read(fd, buf, sizeof(buf));
+	if (rc > 0) {
+		fprintf(stderr, "reading from an unlinked variable "
+				"shouldn't be possible\n");
+		return EXIT_FAILURE;
+	}
+
+	return EXIT_SUCCESS;
+}
diff --git a/tools/testing/selftests/exec/.gitignore b/tools/testing/selftests/exec/.gitignore
new file mode 100644
index 000000000..64073e050
--- /dev/null
+++ b/tools/testing/selftests/exec/.gitignore
@@ -0,0 +1,9 @@
+subdir*
+script*
+execveat
+execveat.symlink
+execveat.moved
+execveat.path.ephemeral
+execveat.ephemeral
+execveat.denatured
+xxxxxxxx*
+\ No newline at end of file
diff --git a/tools/testing/selftests/exec/Makefile b/tools/testing/selftests/exec/Makefile
new file mode 100644
index 000000000..427c41ba5
--- /dev/null
+++ b/tools/testing/selftests/exec/Makefile
@@ -0,0 +1,24 @@
+# SPDX-License-Identifier: GPL-2.0
+CFLAGS = -Wall
+
+TEST_GEN_PROGS := execveat
+TEST_GEN_FILES := execveat.symlink execveat.denatured script subdir
+# Makefile is a run-time dependency, since it's accessed by the execveat test
+TEST_FILES := Makefile
+
+EXTRA_CLEAN := $(OUTPUT)/subdir.moved $(OUTPUT)/execveat.moved $(OUTPUT)/xxxxx*
+
+include ../lib.mk
+
+$(OUTPUT)/subdir:
+	mkdir -p $@
+$(OUTPUT)/script:
+	echo '#!/bin/sh' > $@
+	echo 'exit $$*' >> $@
+	chmod +x $@
+$(OUTPUT)/execveat.symlink: $(OUTPUT)/execveat
+	cd $(OUTPUT) && ln -s -f $(shell basename $<) $(shell basename $@)
+$(OUTPUT)/execveat.denatured: $(OUTPUT)/execveat
+	cp $< $@
+	chmod -x $@
+
diff --git a/tools/testing/selftests/exec/execveat.c b/tools/testing/selftests/exec/execveat.c
new file mode 100644
index 000000000..47cbf54d0
--- /dev/null
+++ b/tools/testing/selftests/exec/execveat.c
@@ -0,0 +1,425 @@
+/*
+ * Copyright (c) 2014 Google, Inc.
+ *
+ * Licensed under the terms of the GNU GPL License version 2
+ *
+ * Selftests for execveat(2).
+ */
+
+#define _GNU_SOURCE  /* to get O_PATH, AT_EMPTY_PATH */
+#include <sys/sendfile.h>
+#include <sys/stat.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "../kselftest.h"
+
+static char longpath[2 * PATH_MAX] = "";
+static char *envp[] = { "IN_TEST=yes", NULL, NULL };
+static char *argv[] = { "execveat", "99", NULL };
+
+static int execveat_(int fd, const char *path, char **argv, char **envp,
+		     int flags)
+{
+#ifdef __NR_execveat
+	return syscall(__NR_execveat, fd, path, argv, envp, flags);
+#else
+	errno = ENOSYS;
+	return -1;
+#endif
+}
+
+#define check_execveat_fail(fd, path, flags, errno)	\
+	_check_execveat_fail(fd, path, flags, errno, #errno)
+static int _check_execveat_fail(int fd, const char *path, int flags,
+				int expected_errno, const char *errno_str)
+{
+	int rc;
+
+	errno = 0;
+	printf("Check failure of execveat(%d, '%s', %d) with %s... ",
+		fd, path?:"(null)", flags, errno_str);
+	rc = execveat_(fd, path, argv, envp, flags);
+
+	if (rc > 0) {
+		printf("[FAIL] (unexpected success from execveat(2))\n");
+		return 1;
+	}
+	if (errno != expected_errno) {
+		printf("[FAIL] (expected errno %d (%s) not %d (%s)\n",
+			expected_errno, strerror(expected_errno),
+			errno, strerror(errno));
+		return 1;
+	}
+	printf("[OK]\n");
+	return 0;
+}
+
+static int check_execveat_invoked_rc(int fd, const char *path, int flags,
+				     int expected_rc, int expected_rc2)
+{
+	int status;
+	int rc;
+	pid_t child;
+	int pathlen = path ? strlen(path) : 0;
+
+	if (pathlen > 40)
+		printf("Check success of execveat(%d, '%.20s...%s', %d)... ",
+			fd, path, (path + pathlen - 20), flags);
+	else
+		printf("Check success of execveat(%d, '%s', %d)... ",
+			fd, path?:"(null)", flags);
+	child = fork();
+	if (child < 0) {
+		printf("[FAIL] (fork() failed)\n");
+		return 1;
+	}
+	if (child == 0) {
+		/* Child: do execveat(). */
+		rc = execveat_(fd, path, argv, envp, flags);
+		printf("[FAIL]: execveat() failed, rc=%d errno=%d (%s)\n",
+			rc, errno, strerror(errno));
+		exit(1);  /* should not reach here */
+	}
+	/* Parent: wait for & check child's exit status. */
+	rc = waitpid(child, &status, 0);
+	if (rc != child) {
+		printf("[FAIL] (waitpid(%d,...) returned %d)\n", child, rc);
+		return 1;
+	}
+	if (!WIFEXITED(status)) {
+		printf("[FAIL] (child %d did not exit cleanly, status=%08x)\n",
+			child, status);
+		return 1;
+	}
+	if ((WEXITSTATUS(status) != expected_rc) &&
+	    (WEXITSTATUS(status) != expected_rc2)) {
+		printf("[FAIL] (child %d exited with %d not %d nor %d)\n",
+			child, WEXITSTATUS(status), expected_rc, expected_rc2);
+		return 1;
+	}
+	printf("[OK]\n");
+	return 0;
+}
+
+static int check_execveat(int fd, const char *path, int flags)
+{
+	return check_execveat_invoked_rc(fd, path, flags, 99, 99);
+}
+
+static char *concat(const char *left, const char *right)
+{
+	char *result = malloc(strlen(left) + strlen(right) + 1);
+
+	strcpy(result, left);
+	strcat(result, right);
+	return result;
+}
+
+static int open_or_die(const char *filename, int flags)
+{
+	int fd = open(filename, flags);
+
+	if (fd < 0) {
+		printf("Failed to open '%s'; "
+			"check prerequisites are available\n", filename);
+		exit(1);
+	}
+	return fd;
+}
+
+static void exe_cp(const char *src, const char *dest)
+{
+	int in_fd = open_or_die(src, O_RDONLY);
+	int out_fd = open(dest, O_RDWR|O_CREAT|O_TRUNC, 0755);
+	struct stat info;
+
+	fstat(in_fd, &info);
+	sendfile(out_fd, in_fd, NULL, info.st_size);
+	close(in_fd);
+	close(out_fd);
+}
+
+#define XX_DIR_LEN 200
+static int check_execveat_pathmax(int root_dfd, const char *src, int is_script)
+{
+	int fail = 0;
+	int ii, count, len;
+	char longname[XX_DIR_LEN + 1];
+	int fd;
+
+	if (*longpath == '\0') {
+		/* Create a filename close to PATH_MAX in length */
+		char *cwd = getcwd(NULL, 0);
+
+		if (!cwd) {
+			printf("Failed to getcwd(), errno=%d (%s)\n",
+			       errno, strerror(errno));
+			return 2;
+		}
+		strcpy(longpath, cwd);
+		strcat(longpath, "/");
+		memset(longname, 'x', XX_DIR_LEN - 1);
+		longname[XX_DIR_LEN - 1] = '/';
+		longname[XX_DIR_LEN] = '\0';
+		count = (PATH_MAX - 3 - strlen(cwd)) / XX_DIR_LEN;
+		for (ii = 0; ii < count; ii++) {
+			strcat(longpath, longname);
+			mkdir(longpath, 0755);
+		}
+		len = (PATH_MAX - 3 - strlen(cwd)) - (count * XX_DIR_LEN);
+		if (len <= 0)
+			len = 1;
+		memset(longname, 'y', len);
+		longname[len] = '\0';
+		strcat(longpath, longname);
+		free(cwd);
+	}
+	exe_cp(src, longpath);
+
+	/*
+	 * Execute as a pre-opened file descriptor, which works whether this is
+	 * a script or not (because the interpreter sees a filename like
+	 * "/dev/fd/20").
+	 */
+	fd = open(longpath, O_RDONLY);
+	if (fd > 0) {
+		printf("Invoke copy of '%s' via filename of length %zu:\n",
+			src, strlen(longpath));
+		fail += check_execveat(fd, "", AT_EMPTY_PATH);
+	} else {
+		printf("Failed to open length %zu filename, errno=%d (%s)\n",
+			strlen(longpath), errno, strerror(errno));
+		fail++;
+	}
+
+	/*
+	 * Execute as a long pathname relative to "/".  If this is a script,
+	 * the interpreter will launch but fail to open the script because its
+	 * name ("/dev/fd/5/xxx....") is bigger than PATH_MAX.
+	 *
+	 * The failure code is usually 127 (POSIX: "If a command is not found,
+	 * the exit status shall be 127."), but some systems give 126 (POSIX:
+	 * "If the command name is found, but it is not an executable utility,
+	 * the exit status shall be 126."), so allow either.
+	 */
+	if (is_script)
+		fail += check_execveat_invoked_rc(root_dfd, longpath + 1, 0,
+						  127, 126);
+	else
+		fail += check_execveat(root_dfd, longpath + 1, 0);
+
+	return fail;
+}
+
+static int run_tests(void)
+{
+	int fail = 0;
+	char *fullname = realpath("execveat", NULL);
+	char *fullname_script = realpath("script", NULL);
+	char *fullname_symlink = concat(fullname, ".symlink");
+	int subdir_dfd = open_or_die("subdir", O_DIRECTORY|O_RDONLY);
+	int subdir_dfd_ephemeral = open_or_die("subdir.ephemeral",
+					       O_DIRECTORY|O_RDONLY);
+	int dot_dfd = open_or_die(".", O_DIRECTORY|O_RDONLY);
+	int root_dfd = open_or_die("/", O_DIRECTORY|O_RDONLY);
+	int dot_dfd_path = open_or_die(".", O_DIRECTORY|O_RDONLY|O_PATH);
+	int dot_dfd_cloexec = open_or_die(".", O_DIRECTORY|O_RDONLY|O_CLOEXEC);
+	int fd = open_or_die("execveat", O_RDONLY);
+	int fd_path = open_or_die("execveat", O_RDONLY|O_PATH);
+	int fd_symlink = open_or_die("execveat.symlink", O_RDONLY);
+	int fd_denatured = open_or_die("execveat.denatured", O_RDONLY);
+	int fd_denatured_path = open_or_die("execveat.denatured",
+					    O_RDONLY|O_PATH);
+	int fd_script = open_or_die("script", O_RDONLY);
+	int fd_ephemeral = open_or_die("execveat.ephemeral", O_RDONLY);
+	int fd_ephemeral_path = open_or_die("execveat.path.ephemeral",
+					    O_RDONLY|O_PATH);
+	int fd_script_ephemeral = open_or_die("script.ephemeral", O_RDONLY);
+	int fd_cloexec = open_or_die("execveat", O_RDONLY|O_CLOEXEC);
+	int fd_script_cloexec = open_or_die("script", O_RDONLY|O_CLOEXEC);
+
+	/* Check if we have execveat at all, and bail early if not */
+	errno = 0;
+	execveat_(-1, NULL, NULL, NULL, 0);
+	if (errno == ENOSYS) {
+		ksft_exit_skip(
+			"ENOSYS calling execveat - no kernel support?\n");
+	}
+
+	/* Change file position to confirm it doesn't affect anything */
+	lseek(fd, 10, SEEK_SET);
+
+	/* Normal executable file: */
+	/*   dfd + path */
+	fail += check_execveat(subdir_dfd, "../execveat", 0);
+	fail += check_execveat(dot_dfd, "execveat", 0);
+	fail += check_execveat(dot_dfd_path, "execveat", 0);
+	/*   absolute path */
+	fail += check_execveat(AT_FDCWD, fullname, 0);
+	/*   absolute path with nonsense dfd */
+	fail += check_execveat(99, fullname, 0);
+	/*   fd + no path */
+	fail += check_execveat(fd, "", AT_EMPTY_PATH);
+	/*   O_CLOEXEC fd + no path */
+	fail += check_execveat(fd_cloexec, "", AT_EMPTY_PATH);
+	/*   O_PATH fd */
+	fail += check_execveat(fd_path, "", AT_EMPTY_PATH);
+
+	/* Mess with executable file that's already open: */
+	/*   fd + no path to a file that's been renamed */
+	rename("execveat.ephemeral", "execveat.moved");
+	fail += check_execveat(fd_ephemeral, "", AT_EMPTY_PATH);
+	/*   fd + no path to a file that's been deleted */
+	unlink("execveat.moved"); /* remove the file now fd open */
+	fail += check_execveat(fd_ephemeral, "", AT_EMPTY_PATH);
+
+	/* Mess with executable file that's already open with O_PATH */
+	/*   fd + no path to a file that's been deleted */
+	unlink("execveat.path.ephemeral");
+	fail += check_execveat(fd_ephemeral_path, "", AT_EMPTY_PATH);
+
+	/* Invalid argument failures */
+	fail += check_execveat_fail(fd, "", 0, ENOENT);
+	fail += check_execveat_fail(fd, NULL, AT_EMPTY_PATH, EFAULT);
+
+	/* Symlink to executable file: */
+	/*   dfd + path */
+	fail += check_execveat(dot_dfd, "execveat.symlink", 0);
+	fail += check_execveat(dot_dfd_path, "execveat.symlink", 0);
+	/*   absolute path */
+	fail += check_execveat(AT_FDCWD, fullname_symlink, 0);
+	/*   fd + no path, even with AT_SYMLINK_NOFOLLOW (already followed) */
+	fail += check_execveat(fd_symlink, "", AT_EMPTY_PATH);
+	fail += check_execveat(fd_symlink, "",
+			       AT_EMPTY_PATH|AT_SYMLINK_NOFOLLOW);
+
+	/* Symlink fails when AT_SYMLINK_NOFOLLOW set: */
+	/*   dfd + path */
+	fail += check_execveat_fail(dot_dfd, "execveat.symlink",
+				    AT_SYMLINK_NOFOLLOW, ELOOP);
+	fail += check_execveat_fail(dot_dfd_path, "execveat.symlink",
+				    AT_SYMLINK_NOFOLLOW, ELOOP);
+	/*   absolute path */
+	fail += check_execveat_fail(AT_FDCWD, fullname_symlink,
+				    AT_SYMLINK_NOFOLLOW, ELOOP);
+
+	/* Shell script wrapping executable file: */
+	/*   dfd + path */
+	fail += check_execveat(subdir_dfd, "../script", 0);
+	fail += check_execveat(dot_dfd, "script", 0);
+	fail += check_execveat(dot_dfd_path, "script", 0);
+	/*   absolute path */
+	fail += check_execveat(AT_FDCWD, fullname_script, 0);
+	/*   fd + no path */
+	fail += check_execveat(fd_script, "", AT_EMPTY_PATH);
+	fail += check_execveat(fd_script, "",
+			       AT_EMPTY_PATH|AT_SYMLINK_NOFOLLOW);
+	/*   O_CLOEXEC fd fails for a script (as script file inaccessible) */
+	fail += check_execveat_fail(fd_script_cloexec, "", AT_EMPTY_PATH,
+				    ENOENT);
+	fail += check_execveat_fail(dot_dfd_cloexec, "script", 0, ENOENT);
+
+	/* Mess with script file that's already open: */
+	/*   fd + no path to a file that's been renamed */
+	rename("script.ephemeral", "script.moved");
+	fail += check_execveat(fd_script_ephemeral, "", AT_EMPTY_PATH);
+	/*   fd + no path to a file that's been deleted */
+	unlink("script.moved"); /* remove the file while fd open */
+	fail += check_execveat(fd_script_ephemeral, "", AT_EMPTY_PATH);
+
+	/* Rename a subdirectory in the path: */
+	rename("subdir.ephemeral", "subdir.moved");
+	fail += check_execveat(subdir_dfd_ephemeral, "../script", 0);
+	fail += check_execveat(subdir_dfd_ephemeral, "script", 0);
+	/* Remove the subdir and its contents */
+	unlink("subdir.moved/script");
+	unlink("subdir.moved");
+	/* Shell loads via deleted subdir OK because name starts with .. */
+	fail += check_execveat(subdir_dfd_ephemeral, "../script", 0);
+	fail += check_execveat_fail(subdir_dfd_ephemeral, "script", 0, ENOENT);
+
+	/* Flag values other than AT_SYMLINK_NOFOLLOW => EINVAL */
+	fail += check_execveat_fail(dot_dfd, "execveat", 0xFFFF, EINVAL);
+	/* Invalid path => ENOENT */
+	fail += check_execveat_fail(dot_dfd, "no-such-file", 0, ENOENT);
+	fail += check_execveat_fail(dot_dfd_path, "no-such-file", 0, ENOENT);
+	fail += check_execveat_fail(AT_FDCWD, "no-such-file", 0, ENOENT);
+	/* Attempt to execute directory => EACCES */
+	fail += check_execveat_fail(dot_dfd, "", AT_EMPTY_PATH, EACCES);
+	/* Attempt to execute non-executable => EACCES */
+	fail += check_execveat_fail(dot_dfd, "Makefile", 0, EACCES);
+	fail += check_execveat_fail(fd_denatured, "", AT_EMPTY_PATH, EACCES);
+	fail += check_execveat_fail(fd_denatured_path, "", AT_EMPTY_PATH,
+				    EACCES);
+	/* Attempt to execute nonsense FD => EBADF */
+	fail += check_execveat_fail(99, "", AT_EMPTY_PATH, EBADF);
+	fail += check_execveat_fail(99, "execveat", 0, EBADF);
+	/* Attempt to execute relative to non-directory => ENOTDIR */
+	fail += check_execveat_fail(fd, "execveat", 0, ENOTDIR);
+
+	fail += check_execveat_pathmax(root_dfd, "execveat", 0);
+	fail += check_execveat_pathmax(root_dfd, "script", 1);
+	return fail;
+}
+
+static void prerequisites(void)
+{
+	int fd;
+	const char *script = "#!/bin/sh\nexit $*\n";
+
+	/* Create ephemeral copies of files */
+	exe_cp("execveat", "execveat.ephemeral");
+	exe_cp("execveat", "execveat.path.ephemeral");
+	exe_cp("script", "script.ephemeral");
+	mkdir("subdir.ephemeral", 0755);
+
+	fd = open("subdir.ephemeral/script", O_RDWR|O_CREAT|O_TRUNC, 0755);
+	write(fd, script, strlen(script));
+	close(fd);
+}
+
+int main(int argc, char **argv)
+{
+	int ii;
+	int rc;
+	const char *verbose = getenv("VERBOSE");
+
+	if (argc >= 2) {
+		/* If we are invoked with an argument, don't run tests. */
+		const char *in_test = getenv("IN_TEST");
+
+		if (verbose) {
+			printf("  invoked with:");
+			for (ii = 0; ii < argc; ii++)
+				printf(" [%d]='%s'", ii, argv[ii]);
+			printf("\n");
+		}
+
+		/* Check expected environment transferred. */
+		if (!in_test || strcmp(in_test, "yes") != 0) {
+			printf("[FAIL] (no IN_TEST=yes in env)\n");
+			return 1;
+		}
+
+		/* Use the final argument as an exit code. */
+		rc = atoi(argv[argc - 1]);
+		fflush(stdout);
+	} else {
+		prerequisites();
+		if (verbose)
+			envp[1] = "VERBOSE=1";
+		rc = run_tests();
+		if (rc > 0)
+			printf("%d tests failed\n", rc);
+	}
+	return rc;
+}
diff --git a/tools/testing/selftests/filesystems/.gitignore b/tools/testing/selftests/filesystems/.gitignore
new file mode 100644
index 000000000..8449cf671
--- /dev/null
+++ b/tools/testing/selftests/filesystems/.gitignore
@@ -0,0 +1,2 @@
+dnotify_test
+devpts_pts
diff --git a/tools/testing/selftests/filesystems/Makefile b/tools/testing/selftests/filesystems/Makefile
new file mode 100644
index 000000000..129880fb4
--- /dev/null
+++ b/tools/testing/selftests/filesystems/Makefile
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: GPL-2.0
+
+CFLAGS += -I../../../../usr/include/
+TEST_GEN_PROGS := devpts_pts
+TEST_GEN_PROGS_EXTENDED := dnotify_test
+
+include ../lib.mk
diff --git a/tools/testing/selftests/filesystems/devpts_pts.c b/tools/testing/selftests/filesystems/devpts_pts.c
new file mode 100644
index 000000000..b1fc9b916
--- /dev/null
+++ b/tools/testing/selftests/filesystems/devpts_pts.c
@@ -0,0 +1,316 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <errno.h>
+#include <fcntl.h>
+#include <sched.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <asm/ioctls.h>
+#include <sys/mount.h>
+#include <sys/wait.h>
+#include "../kselftest.h"
+
+static bool terminal_dup2(int duplicate, int original)
+{
+	int ret;
+
+	ret = dup2(duplicate, original);
+	if (ret < 0)
+		return false;
+
+	return true;
+}
+
+static int terminal_set_stdfds(int fd)
+{
+	int i;
+
+	if (fd < 0)
+		return 0;
+
+	for (i = 0; i < 3; i++)
+		if (!terminal_dup2(fd, (int[]){STDIN_FILENO, STDOUT_FILENO,
+					       STDERR_FILENO}[i]))
+			return -1;
+
+	return 0;
+}
+
+static int login_pty(int fd)
+{
+	int ret;
+
+	setsid();
+
+	ret = ioctl(fd, TIOCSCTTY, NULL);
+	if (ret < 0)
+		return -1;
+
+	ret = terminal_set_stdfds(fd);
+	if (ret < 0)
+		return -1;
+
+	if (fd > STDERR_FILENO)
+		close(fd);
+
+	return 0;
+}
+
+static int wait_for_pid(pid_t pid)
+{
+	int status, ret;
+
+again:
+	ret = waitpid(pid, &status, 0);
+	if (ret == -1) {
+		if (errno == EINTR)
+			goto again;
+		return -1;
+	}
+	if (ret != pid)
+		goto again;
+
+	if (!WIFEXITED(status) || WEXITSTATUS(status) != 0)
+		return -1;
+
+	return 0;
+}
+
+static int resolve_procfd_symlink(int fd, char *buf, size_t buflen)
+{
+	int ret;
+	char procfd[4096];
+
+	ret = snprintf(procfd, 4096, "/proc/self/fd/%d", fd);
+	if (ret < 0 || ret >= 4096)
+		return -1;
+
+	ret = readlink(procfd, buf, buflen);
+	if (ret < 0 || (size_t)ret >= buflen)
+		return -1;
+
+	buf[ret] = '\0';
+
+	return 0;
+}
+
+static int do_tiocgptpeer(char *ptmx, char *expected_procfd_contents)
+{
+	int ret;
+	int master = -1, slave = -1, fret = -1;
+
+	master = open(ptmx, O_RDWR | O_NOCTTY | O_CLOEXEC);
+	if (master < 0) {
+		fprintf(stderr, "Failed to open \"%s\": %s\n", ptmx,
+			strerror(errno));
+		return -1;
+	}
+
+	/*
+	 * grantpt() makes assumptions about /dev/pts/ so ignore it. It's also
+	 * not really needed.
+	 */
+	ret = unlockpt(master);
+	if (ret < 0) {
+		fprintf(stderr, "Failed to unlock terminal\n");
+		goto do_cleanup;
+	}
+
+#ifdef TIOCGPTPEER
+	slave = ioctl(master, TIOCGPTPEER, O_RDWR | O_NOCTTY | O_CLOEXEC);
+#endif
+	if (slave < 0) {
+		if (errno == EINVAL) {
+			fprintf(stderr, "TIOCGPTPEER is not supported. "
+					"Skipping test.\n");
+			fret = KSFT_SKIP;
+		} else {
+			fprintf(stderr,
+				"Failed to perform TIOCGPTPEER ioctl\n");
+			fret = EXIT_FAILURE;
+		}
+		goto do_cleanup;
+	}
+
+	pid_t pid = fork();
+	if (pid < 0)
+		goto do_cleanup;
+
+	if (pid == 0) {
+		char buf[4096];
+
+		ret = login_pty(slave);
+		if (ret < 0) {
+			fprintf(stderr, "Failed to setup terminal\n");
+			_exit(EXIT_FAILURE);
+		}
+
+		ret = resolve_procfd_symlink(STDIN_FILENO, buf, sizeof(buf));
+		if (ret < 0) {
+			fprintf(stderr, "Failed to retrieve pathname of pts "
+					"slave file descriptor\n");
+			_exit(EXIT_FAILURE);
+		}
+
+		if (strncmp(expected_procfd_contents, buf,
+			    strlen(expected_procfd_contents)) != 0) {
+			fprintf(stderr, "Received invalid contents for "
+					"\"/proc/<pid>/fd/%d\" symlink: %s\n",
+					STDIN_FILENO, buf);
+			_exit(-1);
+		}
+
+		fprintf(stderr, "Contents of \"/proc/<pid>/fd/%d\" "
+				"symlink are valid: %s\n", STDIN_FILENO, buf);
+
+		_exit(EXIT_SUCCESS);
+	}
+
+	ret = wait_for_pid(pid);
+	if (ret < 0)
+		goto do_cleanup;
+
+	fret = EXIT_SUCCESS;
+
+do_cleanup:
+	if (master >= 0)
+		close(master);
+	if (slave >= 0)
+		close(slave);
+
+	return fret;
+}
+
+static int verify_non_standard_devpts_mount(void)
+{
+	char *mntpoint;
+	int ret = -1;
+	char devpts[] = P_tmpdir "/devpts_fs_XXXXXX";
+	char ptmx[] = P_tmpdir "/devpts_fs_XXXXXX/ptmx";
+
+	ret = umount("/dev/pts");
+	if (ret < 0) {
+		fprintf(stderr, "Failed to unmount \"/dev/pts\": %s\n",
+				strerror(errno));
+		return -1;
+	}
+
+	(void)umount("/dev/ptmx");
+
+	mntpoint = mkdtemp(devpts);
+	if (!mntpoint) {
+		fprintf(stderr, "Failed to create temporary mountpoint: %s\n",
+				 strerror(errno));
+		return -1;
+	}
+
+	ret = mount("devpts", mntpoint, "devpts", MS_NOSUID | MS_NOEXEC,
+		    "newinstance,ptmxmode=0666,mode=0620,gid=5");
+	if (ret < 0) {
+		fprintf(stderr, "Failed to mount devpts fs to \"%s\" in new "
+				"mount namespace: %s\n", mntpoint,
+				strerror(errno));
+		unlink(mntpoint);
+		return -1;
+	}
+
+	ret = snprintf(ptmx, sizeof(ptmx), "%s/ptmx", devpts);
+	if (ret < 0 || (size_t)ret >= sizeof(ptmx)) {
+		unlink(mntpoint);
+		return -1;
+	}
+
+	ret = do_tiocgptpeer(ptmx, mntpoint);
+	unlink(mntpoint);
+	if (ret < 0)
+		return -1;
+
+	return 0;
+}
+
+static int verify_ptmx_bind_mount(void)
+{
+	int ret;
+
+	ret = mount("/dev/pts/ptmx", "/dev/ptmx", NULL, MS_BIND, NULL);
+	if (ret < 0) {
+		fprintf(stderr, "Failed to bind mount \"/dev/pts/ptmx\" to "
+				"\"/dev/ptmx\" mount namespace\n");
+		return -1;
+	}
+
+	ret = do_tiocgptpeer("/dev/ptmx", "/dev/pts/");
+	if (ret < 0)
+		return -1;
+
+	return 0;
+}
+
+static int verify_invalid_ptmx_bind_mount(void)
+{
+	int ret;
+	char mntpoint_fd;
+	char ptmx[] = P_tmpdir "/devpts_ptmx_XXXXXX";
+
+	mntpoint_fd = mkstemp(ptmx);
+	if (mntpoint_fd < 0) {
+		fprintf(stderr, "Failed to create temporary directory: %s\n",
+				 strerror(errno));
+		return -1;
+	}
+
+	ret = mount("/dev/pts/ptmx", ptmx, NULL, MS_BIND, NULL);
+	close(mntpoint_fd);
+	if (ret < 0) {
+		fprintf(stderr, "Failed to bind mount \"/dev/pts/ptmx\" to "
+				"\"%s\" mount namespace\n", ptmx);
+		return -1;
+	}
+
+	ret = do_tiocgptpeer(ptmx, "/dev/pts/");
+	if (ret == 0)
+		return -1;
+
+	return 0;
+}
+
+int main(int argc, char *argv[])
+{
+	int ret;
+
+	if (!isatty(STDIN_FILENO)) {
+		fprintf(stderr, "Standard input file descriptor is not attached "
+				"to a terminal. Skipping test\n");
+		exit(KSFT_SKIP);
+	}
+
+	ret = unshare(CLONE_NEWNS);
+	if (ret < 0) {
+		fprintf(stderr, "Failed to unshare mount namespace\n");
+		exit(EXIT_FAILURE);
+	}
+
+	ret = mount("", "/", NULL, MS_PRIVATE | MS_REC, 0);
+	if (ret < 0) {
+		fprintf(stderr, "Failed to make \"/\" MS_PRIVATE in new mount "
+				"namespace\n");
+		exit(EXIT_FAILURE);
+	}
+
+	ret = verify_ptmx_bind_mount();
+	if (ret < 0)
+		exit(EXIT_FAILURE);
+
+	ret = verify_invalid_ptmx_bind_mount();
+	if (ret < 0)
+		exit(EXIT_FAILURE);
+
+	ret = verify_non_standard_devpts_mount();
+	if (ret < 0)
+		exit(EXIT_FAILURE);
+
+	exit(EXIT_SUCCESS);
+}
diff --git a/tools/testing/selftests/filesystems/dnotify_test.c b/tools/testing/selftests/filesystems/dnotify_test.c
new file mode 100644
index 000000000..c0a9b2d33
--- /dev/null
+++ b/tools/testing/selftests/filesystems/dnotify_test.c
@@ -0,0 +1,35 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE	/* needed to get the defines */
+#include <fcntl.h>	/* in glibc 2.2 this has the needed
+				   values defined */
+#include <signal.h>
+#include <stdio.h>
+#include <unistd.h>
+
+static volatile int event_fd;
+
+static void handler(int sig, siginfo_t *si, void *data)
+{
+	event_fd = si->si_fd;
+}
+
+int main(void)
+{
+	struct sigaction act;
+	int fd;
+
+	act.sa_sigaction = handler;
+	sigemptyset(&act.sa_mask);
+	act.sa_flags = SA_SIGINFO;
+	sigaction(SIGRTMIN + 1, &act, NULL);
+
+	fd = open(".", O_RDONLY);
+	fcntl(fd, F_SETSIG, SIGRTMIN + 1);
+	fcntl(fd, F_NOTIFY, DN_MODIFY|DN_CREATE|DN_MULTISHOT);
+	/* we will now be notified if any of the files
+	   in "." is modified or new files are created */
+	while (1) {
+		pause();
+		printf("Got event on fd=%d\n", event_fd);
+	}
+}
diff --git a/tools/testing/selftests/firmware/Makefile b/tools/testing/selftests/firmware/Makefile
new file mode 100644
index 000000000..261c81f08
--- /dev/null
+++ b/tools/testing/selftests/firmware/Makefile
@@ -0,0 +1,12 @@
+# Makefile for firmware loading selftests
+
+# No binaries, but make sure arg-less "make" doesn't trigger "run_tests"
+all:
+
+TEST_PROGS := fw_run_tests.sh
+TEST_FILES := fw_fallback.sh fw_filesystem.sh fw_lib.sh
+
+include ../lib.mk
+
+# Nothing to clean up.
+clean:
diff --git a/tools/testing/selftests/firmware/config b/tools/testing/selftests/firmware/config
new file mode 100644
index 000000000..bf634dda0
--- /dev/null
+++ b/tools/testing/selftests/firmware/config
@@ -0,0 +1,5 @@
+CONFIG_TEST_FIRMWARE=y
+CONFIG_FW_LOADER=y
+CONFIG_FW_LOADER_USER_HELPER=y
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
diff --git a/tools/testing/selftests/firmware/fw_fallback.sh b/tools/testing/selftests/firmware/fw_fallback.sh
new file mode 100755
index 000000000..70d18be46
--- /dev/null
+++ b/tools/testing/selftests/firmware/fw_fallback.sh
@@ -0,0 +1,283 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# This validates that the kernel will fall back to using the fallback mechanism
+# to load firmware it can't find on disk itself. We must request a firmware
+# that the kernel won't find, and any installed helper (e.g. udev) also
+# won't find so that we can do the load ourself manually.
+set -e
+
+TEST_REQS_FW_SYSFS_FALLBACK="yes"
+TEST_REQS_FW_SET_CUSTOM_PATH="no"
+TEST_DIR=$(dirname $0)
+source $TEST_DIR/fw_lib.sh
+
+check_mods
+check_setup
+verify_reqs
+setup_tmp_file
+
+trap "test_finish" EXIT
+
+load_fw()
+{
+	local name="$1"
+	local file="$2"
+
+	# This will block until our load (below) has finished.
+	echo -n "$name" >"$DIR"/trigger_request &
+
+	# Give kernel a chance to react.
+	local timeout=10
+	while [ ! -e "$DIR"/"$name"/loading ]; do
+		sleep 0.1
+		timeout=$(( $timeout - 1 ))
+		if [ "$timeout" -eq 0 ]; then
+			echo "$0: firmware interface never appeared" >&2
+			exit 1
+		fi
+	done
+
+	echo 1 >"$DIR"/"$name"/loading
+	cat "$file" >"$DIR"/"$name"/data
+	echo 0 >"$DIR"/"$name"/loading
+
+	# Wait for request to finish.
+	wait
+}
+
+load_fw_cancel()
+{
+	local name="$1"
+	local file="$2"
+
+	# This will block until our load (below) has finished.
+	echo -n "$name" >"$DIR"/trigger_request 2>/dev/null &
+
+	# Give kernel a chance to react.
+	local timeout=10
+	while [ ! -e "$DIR"/"$name"/loading ]; do
+		sleep 0.1
+		timeout=$(( $timeout - 1 ))
+		if [ "$timeout" -eq 0 ]; then
+			echo "$0: firmware interface never appeared" >&2
+			exit 1
+		fi
+	done
+
+	echo -1 >"$DIR"/"$name"/loading
+
+	# Wait for request to finish.
+	wait
+}
+
+load_fw_custom()
+{
+	if [ ! -e "$DIR"/trigger_custom_fallback ]; then
+		echo "$0: custom fallback trigger not present, ignoring test" >&2
+		exit $ksft_skip
+	fi
+
+	local name="$1"
+	local file="$2"
+
+	echo -n "$name" >"$DIR"/trigger_custom_fallback 2>/dev/null &
+
+	# Give kernel a chance to react.
+	local timeout=10
+	while [ ! -e "$DIR"/"$name"/loading ]; do
+		sleep 0.1
+		timeout=$(( $timeout - 1 ))
+		if [ "$timeout" -eq 0 ]; then
+			echo "$0: firmware interface never appeared" >&2
+			exit 1
+		fi
+	done
+
+	echo 1 >"$DIR"/"$name"/loading
+	cat "$file" >"$DIR"/"$name"/data
+	echo 0 >"$DIR"/"$name"/loading
+
+	# Wait for request to finish.
+	wait
+	return 0
+}
+
+
+load_fw_custom_cancel()
+{
+	if [ ! -e "$DIR"/trigger_custom_fallback ]; then
+		echo "$0: canceling custom fallback trigger not present, ignoring test" >&2
+		exit $ksft_skip
+	fi
+
+	local name="$1"
+	local file="$2"
+
+	echo -n "$name" >"$DIR"/trigger_custom_fallback 2>/dev/null &
+
+	# Give kernel a chance to react.
+	local timeout=10
+	while [ ! -e "$DIR"/"$name"/loading ]; do
+		sleep 0.1
+		timeout=$(( $timeout - 1 ))
+		if [ "$timeout" -eq 0 ]; then
+			echo "$0: firmware interface never appeared" >&2
+			exit 1
+		fi
+	done
+
+	echo -1 >"$DIR"/"$name"/loading
+
+	# Wait for request to finish.
+	wait
+	return 0
+}
+
+load_fw_fallback_with_child()
+{
+	local name="$1"
+	local file="$2"
+
+	# This is the value already set but we want to be explicit
+	echo 4 >/sys/class/firmware/timeout
+
+	sleep 1 &
+	SECONDS_BEFORE=$(date +%s)
+	echo -n "$name" >"$DIR"/trigger_request 2>/dev/null
+	SECONDS_AFTER=$(date +%s)
+	SECONDS_DELTA=$(($SECONDS_AFTER - $SECONDS_BEFORE))
+	if [ "$SECONDS_DELTA" -lt 4 ]; then
+		RET=1
+	else
+		RET=0
+	fi
+	wait
+	return $RET
+}
+
+test_syfs_timeout()
+{
+	DEVPATH="$DIR"/"nope-$NAME"/loading
+
+	# Test failure when doing nothing (timeout works).
+	echo -n 2 >/sys/class/firmware/timeout
+	echo -n "nope-$NAME" >"$DIR"/trigger_request 2>/dev/null &
+
+	# Give the kernel some time to load the loading file, must be less
+	# than the timeout above.
+	sleep 1
+	if [ ! -f $DEVPATH ]; then
+		echo "$0: fallback mechanism immediately cancelled"
+		echo ""
+		echo "The file never appeared: $DEVPATH"
+		echo ""
+		echo "This might be a distribution udev rule setup by your distribution"
+		echo "to immediately cancel all fallback requests, this must be"
+		echo "removed before running these tests. To confirm look for"
+		echo "a firmware rule like /lib/udev/rules.d/50-firmware.rules"
+		echo "and see if you have something like this:"
+		echo ""
+		echo "SUBSYSTEM==\"firmware\", ACTION==\"add\", ATTR{loading}=\"-1\""
+		echo ""
+		echo "If you do remove this file or comment out this line before"
+		echo "proceeding with these tests."
+		exit 1
+	fi
+
+	if diff -q "$FW" /dev/test_firmware >/dev/null ; then
+		echo "$0: firmware was not expected to match" >&2
+		exit 1
+	else
+		echo "$0: timeout works"
+	fi
+}
+
+run_sysfs_main_tests()
+{
+	test_syfs_timeout
+	# Put timeout high enough for us to do work but not so long that failures
+	# slow down this test too much.
+	echo 4 >/sys/class/firmware/timeout
+
+	# Load this script instead of the desired firmware.
+	load_fw "$NAME" "$0"
+	if diff -q "$FW" /dev/test_firmware >/dev/null ; then
+		echo "$0: firmware was not expected to match" >&2
+		exit 1
+	else
+		echo "$0: firmware comparison works"
+	fi
+
+	# Do a proper load, which should work correctly.
+	load_fw "$NAME" "$FW"
+	if ! diff -q "$FW" /dev/test_firmware >/dev/null ; then
+		echo "$0: firmware was not loaded" >&2
+		exit 1
+	else
+		echo "$0: fallback mechanism works"
+	fi
+
+	load_fw_cancel "nope-$NAME" "$FW"
+	if diff -q "$FW" /dev/test_firmware >/dev/null ; then
+		echo "$0: firmware was expected to be cancelled" >&2
+		exit 1
+	else
+		echo "$0: cancelling fallback mechanism works"
+	fi
+
+	set +e
+	load_fw_fallback_with_child "nope-signal-$NAME" "$FW"
+	if [ "$?" -eq 0 ]; then
+		echo "$0: SIGCHLD on sync ignored as expected" >&2
+	else
+		echo "$0: error - sync firmware request cancelled due to SIGCHLD" >&2
+		exit 1
+	fi
+	set -e
+}
+
+run_sysfs_custom_load_tests()
+{
+	RANDOM_FILE_PATH=$(setup_random_file)
+	RANDOM_FILE="$(basename $RANDOM_FILE_PATH)"
+	if load_fw_custom "$RANDOM_FILE" "$RANDOM_FILE_PATH" ; then
+		if ! diff -q "$RANDOM_FILE_PATH" /dev/test_firmware >/dev/null ; then
+			echo "$0: firmware was not loaded" >&2
+			exit 1
+		else
+			echo "$0: custom fallback loading mechanism works"
+		fi
+	fi
+
+	RANDOM_FILE_PATH=$(setup_random_file)
+	RANDOM_FILE="$(basename $RANDOM_FILE_PATH)"
+	if load_fw_custom "$RANDOM_FILE" "$RANDOM_FILE_PATH" ; then
+		if ! diff -q "$RANDOM_FILE_PATH" /dev/test_firmware >/dev/null ; then
+			echo "$0: firmware was not loaded" >&2
+			exit 1
+		else
+			echo "$0: custom fallback loading mechanism works"
+		fi
+	fi
+
+	RANDOM_FILE_REAL="$RANDOM_FILE_PATH"
+	FAKE_RANDOM_FILE_PATH=$(setup_random_file_fake)
+	FAKE_RANDOM_FILE="$(basename $FAKE_RANDOM_FILE_PATH)"
+
+	if load_fw_custom_cancel "$FAKE_RANDOM_FILE" "$RANDOM_FILE_REAL" ; then
+		if diff -q "$RANDOM_FILE_PATH" /dev/test_firmware >/dev/null ; then
+			echo "$0: firmware was expected to be cancelled" >&2
+			exit 1
+		else
+			echo "$0: cancelling custom fallback mechanism works"
+		fi
+	fi
+}
+
+if [ "$HAS_FW_LOADER_USER_HELPER_FALLBACK" = "yes" ]; then
+	run_sysfs_main_tests
+fi
+
+run_sysfs_custom_load_tests
+
+exit 0
diff --git a/tools/testing/selftests/firmware/fw_filesystem.sh b/tools/testing/selftests/firmware/fw_filesystem.sh
new file mode 100755
index 000000000..a4320c4b4
--- /dev/null
+++ b/tools/testing/selftests/firmware/fw_filesystem.sh
@@ -0,0 +1,332 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# This validates that the kernel will load firmware out of its list of
+# firmware locations on disk. Since the user helper does similar work,
+# we reset the custom load directory to a location the user helper doesn't
+# know so we can be sure we're not accidentally testing the user helper.
+set -e
+
+TEST_REQS_FW_SYSFS_FALLBACK="no"
+TEST_REQS_FW_SET_CUSTOM_PATH="yes"
+TEST_DIR=$(dirname $0)
+source $TEST_DIR/fw_lib.sh
+
+check_mods
+check_setup
+verify_reqs
+setup_tmp_file
+
+trap "test_finish" EXIT
+
+if [ "$HAS_FW_LOADER_USER_HELPER" = "yes" ]; then
+	# Turn down the timeout so failures don't take so long.
+	echo 1 >/sys/class/firmware/timeout
+fi
+
+if printf '\000' >"$DIR"/trigger_request 2> /dev/null; then
+	echo "$0: empty filename should not succeed" >&2
+	exit 1
+fi
+
+if [ ! -e "$DIR"/trigger_async_request ]; then
+	echo "$0: empty filename: async trigger not present, ignoring test" >&2
+	exit $ksft_skip
+else
+	if printf '\000' >"$DIR"/trigger_async_request 2> /dev/null; then
+		echo "$0: empty filename should not succeed (async)" >&2
+		exit 1
+	fi
+fi
+
+# Request a firmware that doesn't exist, it should fail.
+if echo -n "nope-$NAME" >"$DIR"/trigger_request 2> /dev/null; then
+	echo "$0: firmware shouldn't have loaded" >&2
+	exit 1
+fi
+if diff -q "$FW" /dev/test_firmware >/dev/null ; then
+	echo "$0: firmware was not expected to match" >&2
+	exit 1
+else
+	if [ "$HAS_FW_LOADER_USER_HELPER" = "yes" ]; then
+		echo "$0: timeout works"
+	fi
+fi
+
+# This should succeed via kernel load or will fail after 1 second after
+# being handed over to the user helper, which won't find the fw either.
+if ! echo -n "$NAME" >"$DIR"/trigger_request ; then
+	echo "$0: could not trigger request" >&2
+	exit 1
+fi
+
+# Verify the contents are what we expect.
+if ! diff -q "$FW" /dev/test_firmware >/dev/null ; then
+	echo "$0: firmware was not loaded" >&2
+	exit 1
+else
+	echo "$0: filesystem loading works"
+fi
+
+# Try the asynchronous version too
+if [ ! -e "$DIR"/trigger_async_request ]; then
+	echo "$0: firmware loading: async trigger not present, ignoring test" >&2
+	exit $ksft_skip
+else
+	if ! echo -n "$NAME" >"$DIR"/trigger_async_request ; then
+		echo "$0: could not trigger async request" >&2
+		exit 1
+	fi
+
+	# Verify the contents are what we expect.
+	if ! diff -q "$FW" /dev/test_firmware >/dev/null ; then
+		echo "$0: firmware was not loaded (async)" >&2
+		exit 1
+	else
+		echo "$0: async filesystem loading works"
+	fi
+fi
+
+### Batched requests tests
+test_config_present()
+{
+	if [ ! -f $DIR/reset ]; then
+		echo "Configuration triggers not present, ignoring test"
+		exit $ksft_skip
+	fi
+}
+
+# Defaults :
+#
+# send_uevent: 1
+# sync_direct: 0
+# name: test-firmware.bin
+# num_requests: 4
+config_reset()
+{
+	echo 1 >  $DIR/reset
+}
+
+release_all_firmware()
+{
+	echo 1 >  $DIR/release_all_firmware
+}
+
+config_set_name()
+{
+	echo -n $1 >  $DIR/config_name
+}
+
+config_set_sync_direct()
+{
+	echo 1 >  $DIR/config_sync_direct
+}
+
+config_unset_sync_direct()
+{
+	echo 0 >  $DIR/config_sync_direct
+}
+
+config_set_uevent()
+{
+	echo 1 >  $DIR/config_send_uevent
+}
+
+config_unset_uevent()
+{
+	echo 0 >  $DIR/config_send_uevent
+}
+
+config_trigger_sync()
+{
+	echo -n 1 > $DIR/trigger_batched_requests 2>/dev/null
+}
+
+config_trigger_async()
+{
+	echo -n 1 > $DIR/trigger_batched_requests_async 2> /dev/null
+}
+
+config_set_read_fw_idx()
+{
+	echo -n $1 > $DIR/config_read_fw_idx 2> /dev/null
+}
+
+read_firmwares()
+{
+	for i in $(seq 0 3); do
+		config_set_read_fw_idx $i
+		# Verify the contents are what we expect.
+		# -Z required for now -- check for yourself, md5sum
+		# on $FW and DIR/read_firmware will yield the same. Even
+		# cmp agrees, so something is off.
+		if ! diff -q -Z "$FW" $DIR/read_firmware 2>/dev/null ; then
+			echo "request #$i: firmware was not loaded" >&2
+			exit 1
+		fi
+	done
+}
+
+read_firmwares_expect_nofile()
+{
+	for i in $(seq 0 3); do
+		config_set_read_fw_idx $i
+		# Ensures contents differ
+		if diff -q -Z "$FW" $DIR/read_firmware 2>/dev/null ; then
+			echo "request $i: file was not expected to match" >&2
+			exit 1
+		fi
+	done
+}
+
+test_batched_request_firmware_nofile()
+{
+	echo -n "Batched request_firmware() nofile try #$1: "
+	config_reset
+	config_set_name nope-test-firmware.bin
+	config_trigger_sync
+	read_firmwares_expect_nofile
+	release_all_firmware
+	echo "OK"
+}
+
+test_batched_request_firmware_direct_nofile()
+{
+	echo -n "Batched request_firmware_direct() nofile try #$1: "
+	config_reset
+	config_set_name nope-test-firmware.bin
+	config_set_sync_direct
+	config_trigger_sync
+	release_all_firmware
+	echo "OK"
+}
+
+test_request_firmware_nowait_uevent_nofile()
+{
+	echo -n "Batched request_firmware_nowait(uevent=true) nofile try #$1: "
+	config_reset
+	config_set_name nope-test-firmware.bin
+	config_trigger_async
+	release_all_firmware
+	echo "OK"
+}
+
+test_wait_and_cancel_custom_load()
+{
+	if [ "$HAS_FW_LOADER_USER_HELPER" != "yes" ]; then
+		return
+	fi
+	local timeout=10
+	name=$1
+	while [ ! -e "$DIR"/"$name"/loading ]; do
+		sleep 0.1
+		timeout=$(( $timeout - 1 ))
+		if [ "$timeout" -eq 0 ]; then
+			echo "firmware interface never appeared:" >&2
+			echo "$DIR/$name/loading" >&2
+			exit 1
+		fi
+	done
+	echo -1 >"$DIR"/"$name"/loading
+}
+
+test_request_firmware_nowait_custom_nofile()
+{
+	echo -n "Batched request_firmware_nowait(uevent=false) nofile try #$1: "
+	config_reset
+	config_unset_uevent
+	RANDOM_FILE_PATH=$(setup_random_file_fake)
+	RANDOM_FILE="$(basename $RANDOM_FILE_PATH)"
+	config_set_name $RANDOM_FILE
+	config_trigger_async &
+	test_wait_and_cancel_custom_load $RANDOM_FILE
+	wait
+	release_all_firmware
+	echo "OK"
+}
+
+test_batched_request_firmware()
+{
+	echo -n "Batched request_firmware() try #$1: "
+	config_reset
+	config_trigger_sync
+	read_firmwares
+	release_all_firmware
+	echo "OK"
+}
+
+test_batched_request_firmware_direct()
+{
+	echo -n "Batched request_firmware_direct() try #$1: "
+	config_reset
+	config_set_sync_direct
+	config_trigger_sync
+	release_all_firmware
+	echo "OK"
+}
+
+test_request_firmware_nowait_uevent()
+{
+	echo -n "Batched request_firmware_nowait(uevent=true) try #$1: "
+	config_reset
+	config_trigger_async
+	release_all_firmware
+	echo "OK"
+}
+
+test_request_firmware_nowait_custom()
+{
+	echo -n "Batched request_firmware_nowait(uevent=false) try #$1: "
+	config_reset
+	config_unset_uevent
+	RANDOM_FILE_PATH=$(setup_random_file)
+	RANDOM_FILE="$(basename $RANDOM_FILE_PATH)"
+	config_set_name $RANDOM_FILE
+	config_trigger_async
+	release_all_firmware
+	echo "OK"
+}
+
+# Only continue if batched request triggers are present on the
+# test-firmware driver
+test_config_present
+
+# test with the file present
+echo
+echo "Testing with the file present..."
+for i in $(seq 1 5); do
+	test_batched_request_firmware $i
+done
+
+for i in $(seq 1 5); do
+	test_batched_request_firmware_direct $i
+done
+
+for i in $(seq 1 5); do
+	test_request_firmware_nowait_uevent $i
+done
+
+for i in $(seq 1 5); do
+	test_request_firmware_nowait_custom $i
+done
+
+# Test for file not found, errors are expected, the failure would be
+# a hung task, which would require a hard reset.
+echo
+echo "Testing with the file missing..."
+for i in $(seq 1 5); do
+	test_batched_request_firmware_nofile $i
+done
+
+for i in $(seq 1 5); do
+	test_batched_request_firmware_direct_nofile $i
+done
+
+for i in $(seq 1 5); do
+	test_request_firmware_nowait_uevent_nofile $i
+done
+
+for i in $(seq 1 5); do
+	test_request_firmware_nowait_custom_nofile $i
+done
+
+exit 0
diff --git a/tools/testing/selftests/firmware/fw_lib.sh b/tools/testing/selftests/firmware/fw_lib.sh
new file mode 100755
index 000000000..8a853ace5
--- /dev/null
+++ b/tools/testing/selftests/firmware/fw_lib.sh
@@ -0,0 +1,205 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Library of helpers for test scripts.
+set -e
+
+DIR=/sys/devices/virtual/misc/test_firmware
+
+PROC_CONFIG="/proc/config.gz"
+TEST_DIR=$(dirname $0)
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+print_reqs_exit()
+{
+	echo "You must have the following enabled in your kernel:" >&2
+	cat $TEST_DIR/config >&2
+	exit $ksft_skip
+}
+
+test_modprobe()
+{
+	if [ ! -d $DIR ]; then
+		print_reqs_exit
+	fi
+}
+
+check_mods()
+{
+	local uid=$(id -u)
+	if [ $uid -ne 0 ]; then
+		echo "skip all tests: must be run as root" >&2
+		exit $ksft_skip
+	fi
+
+	trap "test_modprobe" EXIT
+	if [ ! -d $DIR ]; then
+		modprobe test_firmware
+	fi
+	if [ ! -f $PROC_CONFIG ]; then
+		if modprobe configs 2>/dev/null; then
+			echo "Loaded configs module"
+			if [ ! -f $PROC_CONFIG ]; then
+				echo "You must have the following enabled in your kernel:" >&2
+				cat $TEST_DIR/config >&2
+				echo "Resorting to old heuristics" >&2
+			fi
+		else
+			echo "Failed to load configs module, using old heuristics" >&2
+		fi
+	fi
+}
+
+check_setup()
+{
+	HAS_FW_LOADER_USER_HELPER="$(kconfig_has CONFIG_FW_LOADER_USER_HELPER=y)"
+	HAS_FW_LOADER_USER_HELPER_FALLBACK="$(kconfig_has CONFIG_FW_LOADER_USER_HELPER_FALLBACK=y)"
+	PROC_FW_IGNORE_SYSFS_FALLBACK="0"
+	PROC_FW_FORCE_SYSFS_FALLBACK="0"
+
+	if [ -z $PROC_SYS_DIR ]; then
+		PROC_SYS_DIR="/proc/sys/kernel"
+	fi
+
+	FW_PROC="${PROC_SYS_DIR}/firmware_config"
+	FW_FORCE_SYSFS_FALLBACK="$FW_PROC/force_sysfs_fallback"
+	FW_IGNORE_SYSFS_FALLBACK="$FW_PROC/ignore_sysfs_fallback"
+
+	if [ -f $FW_FORCE_SYSFS_FALLBACK ]; then
+		PROC_FW_FORCE_SYSFS_FALLBACK="$(cat $FW_FORCE_SYSFS_FALLBACK)"
+	fi
+
+	if [ -f $FW_IGNORE_SYSFS_FALLBACK ]; then
+		PROC_FW_IGNORE_SYSFS_FALLBACK="$(cat $FW_IGNORE_SYSFS_FALLBACK)"
+	fi
+
+	if [ "$PROC_FW_FORCE_SYSFS_FALLBACK" = "1" ]; then
+		HAS_FW_LOADER_USER_HELPER="yes"
+		HAS_FW_LOADER_USER_HELPER_FALLBACK="yes"
+	fi
+
+	if [ "$PROC_FW_IGNORE_SYSFS_FALLBACK" = "1" ]; then
+		HAS_FW_LOADER_USER_HELPER_FALLBACK="no"
+		HAS_FW_LOADER_USER_HELPER="no"
+	fi
+
+	if [ "$HAS_FW_LOADER_USER_HELPER" = "yes" ]; then
+	       OLD_TIMEOUT="$(cat /sys/class/firmware/timeout)"
+	fi
+
+	OLD_FWPATH="$(cat /sys/module/firmware_class/parameters/path)"
+}
+
+verify_reqs()
+{
+	if [ "$TEST_REQS_FW_SYSFS_FALLBACK" = "yes" ]; then
+		if [ ! "$HAS_FW_LOADER_USER_HELPER" = "yes" ]; then
+			echo "usermode helper disabled so ignoring test"
+			exit 0
+		fi
+	fi
+}
+
+setup_tmp_file()
+{
+	FWPATH=$(mktemp -d)
+	FW="$FWPATH/test-firmware.bin"
+	echo "ABCD0123" >"$FW"
+	NAME=$(basename "$FW")
+	if [ "$TEST_REQS_FW_SET_CUSTOM_PATH" = "yes" ]; then
+		echo -n "$FWPATH" >/sys/module/firmware_class/parameters/path
+	fi
+}
+
+__setup_random_file()
+{
+	RANDOM_FILE_PATH="$(mktemp -p $FWPATH)"
+	# mktemp says dry-run -n is unsafe, so...
+	if [[ "$1" = "fake" ]]; then
+		rm -rf $RANDOM_FILE_PATH
+		sync
+	else
+		echo "ABCD0123" >"$RANDOM_FILE_PATH"
+	fi
+	echo $RANDOM_FILE_PATH
+}
+
+setup_random_file()
+{
+	echo $(__setup_random_file)
+}
+
+setup_random_file_fake()
+{
+	echo $(__setup_random_file fake)
+}
+
+proc_set_force_sysfs_fallback()
+{
+	if [ -f $FW_FORCE_SYSFS_FALLBACK ]; then
+		echo -n $1 > $FW_FORCE_SYSFS_FALLBACK
+		check_setup
+	fi
+}
+
+proc_set_ignore_sysfs_fallback()
+{
+	if [ -f $FW_IGNORE_SYSFS_FALLBACK ]; then
+		echo -n $1 > $FW_IGNORE_SYSFS_FALLBACK
+		check_setup
+	fi
+}
+
+proc_restore_defaults()
+{
+	proc_set_force_sysfs_fallback 0
+	proc_set_ignore_sysfs_fallback 0
+}
+
+test_finish()
+{
+	if [ "$HAS_FW_LOADER_USER_HELPER" = "yes" ]; then
+		echo "$OLD_TIMEOUT" >/sys/class/firmware/timeout
+	fi
+	if [ "$TEST_REQS_FW_SET_CUSTOM_PATH" = "yes" ]; then
+		if [ "$OLD_FWPATH" = "" ]; then
+			# A zero-length write won't work; write a null byte
+			printf '\000' >/sys/module/firmware_class/parameters/path
+		else
+			echo -n "$OLD_FWPATH" >/sys/module/firmware_class/parameters/path
+		fi
+	fi
+	if [ -f $FW ]; then
+		rm -f "$FW"
+	fi
+	if [ -d $FWPATH ]; then
+		rm -rf "$FWPATH"
+	fi
+	proc_restore_defaults
+}
+
+kconfig_has()
+{
+	if [ -f $PROC_CONFIG ]; then
+		if zgrep -q $1 $PROC_CONFIG 2>/dev/null; then
+			echo "yes"
+		else
+			echo "no"
+		fi
+	else
+		# We currently don't have easy heuristics to infer this
+		# so best we can do is just try to use the kernel assuming
+		# you had enabled it. This matches the old behaviour.
+		if [ "$1" = "CONFIG_FW_LOADER_USER_HELPER_FALLBACK=y" ]; then
+			echo "yes"
+		elif [ "$1" = "CONFIG_FW_LOADER_USER_HELPER=y" ]; then
+			if [ -d /sys/class/firmware/ ]; then
+				echo yes
+			else
+				echo no
+			fi
+		fi
+	fi
+}
diff --git a/tools/testing/selftests/firmware/fw_run_tests.sh b/tools/testing/selftests/firmware/fw_run_tests.sh
new file mode 100755
index 000000000..cffdd4eb0
--- /dev/null
+++ b/tools/testing/selftests/firmware/fw_run_tests.sh
@@ -0,0 +1,70 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This runs all known tests across all known possible configurations we could
+# emulate in one run.
+
+set -e
+
+TEST_DIR=$(dirname $0)
+source $TEST_DIR/fw_lib.sh
+
+export HAS_FW_LOADER_USER_HELPER=""
+export HAS_FW_LOADER_USER_HELPER_FALLBACK=""
+
+run_tests()
+{
+	proc_set_force_sysfs_fallback $1
+	proc_set_ignore_sysfs_fallback $2
+	$TEST_DIR/fw_filesystem.sh
+
+	proc_set_force_sysfs_fallback $1
+	proc_set_ignore_sysfs_fallback $2
+	$TEST_DIR/fw_fallback.sh
+}
+
+run_test_config_0001()
+{
+	echo "-----------------------------------------------------"
+	echo "Running kernel configuration test 1 -- rare"
+	echo "Emulates:"
+	echo "CONFIG_FW_LOADER=y"
+	echo "CONFIG_FW_LOADER_USER_HELPER=n"
+	echo "CONFIG_FW_LOADER_USER_HELPER_FALLBACK=n"
+	run_tests 0 1
+}
+
+run_test_config_0002()
+{
+	echo "-----------------------------------------------------"
+	echo "Running kernel configuration test 2 -- distro"
+	echo "Emulates:"
+	echo "CONFIG_FW_LOADER=y"
+	echo "CONFIG_FW_LOADER_USER_HELPER=y"
+	echo "CONFIG_FW_LOADER_USER_HELPER_FALLBACK=n"
+	proc_set_ignore_sysfs_fallback 0
+	run_tests 0 0
+}
+
+run_test_config_0003()
+{
+	echo "-----------------------------------------------------"
+	echo "Running kernel configuration test 3 -- android"
+	echo "Emulates:"
+	echo "CONFIG_FW_LOADER=y"
+	echo "CONFIG_FW_LOADER_USER_HELPER=y"
+	echo "CONFIG_FW_LOADER_USER_HELPER_FALLBACK=y"
+	run_tests 1 0
+}
+
+check_mods
+check_setup
+
+if [ -f $FW_FORCE_SYSFS_FALLBACK ]; then
+	run_test_config_0001
+	run_test_config_0002
+	run_test_config_0003
+else
+	echo "Running basic kernel configuration, working with your config"
+	run_tests
+fi
diff --git a/tools/testing/selftests/ftrace/.gitignore b/tools/testing/selftests/ftrace/.gitignore
new file mode 100644
index 000000000..98d8a5a63
--- /dev/null
+++ b/tools/testing/selftests/ftrace/.gitignore
@@ -0,0 +1 @@
+logs
diff --git a/tools/testing/selftests/ftrace/Makefile b/tools/testing/selftests/ftrace/Makefile
new file mode 100644
index 000000000..cd1f5b3a7
--- /dev/null
+++ b/tools/testing/selftests/ftrace/Makefile
@@ -0,0 +1,8 @@
+# SPDX-License-Identifier: GPL-2.0
+all:
+
+TEST_PROGS := ftracetest
+TEST_FILES := test.d
+EXTRA_CLEAN := $(OUTPUT)/logs/*
+
+include ../lib.mk
diff --git a/tools/testing/selftests/ftrace/README b/tools/testing/selftests/ftrace/README
new file mode 100644
index 000000000..182e76fa4
--- /dev/null
+++ b/tools/testing/selftests/ftrace/README
@@ -0,0 +1,82 @@
+Linux Ftrace Testcases
+
+This is a collection of testcases for ftrace tracing feature in the Linux
+kernel. Since ftrace exports interfaces via the debugfs, we just need
+shell scripts for testing. Feel free to add new test cases.
+
+Running the ftrace testcases
+============================
+
+At first, you need to be the root user to run this script.
+To run all testcases:
+
+  $ sudo ./ftracetest
+
+To run specific testcases:
+
+  # ./ftracetest test.d/basic3.tc
+
+Or you can also run testcases under given directory:
+
+  # ./ftracetest test.d/kprobe/
+
+Contributing new testcases
+==========================
+
+Copy test.d/template to your testcase (whose filename must have *.tc
+extension) and rewrite the test description line.
+
+ * The working directory of the script is <debugfs>/tracing/.
+
+ * Take care with side effects as the tests are run with root privilege.
+
+ * The tests should not run for a long period of time (more than 1 min.)
+   These are to be unit tests.
+
+ * You can add a directory for your testcases under test.d/ if needed.
+
+ * The test cases should run on dash (busybox shell) for testing on
+   minimal cross-build environments.
+
+ * Note that the tests are run with "set -e" (errexit) option. If any
+   command fails, the test will be terminated immediately.
+
+ * The tests can return some result codes instead of pass or fail by
+   using exit_unresolved, exit_untested, exit_unsupported and exit_xfail.
+
+Result code
+===========
+
+Ftracetest supports following result codes.
+
+ * PASS: The test succeeded as expected. The test which exits with 0 is
+         counted as passed test.
+
+ * FAIL: The test failed, but was expected to succeed. The test which exits
+         with !0 is counted as failed test.
+
+ * UNRESOLVED: The test produced unclear or intermidiate results.
+             for example, the test was interrupted
+                       or the test depends on a previous test, which failed.
+                       or the test was set up incorrectly
+             The test which is in above situation, must call exit_unresolved.
+
+ * UNTESTED: The test was not run, currently just a placeholder.
+             In this case, the test must call exit_untested.
+
+ * UNSUPPORTED: The test failed because of lack of feature.
+               In this case, the test must call exit_unsupported.
+
+ * XFAIL: The test failed, and was expected to fail.
+          To return XFAIL, call exit_xfail from the test.
+
+There are some sample test scripts for result code under samples/.
+You can also run samples as below:
+
+  # ./ftracetest samples/
+
+TODO
+====
+
+ * Fancy colored output :)
+
diff --git a/tools/testing/selftests/ftrace/config b/tools/testing/selftests/ftrace/config
new file mode 100644
index 000000000..07db5ab09
--- /dev/null
+++ b/tools/testing/selftests/ftrace/config
@@ -0,0 +1,9 @@
+CONFIG_KPROBES=y
+CONFIG_FTRACE=y
+CONFIG_FUNCTION_PROFILER=y
+CONFIG_TRACER_SNAPSHOT=y
+CONFIG_STACK_TRACER=y
+CONFIG_HIST_TRIGGERS=y
+CONFIG_PREEMPT_TRACER=y
+CONFIG_IRQSOFF_TRACER=y
+CONFIG_PREEMPTIRQ_DELAY_TEST=m
diff --git a/tools/testing/selftests/ftrace/ftracetest b/tools/testing/selftests/ftrace/ftracetest
new file mode 100755
index 000000000..f9a9d424c
--- /dev/null
+++ b/tools/testing/selftests/ftrace/ftracetest
@@ -0,0 +1,330 @@
+#!/bin/sh
+
+# ftracetest - Ftrace test shell scripts
+#
+# Copyright (C) Hitachi Ltd., 2014
+#  Written by Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
+#
+# Released under the terms of the GPL v2.
+
+usage() { # errno [message]
+[ ! -z "$2" ] && echo $2
+echo "Usage: ftracetest [options] [testcase(s)] [testcase-directory(s)]"
+echo " Options:"
+echo "		-h|--help  Show help message"
+echo "		-k|--keep  Keep passed test logs"
+echo "		-v|--verbose Increase verbosity of test messages"
+echo "		-vv        Alias of -v -v (Show all results in stdout)"
+echo "		-vvv       Alias of -v -v -v (Show all commands immediately)"
+echo "		--fail-unsupported Treat UNSUPPORTED as a failure"
+echo "		-d|--debug Debug mode (trace all shell commands)"
+echo "		-l|--logdir <dir> Save logs on the <dir>"
+echo "		            If <dir> is -, all logs output in console only"
+exit $1
+}
+
+errexit() { # message
+  echo "Error: $1" 1>&2
+  exit 1
+}
+
+# Ensuring user privilege
+if [ `id -u` -ne 0 ]; then
+  errexit "this must be run by root user"
+fi
+
+# Utilities
+absdir() { # file_path
+  (cd `dirname $1`; pwd)
+}
+
+abspath() {
+  echo `absdir $1`/`basename $1`
+}
+
+find_testcases() { #directory
+  echo `find $1 -name \*.tc | sort`
+}
+
+parse_opts() { # opts
+  local OPT_TEST_CASES=
+  local OPT_TEST_DIR=
+
+  while [ ! -z "$1" ]; do
+    case "$1" in
+    --help|-h)
+      usage 0
+    ;;
+    --keep|-k)
+      KEEP_LOG=1
+      shift 1
+    ;;
+    --verbose|-v|-vv|-vvv)
+      VERBOSE=$((VERBOSE + 1))
+      [ $1 = '-vv' ] && VERBOSE=$((VERBOSE + 1))
+      [ $1 = '-vvv' ] && VERBOSE=$((VERBOSE + 2))
+      shift 1
+    ;;
+    --debug|-d)
+      DEBUG=1
+      shift 1
+    ;;
+    --fail-unsupported)
+      UNSUPPORTED_RESULT=1
+      shift 1
+    ;;
+    --logdir|-l)
+      LOG_DIR=$2
+      shift 2
+    ;;
+    *.tc)
+      if [ -f "$1" ]; then
+        OPT_TEST_CASES="$OPT_TEST_CASES `abspath $1`"
+        shift 1
+      else
+        usage 1 "$1 is not a testcase"
+      fi
+      ;;
+    *)
+      if [ -d "$1" ]; then
+        OPT_TEST_DIR=`abspath $1`
+        OPT_TEST_CASES="$OPT_TEST_CASES `find_testcases $OPT_TEST_DIR`"
+        shift 1
+      else
+        usage 1 "Invalid option ($1)"
+      fi
+    ;;
+    esac
+  done
+  if [ ! -z "$OPT_TEST_CASES" ]; then
+    TEST_CASES=$OPT_TEST_CASES
+  fi
+}
+
+# Parameters
+DEBUGFS_DIR=`grep debugfs /proc/mounts | cut -f2 -d' ' | head -1`
+if [ -z "$DEBUGFS_DIR" ]; then
+    TRACING_DIR=`grep tracefs /proc/mounts | cut -f2 -d' ' | head -1`
+else
+    TRACING_DIR=$DEBUGFS_DIR/tracing
+fi
+
+TOP_DIR=`absdir $0`
+TEST_DIR=$TOP_DIR/test.d
+TEST_CASES=`find_testcases $TEST_DIR`
+LOG_DIR=$TOP_DIR/logs/`date +%Y%m%d-%H%M%S`/
+KEEP_LOG=0
+DEBUG=0
+VERBOSE=0
+UNSUPPORTED_RESULT=0
+# Parse command-line options
+parse_opts $*
+
+[ $DEBUG -ne 0 ] && set -x
+
+# Verify parameters
+if [ -z "$TRACING_DIR" -o ! -d "$TRACING_DIR" ]; then
+  errexit "No ftrace directory found"
+fi
+
+# Preparing logs
+if [ "x$LOG_DIR" = "x-" ]; then
+  LOG_FILE=
+  date
+else
+  LOG_FILE=$LOG_DIR/ftracetest.log
+  mkdir -p $LOG_DIR || errexit "Failed to make a log directory: $LOG_DIR"
+  date > $LOG_FILE
+fi
+
+prlog() { # messages
+  [ -z "$LOG_FILE" ] && echo "$@" || echo "$@" | tee -a $LOG_FILE
+}
+catlog() { #file
+  [ -z "$LOG_FILE" ] && cat $1 || cat $1 | tee -a $LOG_FILE
+}
+prlog "=== Ftrace unit tests ==="
+
+
+# Testcase management
+# Test result codes - Dejagnu extended code
+PASS=0	# The test succeeded.
+FAIL=1	# The test failed, but was expected to succeed.
+UNRESOLVED=2  # The test produced indeterminate results. (e.g. interrupted)
+UNTESTED=3    # The test was not run, currently just a placeholder.
+UNSUPPORTED=4 # The test failed because of lack of feature.
+XFAIL=5	# The test failed, and was expected to fail.
+
+# Accumulations
+PASSED_CASES=
+FAILED_CASES=
+UNRESOLVED_CASES=
+UNTESTED_CASES=
+UNSUPPORTED_CASES=
+XFAILED_CASES=
+UNDEFINED_CASES=
+TOTAL_RESULT=0
+
+INSTANCE=
+CASENO=0
+testcase() { # testfile
+  CASENO=$((CASENO+1))
+  desc=`grep "^#[ \t]*description:" $1 | cut -f2 -d:`
+  prlog -n "[$CASENO]$INSTANCE$desc"
+}
+
+test_on_instance() { # testfile
+  grep -q "^#[ \t]*flags:.*instance" $1
+}
+
+eval_result() { # sigval
+  case $1 in
+    $PASS)
+      prlog "	[PASS]"
+      PASSED_CASES="$PASSED_CASES $CASENO"
+      return 0
+    ;;
+    $FAIL)
+      prlog "	[FAIL]"
+      FAILED_CASES="$FAILED_CASES $CASENO"
+      return 1 # this is a bug.
+    ;;
+    $UNRESOLVED)
+      prlog "	[UNRESOLVED]"
+      UNRESOLVED_CASES="$UNRESOLVED_CASES $CASENO"
+      return 1 # this is a kind of bug.. something happened.
+    ;;
+    $UNTESTED)
+      prlog "	[UNTESTED]"
+      UNTESTED_CASES="$UNTESTED_CASES $CASENO"
+      return 0
+    ;;
+    $UNSUPPORTED)
+      prlog "	[UNSUPPORTED]"
+      UNSUPPORTED_CASES="$UNSUPPORTED_CASES $CASENO"
+      return $UNSUPPORTED_RESULT # depends on use case
+    ;;
+    $XFAIL)
+      prlog "	[XFAIL]"
+      XFAILED_CASES="$XFAILED_CASES $CASENO"
+      return 0
+    ;;
+    *)
+      prlog "	[UNDEFINED]"
+      UNDEFINED_CASES="$UNDEFINED_CASES $CASENO"
+      return 1 # this must be a test bug
+    ;;
+  esac
+}
+
+# Signal handling for result codes
+SIG_RESULT=
+SIG_BASE=36	# Use realtime signals
+SIG_PID=$$
+
+exit_pass () {
+  exit 0
+}
+
+SIG_FAIL=$((SIG_BASE + FAIL))
+exit_fail () {
+  exit 1
+}
+trap 'SIG_RESULT=$FAIL' $SIG_FAIL
+
+SIG_UNRESOLVED=$((SIG_BASE + UNRESOLVED))
+exit_unresolved () {
+  kill -s $SIG_UNRESOLVED $SIG_PID
+  exit 0
+}
+trap 'SIG_RESULT=$UNRESOLVED' $SIG_UNRESOLVED
+
+SIG_UNTESTED=$((SIG_BASE + UNTESTED))
+exit_untested () {
+  kill -s $SIG_UNTESTED $SIG_PID
+  exit 0
+}
+trap 'SIG_RESULT=$UNTESTED' $SIG_UNTESTED
+
+SIG_UNSUPPORTED=$((SIG_BASE + UNSUPPORTED))
+exit_unsupported () {
+  kill -s $SIG_UNSUPPORTED $SIG_PID
+  exit 0
+}
+trap 'SIG_RESULT=$UNSUPPORTED' $SIG_UNSUPPORTED
+
+SIG_XFAIL=$((SIG_BASE + XFAIL))
+exit_xfail () {
+  kill -s $SIG_XFAIL $SIG_PID
+  exit 0
+}
+trap 'SIG_RESULT=$XFAIL' $SIG_XFAIL
+
+__run_test() { # testfile
+  # setup PID and PPID, $$ is not updated.
+  (cd $TRACING_DIR; read PID _ < /proc/self/stat; set -e; set -x; initialize_ftrace; . $1)
+  [ $? -ne 0 ] && kill -s $SIG_FAIL $SIG_PID
+}
+
+# Run one test case
+run_test() { # testfile
+  local testname=`basename $1`
+  if [ ! -z "$LOG_FILE" ] ; then
+    local testlog=`mktemp $LOG_DIR/${testname}-log.XXXXXX`
+  else
+    local testlog=/proc/self/fd/1
+  fi
+  export TMPDIR=`mktemp -d /tmp/ftracetest-dir.XXXXXX`
+  testcase $1
+  echo "execute$INSTANCE: "$1 > $testlog
+  SIG_RESULT=0
+  if [ -z "$LOG_FILE" ]; then
+    __run_test $1 2>&1
+  elif [ $VERBOSE -ge 3 ]; then
+    __run_test $1 | tee -a $testlog 2>&1
+  elif [ $VERBOSE -eq 2 ]; then
+    __run_test $1 2>> $testlog | tee -a $testlog
+  else
+    __run_test $1 >> $testlog 2>&1
+  fi
+  eval_result $SIG_RESULT
+  if [ $? -eq 0 ]; then
+    # Remove test log if the test was done as it was expected.
+    [ $KEEP_LOG -eq 0 -a ! -z "$LOG_FILE" ] && rm $testlog
+  else
+    [ $VERBOSE -eq 1 -o $VERBOSE -eq 2 ] && catlog $testlog
+    TOTAL_RESULT=1
+  fi
+  rm -rf $TMPDIR
+}
+
+# load in the helper functions
+. $TEST_DIR/functions
+
+# Main loop
+for t in $TEST_CASES; do
+  run_test $t
+done
+
+# Test on instance loop
+INSTANCE=" (instance) "
+for t in $TEST_CASES; do
+  test_on_instance $t || continue
+  SAVED_TRACING_DIR=$TRACING_DIR
+  export TRACING_DIR=`mktemp -d $TRACING_DIR/instances/ftracetest.XXXXXX`
+  run_test $t
+  rmdir $TRACING_DIR
+  TRACING_DIR=$SAVED_TRACING_DIR
+done
+
+prlog ""
+prlog "# of passed: " `echo $PASSED_CASES | wc -w`
+prlog "# of failed: " `echo $FAILED_CASES | wc -w`
+prlog "# of unresolved: " `echo $UNRESOLVED_CASES | wc -w`
+prlog "# of untested: " `echo $UNTESTED_CASES | wc -w`
+prlog "# of unsupported: " `echo $UNSUPPORTED_CASES | wc -w`
+prlog "# of xfailed: " `echo $XFAILED_CASES | wc -w`
+prlog "# of undefined(test bug): " `echo $UNDEFINED_CASES | wc -w`
+
+# if no error, return 0
+exit $TOTAL_RESULT
diff --git a/tools/testing/selftests/ftrace/samples/fail.tc b/tools/testing/selftests/ftrace/samples/fail.tc
new file mode 100644
index 000000000..15e35b956
--- /dev/null
+++ b/tools/testing/selftests/ftrace/samples/fail.tc
@@ -0,0 +1,4 @@
+#!/bin/sh
+# description: failure-case example
+cat non-exist-file
+echo "this is not executed"
diff --git a/tools/testing/selftests/ftrace/samples/pass.tc b/tools/testing/selftests/ftrace/samples/pass.tc
new file mode 100644
index 000000000..d01549370
--- /dev/null
+++ b/tools/testing/selftests/ftrace/samples/pass.tc
@@ -0,0 +1,3 @@
+#!/bin/sh
+# description: pass-case example
+return 0
diff --git a/tools/testing/selftests/ftrace/samples/unresolved.tc b/tools/testing/selftests/ftrace/samples/unresolved.tc
new file mode 100644
index 000000000..41e99d335
--- /dev/null
+++ b/tools/testing/selftests/ftrace/samples/unresolved.tc
@@ -0,0 +1,4 @@
+#!/bin/sh
+# description: unresolved-case example
+trap exit_unresolved INT
+kill -INT $PID
diff --git a/tools/testing/selftests/ftrace/samples/unsupported.tc b/tools/testing/selftests/ftrace/samples/unsupported.tc
new file mode 100644
index 000000000..45910ff13
--- /dev/null
+++ b/tools/testing/selftests/ftrace/samples/unsupported.tc
@@ -0,0 +1,3 @@
+#!/bin/sh
+# description: unsupported-case example
+exit_unsupported
diff --git a/tools/testing/selftests/ftrace/samples/untested.tc b/tools/testing/selftests/ftrace/samples/untested.tc
new file mode 100644
index 000000000..35a45946e
--- /dev/null
+++ b/tools/testing/selftests/ftrace/samples/untested.tc
@@ -0,0 +1,3 @@
+#!/bin/sh
+# description: untested-case example
+exit_untested
diff --git a/tools/testing/selftests/ftrace/samples/xfail.tc b/tools/testing/selftests/ftrace/samples/xfail.tc
new file mode 100644
index 000000000..9dd395323
--- /dev/null
+++ b/tools/testing/selftests/ftrace/samples/xfail.tc
@@ -0,0 +1,3 @@
+#!/bin/sh
+# description: xfail-case example
+cat non-exist-file || exit_xfail
diff --git a/tools/testing/selftests/ftrace/settings b/tools/testing/selftests/ftrace/settings
new file mode 100644
index 000000000..e7b941753
--- /dev/null
+++ b/tools/testing/selftests/ftrace/settings
@@ -0,0 +1 @@
+timeout=0
diff --git a/tools/testing/selftests/ftrace/test.d/00basic/basic1.tc b/tools/testing/selftests/ftrace/test.d/00basic/basic1.tc
new file mode 100644
index 000000000..9980ff14a
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/00basic/basic1.tc
@@ -0,0 +1,3 @@
+#!/bin/sh
+# description: Basic trace file check
+test -f README -a -f trace -a -f tracing_on -a -f trace_pipe
diff --git a/tools/testing/selftests/ftrace/test.d/00basic/basic2.tc b/tools/testing/selftests/ftrace/test.d/00basic/basic2.tc
new file mode 100644
index 000000000..531e47236
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/00basic/basic2.tc
@@ -0,0 +1,9 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Basic test for tracers
+# flags: instance
+test -f available_tracers
+for t in `cat available_tracers`; do
+  echo $t > current_tracer
+done
+echo nop > current_tracer
diff --git a/tools/testing/selftests/ftrace/test.d/00basic/basic3.tc b/tools/testing/selftests/ftrace/test.d/00basic/basic3.tc
new file mode 100644
index 000000000..58a2506f7
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/00basic/basic3.tc
@@ -0,0 +1,10 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Basic trace clock test
+# flags: instance
+test -f trace_clock
+for c in `cat trace_clock | tr  -d \[\]`; do
+  echo $c > trace_clock
+  grep '\['$c'\]' trace_clock
+done
+echo local > trace_clock
diff --git a/tools/testing/selftests/ftrace/test.d/00basic/basic4.tc b/tools/testing/selftests/ftrace/test.d/00basic/basic4.tc
new file mode 100644
index 000000000..0696098d6
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/00basic/basic4.tc
@@ -0,0 +1,5 @@
+#!/bin/sh
+# description: Basic event tracing check
+test -f available_events -a -f set_event -a -d events
+# check scheduler events are available
+grep -q sched available_events && exit_pass || exit_fail
diff --git a/tools/testing/selftests/ftrace/test.d/00basic/snapshot.tc b/tools/testing/selftests/ftrace/test.d/00basic/snapshot.tc
new file mode 100644
index 000000000..3b1f45e13
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/00basic/snapshot.tc
@@ -0,0 +1,28 @@
+#!/bin/sh
+# description: Snapshot and tracing setting
+# flags: instance
+
+[ ! -f snapshot ] && exit_unsupported
+
+echo "Set tracing off"
+echo 0 > tracing_on
+
+echo "Allocate and take a snapshot"
+echo 1 > snapshot
+
+# Since trace buffer is empty, snapshot is also empty, but allocated
+grep -q "Snapshot is allocated" snapshot
+
+echo "Ensure keep tracing off"
+test `cat tracing_on` -eq 0
+
+echo "Set tracing on"
+echo 1 > tracing_on
+
+echo "Take a snapshot again"
+echo 1 > snapshot
+
+echo "Ensure keep tracing on"
+test `cat tracing_on` -eq 1
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/event/event-enable.tc b/tools/testing/selftests/ftrace/test.d/event/event-enable.tc
new file mode 100644
index 000000000..9daf03418
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/event/event-enable.tc
@@ -0,0 +1,62 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: event tracing - enable/disable with event level files
+# flags: instance
+
+do_reset() {
+    echo > set_event
+    clear_trace
+}
+
+fail() { #msg
+    do_reset
+    echo $1
+    exit_fail
+}
+
+yield() {
+    ping localhost -c 1 || sleep .001 || usleep 1 || sleep 1
+}
+
+if [ ! -f set_event -o ! -d events/sched ]; then
+    echo "event tracing is not supported"
+    exit_unsupported
+fi
+
+reset_tracer
+do_reset
+
+echo 'sched:sched_switch' > set_event
+
+yield
+
+count=`cat trace | grep sched_switch | wc -l`
+if [ $count -eq 0 ]; then
+    fail "sched_switch events are not recorded"
+fi
+
+do_reset
+
+echo 1 > events/sched/sched_switch/enable
+
+yield
+
+count=`cat trace | grep sched_switch | wc -l`
+if [ $count -eq 0 ]; then
+    fail "sched_switch events are not recorded"
+fi
+
+do_reset
+
+echo 0 > events/sched/sched_switch/enable
+
+yield
+
+count=`cat trace | grep sched_switch | wc -l`
+if [ $count -ne 0 ]; then
+    fail "sched_switch events should not be recorded"
+fi
+
+do_reset
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/event/event-pid.tc b/tools/testing/selftests/ftrace/test.d/event/event-pid.tc
new file mode 100644
index 000000000..132478b30
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/event/event-pid.tc
@@ -0,0 +1,74 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: event tracing - restricts events based on pid
+# flags: instance
+
+do_reset() {
+    echo > set_event
+    echo > set_event_pid
+    echo 0 > options/event-fork
+    clear_trace
+}
+
+fail() { #msg
+    do_reset
+    echo $1
+    exit_fail
+}
+
+yield() {
+    ping localhost -c 1 || sleep .001 || usleep 1 || sleep 1
+}
+
+if [ ! -f set_event -o ! -d events/sched ]; then
+    echo "event tracing is not supported"
+    exit_unsupported
+fi
+
+if [ ! -f set_event_pid ]; then
+    echo "event pid filtering is not supported"
+    exit_unsupported
+fi
+
+reset_tracer
+do_reset
+
+echo 1 > events/sched/sched_switch/enable
+
+yield
+
+count=`cat trace | grep sched_switch | wc -l`
+if [ $count -eq 0 ]; then
+    fail "sched_switch events are not recorded"
+fi
+
+do_reset
+
+read mypid rest < /proc/self/stat
+
+echo $mypid > set_event_pid
+echo 'sched:sched_switch' > set_event
+
+yield
+
+count=`cat trace | grep sched_switch | grep -v "pid=$mypid" | wc -l`
+if [ $count -ne 0 ]; then
+    fail "sched_switch events from other task are recorded"
+fi
+
+do_reset
+
+echo $mypid > set_event_pid
+echo 1 > options/event-fork
+echo 1 > events/sched/sched_switch/enable
+
+yield
+
+count=`cat trace | grep sched_switch | grep -v "pid=$mypid" | wc -l`
+if [ $count -eq 0 ]; then
+    fail "sched_switch events from other task are not recorded"
+fi
+
+do_reset
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/event/subsystem-enable.tc b/tools/testing/selftests/ftrace/test.d/event/subsystem-enable.tc
new file mode 100644
index 000000000..6a37a8642
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/event/subsystem-enable.tc
@@ -0,0 +1,62 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: event tracing - enable/disable with subsystem level files
+# flags: instance
+
+do_reset() {
+    echo > set_event
+    clear_trace
+}
+
+fail() { #msg
+    do_reset
+    echo $1
+    exit_fail
+}
+
+yield() {
+    ping localhost -c 1 || sleep .001 || usleep 1 || sleep 1
+}
+
+if [ ! -f set_event -o ! -d events/sched ]; then
+    echo "event tracing is not supported"
+    exit_unsupported
+fi
+
+reset_tracer
+do_reset
+
+echo 'sched:*' > set_event
+
+yield
+
+count=`cat trace | grep -v ^# | awk '{ print $5 }' | sort -u | wc -l`
+if [ $count -lt 3 ]; then
+    fail "at least fork, exec and exit events should be recorded"
+fi
+
+do_reset
+
+echo 1 > events/sched/enable
+
+yield
+
+count=`cat trace | grep -v ^# | awk '{ print $5 }' | sort -u | wc -l`
+if [ $count -lt 3 ]; then
+    fail "at least fork, exec and exit events should be recorded"
+fi
+
+do_reset
+
+echo 0 > events/sched/enable
+
+yield
+
+count=`cat trace | grep -v ^# | awk '{ print $5 }' | sort -u | wc -l`
+if [ $count -ne 0 ]; then
+    fail "any of scheduler events should not be recorded"
+fi
+
+do_reset
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/event/toplevel-enable.tc b/tools/testing/selftests/ftrace/test.d/event/toplevel-enable.tc
new file mode 100644
index 000000000..4e9b6e2c0
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/event/toplevel-enable.tc
@@ -0,0 +1,65 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: event tracing - enable/disable with top level files
+
+do_reset() {
+    echo > set_event
+    clear_trace
+}
+
+fail() { #msg
+    do_reset
+    echo $1
+    exit_fail
+}
+
+yield() {
+    ping localhost -c 1 || sleep .001 || usleep 1 || sleep 1
+}
+
+if [ ! -f available_events -o ! -f set_event -o ! -d events ]; then
+    echo "event tracing is not supported"
+    exit_unsupported
+fi
+
+reset_tracer
+do_reset
+
+echo '*:*' > set_event
+
+yield
+
+echo 0 > tracing_on
+
+count=`head -n 128 trace | grep -v ^# | wc -l`
+if [ $count -eq 0 ]; then
+    fail "none of events are recorded"
+fi
+
+do_reset
+
+echo 1 > events/enable
+echo 1 > tracing_on
+
+yield
+
+echo 0 > tracing_on
+count=`head -n 128 trace | grep -v ^# | wc -l`
+if [ $count -eq 0 ]; then
+    fail "none of events are recorded"
+fi
+
+do_reset
+
+echo 0 > events/enable
+
+yield
+
+count=`cat trace | grep -v ^# | wc -l`
+if [ $count -ne 0 ]; then
+    fail "any of events should not be recorded"
+fi
+
+do_reset
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-filter-stack.tc b/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-filter-stack.tc
new file mode 100644
index 000000000..1aec99d10
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-filter-stack.tc
@@ -0,0 +1,92 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: ftrace - function graph filters with stack tracer
+
+# Make sure that function graph filtering works, and is not
+# affected by other tracers enabled (like stack tracer)
+
+if ! grep -q function_graph available_tracers; then
+    echo "no function graph tracer configured"
+    exit_unsupported
+fi
+
+if [ ! -f set_ftrace_filter ]; then
+    echo "set_ftrace_filter not found? Is dynamic ftrace not set?"
+    exit_unsupported
+fi
+
+do_reset() {
+    reset_tracer
+    if [ -e /proc/sys/kernel/stack_tracer_enabled ]; then
+	    echo 0 > /proc/sys/kernel/stack_tracer_enabled
+    fi
+    enable_tracing
+    clear_trace
+    echo > set_ftrace_filter
+}
+
+fail() { # msg
+    do_reset
+    echo $1
+    exit_fail
+}
+
+disable_tracing
+clear_trace;
+
+# filter something, schedule is always good
+if ! echo "schedule" > set_ftrace_filter; then
+    # test for powerpc 64
+    if ! echo ".schedule" > set_ftrace_filter; then
+	fail "can not enable schedule filter"
+    fi
+fi
+
+echo function_graph > current_tracer
+
+if [ ! -f stack_trace ]; then
+    echo "Stack tracer not configured"
+    do_reset
+    exit_unsupported;
+fi
+
+echo "Now testing with stack tracer"
+
+echo 1 > /proc/sys/kernel/stack_tracer_enabled
+
+disable_tracing
+clear_trace
+enable_tracing
+sleep 1
+
+count=`cat trace | grep '()' | grep -v schedule | wc -l`
+
+if [ $count -ne 0 ]; then
+    fail "Graph filtering not working with stack tracer?"
+fi
+
+# Make sure we did find something
+count=`cat trace | grep 'schedule()' | wc -l` 
+if [ $count -eq 0 ]; then
+    fail "No schedule traces found?"
+fi
+
+echo 0 > /proc/sys/kernel/stack_tracer_enabled
+clear_trace
+sleep 1
+
+
+count=`cat trace | grep '()' | grep -v schedule | wc -l`
+
+if [ $count -ne 0 ]; then
+    fail "Graph filtering not working after stack tracer disabled?"
+fi
+
+count=`cat trace | grep 'schedule()' | wc -l` 
+if [ $count -eq 0 ]; then
+    fail "No schedule traces found?"
+fi
+
+do_reset
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-filter.tc b/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-filter.tc
new file mode 100644
index 000000000..9f8d27ca3
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-filter.tc
@@ -0,0 +1,53 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: ftrace - function graph filters
+
+# Make sure that function graph filtering works
+
+if ! grep -q function_graph available_tracers; then
+    echo "no function graph tracer configured"
+    exit_unsupported
+fi
+
+do_reset() {
+    reset_tracer
+    enable_tracing
+    clear_trace
+}
+
+fail() { # msg
+    do_reset
+    echo $1
+    exit_fail
+}
+
+disable_tracing
+clear_trace
+
+# filter something, schedule is always good
+if ! echo "schedule" > set_ftrace_filter; then
+    # test for powerpc 64
+    if ! echo ".schedule" > set_ftrace_filter; then
+	fail "can not enable schedule filter"
+    fi
+fi
+
+echo function_graph > current_tracer
+enable_tracing
+sleep 1
+# search for functions (has "()" on the line), and make sure
+# that only the schedule function was found
+count=`cat trace | grep '()' | grep -v schedule | wc -l`
+if [ $count -ne 0 ]; then
+    fail "Graph filtering not working by itself?"
+fi
+
+# Make sure we did find something
+count=`cat trace | grep 'schedule()' | wc -l` 
+if [ $count -eq 0 ]; then
+    fail "No schedule traces found?"
+fi
+
+do_reset
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-glob.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-glob.tc
new file mode 100644
index 000000000..f4e92afab
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-glob.tc
@@ -0,0 +1,62 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: ftrace - function glob filters
+
+# Make sure that function glob matching filter works.
+
+if ! grep -q function available_tracers; then
+    echo "no function tracer configured"
+    exit_unsupported
+fi
+
+disable_tracing
+clear_trace
+
+ftrace_filter_check() { # glob grep
+  echo "$1" > set_ftrace_filter
+  cut -f1 -d" " set_ftrace_filter > $TMPDIR/actual
+  cut -f1 -d" " available_filter_functions | grep "$2" > $TMPDIR/expected
+  DIFF=`diff $TMPDIR/actual $TMPDIR/expected`
+  test -z "$DIFF"
+}
+
+# filter by *, front match
+ftrace_filter_check '*schedule' '^.*schedule$'
+
+# filter by *, middle match
+ftrace_filter_check '*schedule*' '^.*schedule.*$'
+
+# filter by *, end match
+ftrace_filter_check 'schedule*' '^schedule.*$'
+
+# filter by *mid*end
+ftrace_filter_check '*pin*lock' '.*pin.*lock$'
+
+# filter by start*mid*
+ftrace_filter_check 'mutex*try*' '^mutex.*try.*'
+
+# Advanced full-glob matching feature is recently supported.
+# Skip the tests if we are sure the kernel does not support it.
+if grep -q 'accepts: .* glob-matching-pattern' README ; then
+
+# filter by *, both side match
+ftrace_filter_check 'sch*ule' '^sch.*ule$'
+
+# filter by char class.
+ftrace_filter_check '[Ss]y[Ss]_*' '^[Ss]y[Ss]_.*$'
+
+# filter by ?, schedule is always good
+if ! echo "sch?dule" > set_ftrace_filter; then
+    # test for powerpc 64
+    if ! echo ".sch?dule" > set_ftrace_filter; then
+	fail "can not enable schedule filter"
+    fi
+    cat set_ftrace_filter | grep '^.schedule$'
+else
+    cat set_ftrace_filter | grep '^schedule$'
+fi
+
+fi
+
+echo > set_ftrace_filter
+enable_tracing
diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-pid.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-pid.tc
new file mode 100644
index 000000000..524ce24b3
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-pid.tc
@@ -0,0 +1,118 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: ftrace - function pid filters
+
+# Make sure that function pid matching filter works.
+# Also test it on an instance directory
+
+if ! grep -q function available_tracers; then
+    echo "no function tracer configured"
+    exit_unsupported
+fi
+
+if [ ! -f set_ftrace_pid ]; then
+    echo "set_ftrace_pid not found? Is function tracer not set?"
+    exit_unsupported
+fi
+
+if [ ! -f set_ftrace_filter ]; then
+    echo "set_ftrace_filter not found? Is function tracer not set?"
+    exit_unsupported
+fi
+
+do_function_fork=1
+
+if [ ! -f options/function-fork ]; then
+    do_function_fork=0
+    echo "no option for function-fork found. Option will not be tested."
+fi
+
+read PID _ < /proc/self/stat
+
+if [ $do_function_fork -eq 1 ]; then
+    # default value of function-fork option
+    orig_value=`grep function-fork trace_options`
+fi
+
+do_reset() {
+    reset_tracer
+    clear_trace
+    enable_tracing
+    echo > set_ftrace_filter
+    echo > set_ftrace_pid
+
+    if [ $do_function_fork -eq 0 ]; then
+	return
+    fi
+
+    echo $orig_value > trace_options
+}
+
+fail() { # msg
+    do_reset
+    echo $1
+    exit_fail
+}
+
+yield() {
+    ping localhost -c 1 || sleep .001 || usleep 1 || sleep 1
+}
+
+do_test() {
+    disable_tracing
+
+    echo do_execve* > set_ftrace_filter
+    echo *do_fork >> set_ftrace_filter
+
+    echo $PID > set_ftrace_pid
+    echo function > current_tracer
+
+    if [ $do_function_fork -eq 1 ]; then
+	# don't allow children to be traced
+	echo nofunction-fork > trace_options
+    fi
+
+    enable_tracing
+    yield
+
+    count_pid=`cat trace | grep -v ^# | grep $PID | wc -l`
+    count_other=`cat trace | grep -v ^# | grep -v $PID | wc -l`
+
+    # count_other should be 0
+    if [ $count_pid -eq 0 -o $count_other -ne 0 ]; then
+	fail "PID filtering not working?"
+    fi
+
+    disable_tracing
+    clear_trace
+
+    if [ $do_function_fork -eq 0 ]; then
+	return
+    fi
+
+    # allow children to be traced
+    echo function-fork > trace_options
+
+    enable_tracing
+    yield
+
+    count_pid=`cat trace | grep -v ^# | grep $PID | wc -l`
+    count_other=`cat trace | grep -v ^# | grep -v $PID | wc -l`
+
+    # count_other should NOT be 0
+    if [ $count_pid -eq 0 -o $count_other -eq 0 ]; then
+	fail "PID filtering not following fork?"
+    fi
+}
+
+do_test
+
+mkdir instances/foo
+cd instances/foo
+do_test
+cd ../../
+rmdir instances/foo
+
+do_reset
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func_event_triggers.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func_event_triggers.tc
new file mode 100644
index 000000000..6fed4cf2d
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/ftrace/func_event_triggers.tc
@@ -0,0 +1,123 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: ftrace - test for function event triggers
+# flags: instance
+#
+# Ftrace allows to add triggers to functions, such as enabling or disabling
+# tracing, enabling or disabling trace events, or recording a stack trace
+# within the ring buffer.
+#
+# This test is designed to test event triggers
+#
+
+# The triggers are set within the set_ftrace_filter file
+if [ ! -f set_ftrace_filter ]; then
+    echo "set_ftrace_filter not found? Is dynamic ftrace not set?"
+    exit_unsupported
+fi
+
+do_reset() {
+    reset_ftrace_filter
+    reset_tracer
+    disable_events
+    clear_trace
+    enable_tracing
+}
+
+fail() { # mesg
+    do_reset
+    echo $1
+    exit_fail
+}
+
+SLEEP_TIME=".1"
+
+do_reset
+
+echo "Testing function probes with events:"
+
+EVENT="sched:sched_switch"
+EVENT_ENABLE="events/sched/sched_switch/enable"
+
+cnt_trace() {
+    grep -v '^#' trace | wc -l
+}
+
+test_event_enabled() {
+    val=$1
+
+    e=`cat $EVENT_ENABLE`
+    if [ "$e" != $val ]; then
+	fail "Expected $val but found $e"
+    fi
+}
+
+run_enable_disable() {
+    enable=$1			# enable
+    Enable=$2			# Enable
+    check_disable=$3		# 0
+    check_enable_star=$4	# 1*
+    check_disable_star=$5	# 0*
+
+    cnt=`cnt_trace`
+    if [ $cnt -ne 0 ]; then
+	fail "Found junk in trace file"
+    fi
+
+    echo "$Enable event all the time"
+
+    echo $check_disable > $EVENT_ENABLE
+    sleep $SLEEP_TIME
+
+    test_event_enabled $check_disable
+
+    echo "schedule:${enable}_event:$EVENT" > set_ftrace_filter
+    if [ -d ../../instances ]; then # Check instances
+	cur=`cat set_ftrace_filter`
+	top=`cat ../../set_ftrace_filter`
+	if [ "$cur" = "$top" ]; then
+	    echo "This kernel is too old to support per instance filter"
+	    reset_ftrace_filter
+	    exit_unsupported
+	fi
+    fi
+
+    echo " make sure it works 5 times"
+
+    for i in `seq 5`; do
+	sleep $SLEEP_TIME
+	echo "  test $i"
+	test_event_enabled $check_enable_star
+
+	echo $check_disable > $EVENT_ENABLE
+    done
+    sleep $SLEEP_TIME
+    echo " make sure it's still works"
+    test_event_enabled $check_enable_star
+
+    reset_ftrace_filter
+
+    echo " make sure it only works 3 times"
+
+    echo $check_disable > $EVENT_ENABLE
+    sleep $SLEEP_TIME
+
+    echo "schedule:${enable}_event:$EVENT:3" > set_ftrace_filter
+
+    for i in `seq 3`; do
+	sleep $SLEEP_TIME
+	echo "  test $i"
+	test_event_enabled $check_enable_star
+
+	echo $check_disable > $EVENT_ENABLE
+    done
+
+    sleep $SLEEP_TIME
+    echo " make sure it stop working"
+    test_event_enabled $check_disable_star
+
+    do_reset
+}
+
+run_enable_disable enable Enable 0 "1*" "0*"
+run_enable_disable disable Disable 1 "0*" "1*"
diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func_profiler.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func_profiler.tc
new file mode 100644
index 000000000..b2d5a8feb
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/ftrace/func_profiler.tc
@@ -0,0 +1,81 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: ftrace - function profiler with function tracing
+
+# There was a bug after a rewrite of the ftrace infrastructure that
+# caused the function_profiler not to be able to run with the function
+# tracer, because the function_profiler used the function_graph tracer
+# and it was assumed the two could not run simultaneously.
+#
+# There was another related bug where the solution to the first bug
+# broke the way filtering of the function tracer worked.
+#
+# This test triggers those bugs on those kernels.
+#
+# We need function_graph and profiling to to run this test
+if ! grep -q function_graph available_tracers; then
+    echo "no function graph tracer configured"
+    exit_unsupported;
+fi
+
+if [ ! -f set_ftrace_filter ]; then
+    echo "set_ftrace_filter not found? Is dynamic ftrace not set?"
+    exit_unsupported
+fi
+
+if [ ! -f function_profile_enabled ]; then
+    echo "function_profile_enabled not found, function profiling enabled?"
+    exit_unsupported
+fi
+
+fail() { # mesg
+    reset_tracer
+    echo > set_ftrace_filter
+    echo $1
+    exit_fail
+}
+
+echo "Testing function tracer with profiler:"
+echo "enable function tracer"
+echo function > current_tracer
+echo "enable profiler"
+echo 1 > function_profile_enabled
+
+sleep 1
+
+echo "Now filter on just schedule"
+echo '*schedule' > set_ftrace_filter
+clear_trace
+
+echo "Now disable function profiler"
+echo 0 > function_profile_enabled
+
+sleep 1
+
+# make sure only schedule functions exist
+
+echo "testing if only schedule is being traced"
+if grep -v -e '^#' -e 'schedule' trace; then
+	fail "more than schedule was found"
+fi
+
+echo "Make sure schedule was traced"
+if ! grep -e 'schedule' trace > /dev/null; then
+	cat trace
+	fail "can not find schedule in trace"
+fi
+
+echo > set_ftrace_filter
+clear_trace
+
+sleep 1
+
+echo "make sure something other than scheduler is being traced"
+if ! grep -v -e '^#' -e 'schedule' trace > /dev/null; then
+	cat trace
+	fail "no other functions besides schedule was found"
+fi
+
+reset_tracer
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func_set_ftrace_file.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func_set_ftrace_file.tc
new file mode 100644
index 000000000..68e7a48f5
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/ftrace/func_set_ftrace_file.tc
@@ -0,0 +1,170 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: ftrace - test reading of set_ftrace_filter
+#
+# The set_ftrace_filter file of ftrace is used to list functions as well as
+# triggers (probes) attached to functions. The code to read this file is not
+# straight forward and has had various bugs in the past. This test is designed
+# to add functions and triggers to that file in various ways and read that
+# file in various ways (cat vs dd).
+#
+
+# The triggers are set within the set_ftrace_filter file
+if [ ! -f set_ftrace_filter ]; then
+    echo "set_ftrace_filter not found? Is dynamic ftrace not set?"
+    exit_unsupported
+fi
+
+do_reset() {
+    reset_tracer
+    reset_ftrace_filter
+    disable_events
+    clear_trace
+    enable_tracing
+}
+
+fail() { # mesg
+    do_reset
+    echo $1
+    exit_fail
+}
+
+do_reset
+
+FILTER=set_ftrace_filter
+FUNC1="schedule"
+FUNC2="do_softirq"
+
+ALL_FUNCS="#### all functions enabled ####"
+
+test_func() {
+    if ! echo "$1" | grep -q "^$2\$"; then
+	return 0
+    fi
+    echo "$1" | grep -v "^$2\$"
+    return 1
+}
+
+check_set_ftrace_filter() {
+    cat=`cat $FILTER`
+    dd1=`dd if=$FILTER bs=1 | grep -v -e 'records in' -e 'records out' -e 'bytes copied'`
+    dd100=`dd if=$FILTER bs=100 | grep -v -e 'records in' -e 'records out' -e 'bytes copied'`
+
+    echo "Testing '$@'"
+
+    while [ $# -gt 0 ]; do
+	echo "test $1"
+	if cat=`test_func "$cat" "$1"`; then
+	    return 0
+	fi
+	if dd1=`test_func "$dd1" "$1"`; then
+	    return 0
+	fi
+	if dd100=`test_func "$dd100" "$1"`; then
+	    return 0
+	fi
+	shift
+    done
+
+    if [ -n "$cat" ]; then
+	return 0
+    fi
+    if [ -n "$dd1" ]; then
+	return 0
+    fi
+    if [ -n "$dd100" ]; then
+	return 0
+    fi
+    return 1;
+}
+
+if check_set_ftrace_filter "$ALL_FUNCS"; then
+    fail "Expected only $ALL_FUNCS"
+fi
+
+echo "$FUNC1:traceoff" > set_ftrace_filter
+if check_set_ftrace_filter "$ALL_FUNCS" "$FUNC1:traceoff:unlimited"; then
+    fail "Expected $ALL_FUNCS and $FUNC1:traceoff:unlimited"
+fi
+
+echo "$FUNC1" > set_ftrace_filter
+if check_set_ftrace_filter "$FUNC1" "$FUNC1:traceoff:unlimited"; then
+    fail "Expected $FUNC1 and $FUNC1:traceoff:unlimited"
+fi
+
+echo "$FUNC2" >> set_ftrace_filter
+if check_set_ftrace_filter "$FUNC1" "$FUNC2" "$FUNC1:traceoff:unlimited"; then
+    fail "Expected $FUNC1 $FUNC2 and $FUNC1:traceoff:unlimited"
+fi
+
+echo "$FUNC2:traceoff" >> set_ftrace_filter
+if check_set_ftrace_filter "$FUNC1" "$FUNC2" "$FUNC1:traceoff:unlimited" "$FUNC2:traceoff:unlimited"; then
+    fail "Expected $FUNC1 $FUNC2 $FUNC1:traceoff:unlimited and $FUNC2:traceoff:unlimited"
+fi
+
+echo "$FUNC1" > set_ftrace_filter
+if check_set_ftrace_filter "$FUNC1" "$FUNC1:traceoff:unlimited" "$FUNC2:traceoff:unlimited"; then
+    fail "Expected $FUNC1 $FUNC1:traceoff:unlimited and $FUNC2:traceoff:unlimited"
+fi
+
+echo > set_ftrace_filter
+if check_set_ftrace_filter "$ALL_FUNCS" "$FUNC1:traceoff:unlimited" "$FUNC2:traceoff:unlimited"; then
+    fail "Expected $ALL_FUNCS $FUNC1:traceoff:unlimited and $FUNC2:traceoff:unlimited"
+fi
+
+reset_ftrace_filter
+
+if check_set_ftrace_filter "$ALL_FUNCS"; then
+    fail "Expected $ALL_FUNCS"
+fi
+
+echo "$FUNC1" > set_ftrace_filter
+if check_set_ftrace_filter "$FUNC1" ; then
+    fail "Expected $FUNC1"
+fi
+
+echo "$FUNC2" >> set_ftrace_filter
+if check_set_ftrace_filter "$FUNC1" "$FUNC2" ; then
+    fail "Expected $FUNC1 and $FUNC2"
+fi
+
+test_actual() { # Compares $TMPDIR/expected with set_ftrace_filter
+    cat set_ftrace_filter | grep -v '#' | cut -d' ' -f1 | cut -d':' -f1 | sort -u > $TMPDIR/actual
+    DIFF=`diff $TMPDIR/actual $TMPDIR/expected`
+    test -z "$DIFF"
+}
+
+# Set traceoff trigger for all fuctions with "lock" in their name
+cat available_filter_functions | cut -d' ' -f1 |  grep 'lock' | sort -u > $TMPDIR/expected
+echo '*lock*:traceoff' > set_ftrace_filter
+test_actual
+
+# now remove all with 'try' in it, and end with lock
+grep -v 'try.*lock$' $TMPDIR/expected > $TMPDIR/expected2
+mv $TMPDIR/expected2 $TMPDIR/expected
+echo '!*try*lock:traceoff' >> set_ftrace_filter
+test_actual
+
+# remove all that start with "m" and end with "lock"
+grep -v '^m.*lock$' $TMPDIR/expected > $TMPDIR/expected2
+mv $TMPDIR/expected2 $TMPDIR/expected
+echo '!m*lock:traceoff' >> set_ftrace_filter
+test_actual
+
+# remove all that start with "c" and have "unlock"
+grep -v '^c.*unlock' $TMPDIR/expected > $TMPDIR/expected2
+mv $TMPDIR/expected2 $TMPDIR/expected
+echo '!c*unlock*:traceoff' >> set_ftrace_filter
+test_actual
+
+# clear all the rest
+> $TMPDIR/expected
+echo '!*:traceoff' >> set_ftrace_filter
+test_actual
+
+rm $TMPDIR/expected
+rm $TMPDIR/actual
+
+do_reset
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func_traceonoff_triggers.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func_traceonoff_triggers.tc
new file mode 100644
index 000000000..f6d9ac732
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/ftrace/func_traceonoff_triggers.tc
@@ -0,0 +1,186 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: ftrace - test for function traceon/off triggers
+# flags: instance
+#
+# Ftrace allows to add triggers to functions, such as enabling or disabling
+# tracing, enabling or disabling trace events, or recording a stack trace
+# within the ring buffer.
+#
+# This test is designed to test enabling and disabling tracing triggers
+#
+
+# The triggers are set within the set_ftrace_filter file
+if [ ! -f set_ftrace_filter ]; then
+    echo "set_ftrace_filter not found? Is dynamic ftrace not set?"
+    exit_unsupported
+fi
+
+do_reset() {
+    reset_ftrace_filter
+    reset_tracer
+    disable_events
+    clear_trace
+    enable_tracing
+}
+
+fail() { # mesg
+    do_reset
+    echo $1
+    exit_fail
+}
+
+SLEEP_TIME=".1"
+
+do_reset
+
+echo "Testing function probes with enabling disabling tracing:"
+
+cnt_trace() {
+    grep -v '^#' trace | wc -l
+}
+
+echo '** DISABLE TRACING'
+disable_tracing
+clear_trace
+
+cnt=`cnt_trace`
+if [ $cnt -ne 0 ]; then
+    fail "Found junk in trace"
+fi
+
+
+echo '** ENABLE EVENTS'
+
+echo 1 > events/enable
+
+echo '** ENABLE TRACING'
+enable_tracing
+
+cnt=`cnt_trace`
+if [ $cnt -eq 0 ]; then
+   fail "Nothing found in trace"
+fi
+
+# powerpc uses .schedule
+func="schedule"
+available_file=available_filter_functions
+if [ -d ../../instances -a -f ../../available_filter_functions ]; then
+   available_file=../../available_filter_functions
+fi
+x=`grep '^\.schedule$' available_filter_functions | wc -l`
+if [ "$x" -eq 1 ]; then
+   func=".schedule"
+fi
+
+echo '** SET TRACEOFF'
+
+echo "$func:traceoff" > set_ftrace_filter
+if [ -d ../../instances ]; then # Check instances
+    cur=`cat set_ftrace_filter`
+    top=`cat ../../set_ftrace_filter`
+    if [ "$cur" = "$top" ]; then
+	echo "This kernel is too old to support per instance filter"
+	reset_ftrace_filter
+	exit_unsupported
+    fi
+fi
+
+cnt=`grep schedule set_ftrace_filter | wc -l`
+if [ $cnt -ne 1 ]; then
+   fail "Did not find traceoff trigger"
+fi
+
+cnt=`cnt_trace`
+sleep $SLEEP_TIME
+cnt2=`cnt_trace`
+
+if [ $cnt -ne $cnt2 ]; then
+   fail "Tracing is not stopped"
+fi
+
+on=`cat tracing_on`
+if [ $on != "0" ]; then
+    fail "Tracing is not off"
+fi
+
+csum1=`md5sum trace`
+sleep $SLEEP_TIME
+csum2=`md5sum trace`
+
+if [ "$csum1" != "$csum2" ]; then
+    fail "Tracing file is still changing"
+fi
+
+clear_trace
+
+cnt=`cnt_trace`
+if [ $cnt -ne 0 ]; then
+    fail "Tracing is still happeing"
+fi
+
+echo "!$func:traceoff" >> set_ftrace_filter
+
+cnt=`grep schedule set_ftrace_filter | wc -l`
+if [ $cnt -ne 0 ]; then
+    fail "traceoff trigger still exists"
+fi
+
+on=`cat tracing_on`
+if [ $on != "0" ]; then
+    fail "Tracing is started again"
+fi
+
+echo "$func:traceon" > set_ftrace_filter
+
+cnt=`grep schedule set_ftrace_filter | wc -l`
+if [ $cnt -ne 1 ]; then
+    fail "traceon trigger not found"
+fi
+
+cnt=`cnt_trace`
+if [ $cnt -eq 0 ]; then
+   fail "Tracing did not start"
+fi
+
+on=`cat tracing_on`
+if [ $on != "1" ]; then
+    fail "Tracing was not enabled"
+fi
+
+
+echo "!$func:traceon" >> set_ftrace_filter
+
+cnt=`grep schedule set_ftrace_filter | wc -l`
+if [ $cnt -ne 0 ]; then
+   fail "traceon trigger still exists"
+fi
+
+check_sleep() {
+    val=$1
+    sleep $SLEEP_TIME
+    cat set_ftrace_filter
+    on=`cat tracing_on`
+    if [ $on != "$val" ]; then
+	fail "Expected tracing_on to be $val, but it was $on"
+    fi
+}
+
+
+echo "$func:traceoff:3" > set_ftrace_filter
+check_sleep "0"
+echo 1 > tracing_on
+check_sleep "0"
+echo 1 > tracing_on
+check_sleep "0"
+echo 1 > tracing_on
+check_sleep "1"
+echo "!$func:traceoff:0" > set_ftrace_filter
+
+if grep -e traceon -e traceoff set_ftrace_filter; then
+    fail "Tracing on and off triggers still exist"
+fi
+
+disable_events
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/functions b/tools/testing/selftests/ftrace/test.d/functions
new file mode 100644
index 000000000..e4645d5e3
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/functions
@@ -0,0 +1,100 @@
+
+clear_trace() { # reset trace output
+    echo > trace
+}
+
+disable_tracing() { # stop trace recording
+    echo 0 > tracing_on
+}
+
+enable_tracing() { # start trace recording
+    echo 1 > tracing_on
+}
+
+reset_tracer() { # reset the current tracer
+    echo nop > current_tracer
+}
+
+reset_trigger_file() {
+    # remove action triggers first
+    grep -H ':on[^:]*(' $@ |
+    while read line; do
+        cmd=`echo $line | cut -f2- -d: | cut -f1 -d"["`
+	file=`echo $line | cut -f1 -d:`
+	echo "!$cmd" >> $file
+    done
+    grep -Hv ^# $@ |
+    while read line; do
+        cmd=`echo $line | cut -f2- -d: | cut -f1 -d"["`
+	file=`echo $line | cut -f1 -d:`
+	echo "!$cmd" > $file
+    done
+}
+
+reset_trigger() { # reset all current setting triggers
+    if [ -d events/synthetic ]; then
+        reset_trigger_file events/synthetic/*/trigger
+    fi
+    reset_trigger_file events/*/*/trigger
+}
+
+reset_events_filter() { # reset all current setting filters
+    grep -v ^none events/*/*/filter |
+    while read line; do
+	echo 0 > `echo $line | cut -f1 -d:`
+    done
+}
+
+reset_ftrace_filter() { # reset all triggers in set_ftrace_filter
+    echo > set_ftrace_filter
+    grep -v '^#' set_ftrace_filter | while read t; do
+	tr=`echo $t | cut -d: -f2`
+	if [ "$tr" = "" ]; then
+	    continue
+	fi
+	if ! grep -q "$t" set_ftrace_filter; then
+		continue;
+	fi
+	name=`echo $t | cut -d: -f1 | cut -d' ' -f1`
+	if [ $tr = "enable_event" -o $tr = "disable_event" ]; then
+	    tr=`echo $t | cut -d: -f2-4`
+	    limit=`echo $t | cut -d: -f5`
+	else
+	    tr=`echo $t | cut -d: -f2`
+	    limit=`echo $t | cut -d: -f3`
+	fi
+	if [ "$limit" != "unlimited" ]; then
+	    tr="$tr:$limit"
+	fi
+	echo "!$name:$tr" > set_ftrace_filter
+    done
+}
+
+disable_events() {
+    echo 0 > events/enable
+}
+
+clear_synthetic_events() { # reset all current synthetic events
+    grep -v ^# synthetic_events |
+    while read line; do
+        echo "!$line" >> synthetic_events
+    done
+}
+
+initialize_ftrace() { # Reset ftrace to initial-state
+# As the initial state, ftrace will be set to nop tracer,
+# no events, no triggers, no filters, no function filters,
+# no probes, and tracing on.
+    disable_tracing
+    reset_tracer
+    reset_trigger
+    reset_events_filter
+    disable_events
+    echo > set_event_pid	# event tracer is always on
+    [ -f set_ftrace_filter ] && echo | tee set_ftrace_*
+    [ -f set_graph_function ] && echo | tee set_graph_*
+    [ -f stack_trace_filter ] && echo > stack_trace_filter
+    [ -f kprobe_events ] && echo > kprobe_events
+    [ -f uprobe_events ] && echo > uprobe_events
+    enable_tracing
+}
diff --git a/tools/testing/selftests/ftrace/test.d/instances/instance-event.tc b/tools/testing/selftests/ftrace/test.d/instances/instance-event.tc
new file mode 100644
index 000000000..4fa0f7914
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/instances/instance-event.tc
@@ -0,0 +1,146 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Test creation and deletion of trace instances while setting an event
+
+if [ ! -d instances ] ; then
+    echo "no instance directory with this kernel"
+    exit_unsupported;
+fi
+
+fail() { # mesg
+    rmdir foo 2>/dev/null
+    echo $1
+    set -e
+    exit_fail
+}
+
+cd instances
+
+# we don't want to fail on error
+set +e
+
+mkdir x
+rmdir x
+result=$?
+
+if [ $result -ne 0 ]; then
+    echo "instance rmdir not supported"
+    exit_unsupported
+fi
+
+instance_slam() {
+        while :; do
+                mkdir foo 2> /dev/null
+                rmdir foo 2> /dev/null
+        done
+}
+
+instance_read() {
+        while :; do
+                cat foo/trace 1> /dev/null 2>&1
+        done
+}
+
+instance_set() {
+        while :; do
+                echo 1 > foo/events/sched/sched_switch
+        done 2> /dev/null
+}
+
+instance_slam &
+p1=$!
+echo $p1
+
+instance_set &
+p2=$!
+echo $p2
+
+instance_read &
+p3=$!
+echo $p3
+
+sleep 1
+
+kill -1 $p3
+kill -1 $p2
+kill -1 $p1
+
+echo "Wait for processes to finish"
+wait $p1 $p2 $p3
+echo "all processes finished, wait for cleanup"
+sleep 1
+
+mkdir foo
+ls foo > /dev/null
+rmdir foo
+if [ -d foo ]; then
+        fail "foo still exists"
+fi
+
+mkdir foo
+echo "schedule:enable_event:sched:sched_switch" > foo/set_ftrace_filter
+rmdir foo
+if [ -d foo ]; then
+        fail "foo still exists"
+fi
+if grep -q "schedule:enable_event:sched:sched_switch" ../set_ftrace_filter; then
+	echo "Older kernel detected. Cleanup filter"
+	echo '!schedule:enable_event:sched:sched_switch' > ../set_ftrace_filter
+fi
+
+instance_slam() {
+    while :; do
+	mkdir x
+	mkdir y
+	mkdir z
+	rmdir x
+	rmdir y
+	rmdir z
+    done 2>/dev/null
+}
+
+instance_slam &
+p1=$!
+echo $p1
+
+instance_slam &
+p2=$!
+echo $p2
+
+instance_slam &
+p3=$!
+echo $p3
+
+instance_slam &
+p4=$!
+echo $p4
+
+instance_slam &
+p5=$!
+echo $p5
+
+ls -lR >/dev/null
+sleep 1
+
+kill -1 $p1
+kill -1 $p2
+kill -1 $p3
+kill -1 $p4
+kill -1 $p5
+
+echo "Wait for processes to finish"
+wait $p1 $p2 $p3 $p4 $p5
+echo "all processes finished, wait for cleanup"
+
+mkdir x y z
+ls x y z
+rmdir x y z
+for d in x y z; do
+        if [ -d $d ]; then
+                fail "instance $d still exists"
+        fi
+done
+
+set -e
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/instances/instance.tc b/tools/testing/selftests/ftrace/test.d/instances/instance.tc
new file mode 100644
index 000000000..b84651283
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/instances/instance.tc
@@ -0,0 +1,86 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Test creation and deletion of trace instances
+
+if [ ! -d instances ] ; then
+    echo "no instance directory with this kernel"
+    exit_unsupported;
+fi
+
+fail() { # mesg
+    rmdir x y z 2>/dev/null
+    echo $1
+    set -e
+    exit_fail
+}
+
+cd instances
+
+# we don't want to fail on error
+set +e
+
+mkdir x
+rmdir x
+result=$?
+
+if [ $result -ne 0 ]; then
+    echo "instance rmdir not supported"
+    exit_unsupported
+fi
+
+instance_slam() {
+    while :; do
+	mkdir x
+	mkdir y
+	mkdir z
+	rmdir x
+	rmdir y
+	rmdir z
+    done 2>/dev/null
+}
+
+instance_slam &
+p1=$!
+echo $p1
+
+instance_slam &
+p2=$!
+echo $p2
+
+instance_slam &
+p3=$!
+echo $p3
+
+instance_slam &
+p4=$!
+echo $p4
+
+instance_slam &
+p5=$!
+echo $p5
+
+ls -lR >/dev/null
+sleep 1
+
+kill -1 $p1
+kill -1 $p2
+kill -1 $p3
+kill -1 $p4
+kill -1 $p5
+
+echo "Wait for processes to finish"
+wait $p1 $p2 $p3 $p4 $p5
+echo "all processes finished, wait for cleanup"
+
+mkdir x y z
+ls x y z
+rmdir x y z
+for d in x y z; do
+        if [ -d $d ]; then
+                fail "instance $d still exists"
+        fi
+done
+
+set -e
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/add_and_remove.tc b/tools/testing/selftests/ftrace/test.d/kprobe/add_and_remove.tc
new file mode 100644
index 000000000..4604d2103
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/add_and_remove.tc
@@ -0,0 +1,13 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Kprobe dynamic event - adding and removing
+
+[ -f kprobe_events ] || exit_unsupported # this is configurable
+
+echo 0 > events/enable
+echo > kprobe_events
+echo p:myevent _do_fork > kprobe_events
+grep myevent kprobe_events
+test -d events/kprobes/myevent
+echo > kprobe_events
+clear_trace
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/busy_check.tc b/tools/testing/selftests/ftrace/test.d/kprobe/busy_check.tc
new file mode 100644
index 000000000..bbc443a91
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/busy_check.tc
@@ -0,0 +1,15 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Kprobe dynamic event - busy event check
+
+[ -f kprobe_events ] || exit_unsupported
+
+echo 0 > events/enable
+echo > kprobe_events
+echo p:myevent _do_fork > kprobe_events
+test -d events/kprobes/myevent
+echo 1 > events/kprobes/myevent/enable
+echo > kprobe_events && exit_fail # this must fail
+echo 0 > events/kprobes/myevent/enable
+echo > kprobe_events # this must succeed
+clear_trace
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args.tc
new file mode 100644
index 000000000..8b43c6804
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args.tc
@@ -0,0 +1,18 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Kprobe dynamic event with arguments
+
+[ -f kprobe_events ] || exit_unsupported # this is configurable
+
+echo 0 > events/enable
+echo > kprobe_events
+echo 'p:testprobe _do_fork $stack $stack0 +0($stack)' > kprobe_events
+grep testprobe kprobe_events
+test -d events/kprobes/testprobe
+echo 1 > events/kprobes/testprobe/enable
+( echo "forked")
+echo 0 > events/kprobes/testprobe/enable
+echo "-:testprobe" >> kprobe_events
+clear_trace
+test -d events/kprobes/testprobe && exit_fail || exit_pass
+
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc
new file mode 100644
index 000000000..1ad70cdaf
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc
@@ -0,0 +1,48 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Kprobe event string type argument
+
+[ -f kprobe_events ] || exit_unsupported # this is configurable
+
+echo 0 > events/enable
+echo > kprobe_events
+
+case `uname -m` in
+x86_64)
+  ARG1=%di
+;;
+i[3456]86)
+  ARG1=%ax
+;;
+aarch64)
+  ARG1=%x0
+;;
+arm*)
+  ARG1=%r0
+;;
+ppc64*)
+  ARG1=%r3
+;;
+ppc*)
+  ARG1=%r3
+;;
+*)
+  echo "Please implement other architecture here"
+  exit_untested
+esac
+
+: "Test get argument (1)"
+echo "p:testprobe tracefs_create_dir arg1=+0(${ARG1}):string" > kprobe_events
+echo 1 > events/kprobes/testprobe/enable
+echo "p:test _do_fork" >> kprobe_events
+grep -qe "testprobe.* arg1=\"test\"" trace
+
+echo 0 > events/kprobes/testprobe/enable
+: "Test get argument (2)"
+echo "p:testprobe tracefs_create_dir arg1=+0(${ARG1}):string arg2=+0(${ARG1}):string" > kprobe_events
+echo 1 > events/kprobes/testprobe/enable
+echo "p:test _do_fork" >> kprobe_events
+grep -qe "testprobe.* arg1=\"test\" arg2=\"test\"" trace
+
+echo 0 > events/enable
+echo > kprobe_events
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_syntax.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_syntax.tc
new file mode 100644
index 000000000..92ffb3bd3
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_syntax.tc
@@ -0,0 +1,107 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Kprobe event argument syntax
+
+[ -f kprobe_events ] || exit_unsupported # this is configurable
+
+grep "x8/16/32/64" README > /dev/null || exit_unsupported # version issue
+
+echo 0 > events/enable
+echo > kprobe_events
+
+PROBEFUNC="vfs_read"
+GOODREG=
+BADREG=
+GOODSYM="_sdata"
+if ! grep -qw ${GOODSYM} /proc/kallsyms ; then
+  GOODSYM=$PROBEFUNC
+fi
+BADSYM="deaqswdefr"
+SYMADDR=0x`grep -w ${GOODSYM} /proc/kallsyms | cut -f 1 -d " "`
+GOODTYPE="x16"
+BADTYPE="y16"
+
+case `uname -m` in
+x86_64|i[3456]86)
+  GOODREG=%ax
+  BADREG=%ex
+;;
+aarch64)
+  GOODREG=%x0
+  BADREG=%ax
+;;
+arm*)
+  GOODREG=%r0
+  BADREG=%ax
+;;
+ppc*)
+  GOODREG=%r3
+  BADREG=%msr
+;;
+*)
+  echo "Please implement other architecture here"
+  exit_untested
+esac
+
+test_goodarg() # Good-args
+{
+  while [ "$1" ]; do
+    echo "p ${PROBEFUNC} $1" > kprobe_events
+    shift 1
+  done;
+}
+
+test_badarg() # Bad-args
+{
+  while [ "$1" ]; do
+    ! echo "p ${PROBEFUNC} $1" > kprobe_events
+    shift 1
+  done;
+}
+
+echo > kprobe_events
+
+: "Register access"
+test_goodarg ${GOODREG}
+test_badarg ${BADREG}
+
+: "Symbol access"
+test_goodarg "@${GOODSYM}" "@${SYMADDR}" "@${GOODSYM}+10" "@${GOODSYM}-10"
+test_badarg "@" "@${BADSYM}" "@${GOODSYM}*10" "@${GOODSYM}/10" \
+	    "@${GOODSYM}%10" "@${GOODSYM}&10" "@${GOODSYM}|10"
+
+: "Stack access"
+test_goodarg "\$stack" "\$stack0" "\$stack1"
+test_badarg "\$stackp" "\$stack0+10" "\$stack1-10"
+
+: "Retval access"
+echo "r ${PROBEFUNC} \$retval" > kprobe_events
+! echo "p ${PROBEFUNC} \$retval" > kprobe_events
+
+# $comm was introduced in 4.8, older kernels reject it.
+if grep -A1 "fetcharg:" README | grep -q '\$comm' ; then
+: "Comm access"
+test_goodarg "\$comm"
+fi
+
+: "Indirect memory access"
+test_goodarg "+0(${GOODREG})" "-0(${GOODREG})" "+10(\$stack)" \
+	"+0(\$stack1)" "+10(@${GOODSYM}-10)" "+0(+10(+20(\$stack)))"
+test_badarg "+(${GOODREG})" "(${GOODREG}+10)" "-(${GOODREG})" "(${GOODREG})" \
+	"+10(\$comm)" "+0(${GOODREG})+10"
+
+: "Name assignment"
+test_goodarg "varname=${GOODREG}"
+test_badarg "varname=varname2=${GOODREG}"
+
+: "Type syntax"
+test_goodarg "${GOODREG}:${GOODTYPE}"
+test_badarg "${GOODREG}::${GOODTYPE}" "${GOODREG}:${BADTYPE}" \
+	"${GOODTYPE}:${GOODREG}"
+
+: "Combination check"
+
+test_goodarg "\$comm:string" "+0(\$stack):string"
+test_badarg "\$comm:x64" "\$stack:string" "${GOODREG}:string"
+
+echo > kprobe_events
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_type.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_type.tc
new file mode 100644
index 000000000..2a1755bfc
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_type.tc
@@ -0,0 +1,38 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Kprobes event arguments with types
+
+[ -f kprobe_events ] || exit_unsupported # this is configurable
+
+grep "x8/16/32/64" README > /dev/null || exit_unsupported # version issue
+
+echo 0 > events/enable
+echo > kprobe_events
+enable_tracing
+
+echo 'p:testprobe _do_fork $stack0:s32 $stack0:u32 $stack0:x32 $stack0:b8@4/32' > kprobe_events
+grep testprobe kprobe_events
+test -d events/kprobes/testprobe
+
+echo 1 > events/kprobes/testprobe/enable
+( echo "forked")
+echo 0 > events/kprobes/testprobe/enable
+ARGS=`tail -n 1 trace | sed -e 's/.* arg1=\(.*\) arg2=\(.*\) arg3=\(.*\) arg4=\(.*\)/\1 \2 \3 \4/'`
+
+check_types() {
+  X1=`printf "%x" $1 | tail -c 8`
+  X2=`printf "%x" $2`
+  X3=`printf "%x" $3`
+  test $X1 = $X2
+  test $X2 = $X3
+  test 0x$X3 = $3
+
+  B4=`printf "%02x" $4`
+  B3=`echo -n $X3 | tail -c 3 | head -c 2`
+  test $B3 = $B4
+}
+check_types $ARGS
+
+echo "-:testprobe" >> kprobe_events
+clear_trace
+test -d events/kprobes/testprobe && exit_fail || exit_pass
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_eventname.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_eventname.tc
new file mode 100644
index 000000000..2724a1068
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_eventname.tc
@@ -0,0 +1,37 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Kprobe event auto/manual naming
+
+[ -f kprobe_events ] || exit_unsupported # this is configurable
+
+disable_events
+echo > kprobe_events
+
+:;: "Add an event on function without name" ;:
+
+FUNC=`grep " [tT] .*vfs_read$" /proc/kallsyms | tail -n 1 | cut -f 3 -d " "`
+[ "x" != "x$FUNC" ] || exit_unresolved
+echo "p $FUNC" > kprobe_events
+PROBE_NAME=`echo $FUNC | tr ".:" "_"`
+test -d events/kprobes/p_${PROBE_NAME}_0 || exit_failure
+
+:;: "Add an event on function with new name" ;:
+
+echo "p:event1 $FUNC" > kprobe_events
+test -d events/kprobes/event1 || exit_failure
+
+:;: "Add an event on function with new name and group" ;:
+
+echo "p:kprobes2/event2 $FUNC" > kprobe_events
+test -d events/kprobes2/event2 || exit_failure
+
+:;: "Add an event on dot function without name" ;:
+
+FUNC=`grep -m 10 " [tT] .*\.isra\..*$" /proc/kallsyms | tail -n 1 | cut -f 3 -d " "`
+[ "x" != "x$FUNC" ] || exit_unresolved
+echo "p $FUNC" > kprobe_events
+EVENT=`grep $FUNC kprobe_events | cut -f 1 -d " " | cut -f 2 -d:`
+[ "x" != "x$EVENT" ] || exit_failure
+test -d events/$EVENT || exit_failure
+
+echo > kprobe_events
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_ftrace.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_ftrace.tc
new file mode 100644
index 000000000..cc4cac0e6
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_ftrace.tc
@@ -0,0 +1,56 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Kprobe dynamic event with function tracer
+
+[ -f kprobe_events ] || exit_unsupported # this is configurable
+grep function available_tracers || exit_unsupported # this is configurable
+
+# prepare
+echo nop > current_tracer
+echo _do_fork > set_ftrace_filter
+echo 0 > events/enable
+echo > kprobe_events
+echo 'p:testprobe _do_fork' > kprobe_events
+
+# kprobe on / ftrace off
+echo 1 > events/kprobes/testprobe/enable
+echo > trace
+( echo "forked")
+grep testprobe trace
+! grep '_do_fork <-' trace
+
+# kprobe on / ftrace on
+echo function > current_tracer
+echo > trace
+( echo "forked")
+grep testprobe trace
+grep '_do_fork <-' trace
+
+# kprobe off / ftrace on
+echo 0 > events/kprobes/testprobe/enable
+echo > trace
+( echo "forked")
+! grep testprobe trace
+grep '_do_fork <-' trace
+
+# kprobe on / ftrace on
+echo 1 > events/kprobes/testprobe/enable
+echo function > current_tracer
+echo > trace
+( echo "forked")
+grep testprobe trace
+grep '_do_fork <-' trace
+
+# kprobe on / ftrace off
+echo nop > current_tracer
+echo > trace
+( echo "forked")
+grep testprobe trace
+! grep '_do_fork <-' trace
+
+# cleanup
+echo nop > current_tracer
+echo > set_ftrace_filter
+echo 0 > events/kprobes/testprobe/enable
+echo > kprobe_events
+echo > trace
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_module.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_module.tc
new file mode 100644
index 000000000..1e9f75f7a
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_module.tc
@@ -0,0 +1,29 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Kprobe dynamic event - probing module
+
+[ -f kprobe_events ] || exit_unsupported # this is configurable
+
+disable_events
+echo > kprobe_events
+
+:;: "Add an event on a module function without specifying event name" ;:
+
+MOD=`lsmod | head -n 2 | tail -n 1 | cut -f1 -d" "`
+FUNC=`grep -m 1 ".* t .*\\[$MOD\\]" /proc/kallsyms | xargs | cut -f3 -d" "`
+[ "x" != "x$MOD" -a "y" != "y$FUNC" ] || exit_unresolved
+echo "p $MOD:$FUNC" > kprobe_events
+PROBE_NAME=`echo $MOD:$FUNC | tr ".:" "_"`
+test -d events/kprobes/p_${PROBE_NAME}_0 || exit_failure
+
+:;: "Add an event on a module function with new event name" ;:
+
+echo "p:event1 $MOD:$FUNC" > kprobe_events
+test -d events/kprobes/event1 || exit_failure
+
+:;: "Add an event on a module function with new event and group name" ;:
+
+echo "p:kprobes1/event1 $MOD:$FUNC" > kprobe_events
+test -d events/kprobes1/event1 || exit_failure
+
+echo > kprobe_events
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_args.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_args.tc
new file mode 100644
index 000000000..321954683
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_args.tc
@@ -0,0 +1,17 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Kretprobe dynamic event with arguments
+
+[ -f kprobe_events ] || exit_unsupported # this is configurable
+
+echo 0 > events/enable
+echo > kprobe_events
+echo 'r:testprobe2 _do_fork $retval' > kprobe_events
+grep testprobe2 kprobe_events
+test -d events/kprobes/testprobe2
+echo 1 > events/kprobes/testprobe2/enable
+( echo "forked")
+echo 0 > events/kprobes/testprobe2/enable
+echo '-:testprobe2' >> kprobe_events
+clear_trace
+test -d events/kprobes/testprobe2 && exit_fail || exit_pass
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_maxactive.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_maxactive.tc
new file mode 100644
index 000000000..7c0290684
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_maxactive.tc
@@ -0,0 +1,41 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Kretprobe dynamic event with maxactive
+
+[ -f kprobe_events ] || exit_unsupported # this is configurable
+grep -q 'r\[maxactive\]' README || exit_unsupported # this is older version
+
+echo > kprobe_events
+
+# Test if we successfully reject unknown messages
+if echo 'a:myprobeaccept inet_csk_accept' > kprobe_events; then false; else true; fi
+
+# Test if we successfully reject too big maxactive
+if echo 'r1000000:myprobeaccept inet_csk_accept' > kprobe_events; then false; else true; fi
+
+# Test if we successfully reject unparsable numbers for maxactive
+if echo 'r10fuzz:myprobeaccept inet_csk_accept' > kprobe_events; then false; else true; fi
+
+# Test for kretprobe with event name without maxactive
+echo 'r:myprobeaccept inet_csk_accept' > kprobe_events
+grep myprobeaccept kprobe_events
+test -d events/kprobes/myprobeaccept
+echo '-:myprobeaccept' >> kprobe_events
+
+# Test for kretprobe with event name with a small maxactive
+echo 'r10:myprobeaccept inet_csk_accept' > kprobe_events
+grep myprobeaccept kprobe_events
+test -d events/kprobes/myprobeaccept
+echo '-:myprobeaccept' >> kprobe_events
+
+# Test for kretprobe without event name without maxactive
+echo 'r inet_csk_accept' > kprobe_events
+grep inet_csk_accept kprobe_events
+echo > kprobe_events
+
+# Test for kretprobe without event name with a small maxactive
+echo 'r10 inet_csk_accept' > kprobe_events
+grep inet_csk_accept kprobe_events
+echo > kprobe_events
+
+clear_trace
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc b/tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc
new file mode 100644
index 000000000..da298f191
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc
@@ -0,0 +1,39 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Register/unregister many kprobe events
+
+[ -f kprobe_events ] || exit_unsupported # this is configurable
+
+# ftrace fentry skip size depends on the machine architecture.
+# Currently HAVE_KPROBES_ON_FTRACE defined on x86 and powerpc64le
+case `uname -m` in
+  x86_64|i[3456]86) OFFS=5;;
+  ppc64le) OFFS=8;;
+  *) OFFS=0;;
+esac
+
+if [ -d events/kprobes ]; then
+  echo 0 > events/kprobes/enable
+  echo > kprobe_events
+fi
+
+N=0
+echo "Setup up kprobes on first available 256 text symbols"
+grep -i " t " /proc/kallsyms | cut -f3 -d" " | grep -v .*\\..* | \
+while read i; do
+  echo p ${i}+${OFFS} >> kprobe_events && N=$((N+1)) ||:
+  test $N -eq 256 && break
+done
+
+L=`cat kprobe_events | wc -l`
+if [ $L -ne 256 ]; then
+  echo "The number of kprobes events ($L) is not 256"
+  exit_fail
+fi
+
+echo 1 > events/kprobes/enable
+echo 0 > events/kprobes/enable
+echo > kprobe_events
+echo "Waiting for unoptimizing & freeing"
+sleep 5
+echo "Done"
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/probepoint.tc b/tools/testing/selftests/ftrace/test.d/kprobe/probepoint.tc
new file mode 100644
index 000000000..519d2763f
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/probepoint.tc
@@ -0,0 +1,43 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Kprobe events - probe points
+
+[ -f kprobe_events ] || exit_unsupported # this is configurable
+
+TARGET_FUNC=tracefs_create_dir
+
+dec_addr() { # hexaddr
+  printf "%d" "0x"`echo $1 | tail -c 8`
+}
+
+set_offs() { # prev target next
+  A1=`dec_addr $1`
+  A2=`dec_addr $2`
+  A3=`dec_addr $3`
+  TARGET="0x$2" # an address
+  PREV=`expr $A1 - $A2` # offset to previous symbol
+  NEXT=+`expr $A3 - $A2` # offset to next symbol
+  OVERFLOW=+`printf "0x%x" ${PREV}` # overflow offset to previous symbol
+}
+
+# We have to decode symbol addresses to get correct offsets.
+# If the offset is not an instruction boundary, it cause -EILSEQ.
+set_offs `grep -A1 -B1 ${TARGET_FUNC} /proc/kallsyms | cut -f 1 -d " " | xargs`
+
+UINT_TEST=no
+# printf "%x" -1 returns (unsigned long)-1.
+if [ `printf "%x" -1 | wc -c` != 9 ]; then
+  UINT_TEST=yes
+fi
+
+echo 0 > events/enable
+echo > kprobe_events
+echo "p:testprobe ${TARGET_FUNC}" > kprobe_events
+echo "p:testprobe ${TARGET}" > kprobe_events
+echo "p:testprobe ${TARGET_FUNC}${NEXT}" > kprobe_events
+! echo "p:testprobe ${TARGET_FUNC}${PREV}" > kprobe_events
+if [ "${UINT_TEST}" = yes ]; then
+! echo "p:testprobe ${TARGET_FUNC}${OVERFLOW}" > kprobe_events
+fi
+echo > kprobe_events
+clear_trace
diff --git a/tools/testing/selftests/ftrace/test.d/preemptirq/irqsoff_tracer.tc b/tools/testing/selftests/ftrace/test.d/preemptirq/irqsoff_tracer.tc
new file mode 100644
index 000000000..cbd174334
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/preemptirq/irqsoff_tracer.tc
@@ -0,0 +1,73 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: test for the preemptirqsoff tracer
+
+MOD=preemptirq_delay_test
+
+fail() {
+    reset_tracer
+    rmmod $MOD || true
+    exit_fail
+}
+
+unsup() { #msg
+    reset_tracer
+    rmmod $MOD || true
+    echo $1
+    exit_unsupported
+}
+
+modprobe $MOD || unsup "$MOD module not available"
+rmmod $MOD
+
+grep -q "preemptoff" available_tracers || unsup "preemptoff tracer not enabled"
+grep -q "irqsoff" available_tracers || unsup "irqsoff tracer not enabled"
+
+reset_tracer
+
+# Simulate preemptoff section for half a second couple of times
+echo preemptoff > current_tracer
+sleep 1
+modprobe $MOD test_mode=preempt delay=500000 || fail
+rmmod $MOD || fail
+modprobe $MOD test_mode=preempt delay=500000 || fail
+rmmod $MOD || fail
+modprobe $MOD test_mode=preempt delay=500000 || fail
+rmmod $MOD || fail
+
+cat trace
+
+# Confirm which tracer
+grep -q "tracer: preemptoff" trace || fail
+
+# Check the end of the section
+egrep -q "5.....us : <stack trace>" trace || fail
+
+# Check for 500ms of latency
+egrep -q "latency: 5..... us" trace || fail
+
+reset_tracer
+
+# Simulate irqsoff section for half a second couple of times
+echo irqsoff > current_tracer
+sleep 1
+modprobe $MOD test_mode=irq delay=500000 || fail
+rmmod $MOD || fail
+modprobe $MOD test_mode=irq delay=500000 || fail
+rmmod $MOD || fail
+modprobe $MOD test_mode=irq delay=500000 || fail
+rmmod $MOD || fail
+
+cat trace
+
+# Confirm which tracer
+grep -q "tracer: irqsoff" trace || fail
+
+# Check the end of the section
+egrep -q "5.....us : <stack trace>" trace || fail
+
+# Check for 500ms of latency
+egrep -q "latency: 5..... us" trace || fail
+
+reset_tracer
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/template b/tools/testing/selftests/ftrace/test.d/template
new file mode 100644
index 000000000..5c39ceb18
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/template
@@ -0,0 +1,10 @@
+#!/bin/sh
+# description: %HERE DESCRIBE WHAT THIS DOES%
+# you have to add ".tc" extention for your testcase file
+# Note that all tests are run with "errexit" option.
+
+exit 0 # Return 0 if the test is passed, otherwise return !0
+# Or you can call exit_pass for passed test, and exit_fail for failed test.
+# If the test could not run because of lack of feature, call exit_unsupported
+# If the test returned unclear results, call exit_unresolved
+# If the test is a dummy, or a placeholder, call exit_untested
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-extended-error-support.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-extended-error-support.tc
new file mode 100644
index 000000000..2aabab363
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-extended-error-support.tc
@@ -0,0 +1,39 @@
+#!/bin/sh
+# description: event trigger - test extended error support
+
+
+do_reset() {
+    reset_trigger
+    echo > set_event
+    clear_trace
+}
+
+fail() { #msg
+    do_reset
+    echo $1
+    exit_fail
+}
+
+if [ ! -f set_event ]; then
+    echo "event tracing is not supported"
+    exit_unsupported
+fi
+
+if [ ! -f synthetic_events ]; then
+    echo "synthetic event is not supported"
+    exit_unsupported
+fi
+
+reset_tracer
+do_reset
+
+echo "Test extended error support"
+echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="ping"' > events/sched/sched_wakeup/trigger
+! echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="ping"' >> events/sched/sched_wakeup/trigger 2> /dev/null
+if ! grep -q "ERROR:" events/sched/sched_wakeup/hist; then
+    fail "Failed to generate extended error in histogram"
+fi
+
+do_reset
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-field-variable-support.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-field-variable-support.tc
new file mode 100644
index 000000000..7fd5b4a8f
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-field-variable-support.tc
@@ -0,0 +1,54 @@
+#!/bin/sh
+# description: event trigger - test field variable support
+
+do_reset() {
+    reset_trigger
+    echo > set_event
+    clear_trace
+}
+
+fail() { #msg
+    do_reset
+    echo $1
+    exit_fail
+}
+
+if [ ! -f set_event ]; then
+    echo "event tracing is not supported"
+    exit_unsupported
+fi
+
+if [ ! -f synthetic_events ]; then
+    echo "synthetic event is not supported"
+    exit_unsupported
+fi
+
+clear_synthetic_events
+reset_tracer
+do_reset
+
+echo "Test field variable support"
+
+echo 'wakeup_latency u64 lat; pid_t pid; int prio; char comm[16]' > synthetic_events
+echo 'hist:keys=comm:ts0=common_timestamp.usecs if comm=="ping"' > events/sched/sched_waking/trigger
+echo 'hist:keys=next_comm:wakeup_lat=common_timestamp.usecs-$ts0:onmatch(sched.sched_waking).wakeup_latency($wakeup_lat,next_pid,sched.sched_waking.prio,next_comm) if next_comm=="ping"' > events/sched/sched_switch/trigger
+echo 'hist:keys=pid,prio,comm:vals=lat:sort=pid,prio' > events/synthetic/wakeup_latency/trigger
+
+ping localhost -c 3
+if ! grep -q "ping" events/synthetic/wakeup_latency/hist; then
+    fail "Failed to create inter-event histogram"
+fi
+
+if ! grep -q "synthetic_prio=prio" events/sched/sched_waking/hist; then
+    fail "Failed to create histogram with field variable"
+fi
+
+echo '!hist:keys=next_comm:wakeup_lat=common_timestamp.usecs-$ts0:onmatch(sched.sched_waking).wakeup_latency($wakeup_lat,next_pid,sched.sched_waking.prio,next_comm) if next_comm=="ping"' >> events/sched/sched_switch/trigger
+
+if grep -q "synthetic_prio=prio" events/sched/sched_waking/hist; then
+    fail "Failed to remove histogram with field variable"
+fi
+
+do_reset
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-inter-event-combined-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-inter-event-combined-hist.tc
new file mode 100644
index 000000000..c93dbe38b
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-inter-event-combined-hist.tc
@@ -0,0 +1,58 @@
+#!/bin/sh
+# description: event trigger - test inter-event combined histogram trigger
+
+do_reset() {
+    reset_trigger
+    echo > set_event
+    clear_trace
+}
+
+fail() { #msg
+    do_reset
+    echo $1
+    exit_fail
+}
+
+if [ ! -f set_event ]; then
+    echo "event tracing is not supported"
+    exit_unsupported
+fi
+
+if [ ! -f synthetic_events ]; then
+    echo "synthetic event is not supported"
+    exit_unsupported
+fi
+
+reset_tracer
+do_reset
+clear_synthetic_events
+
+echo "Test create synthetic event"
+
+echo 'waking_latency  u64 lat pid_t pid' > synthetic_events
+if [ ! -d events/synthetic/waking_latency ]; then
+    fail "Failed to create waking_latency synthetic event"
+fi
+
+echo "Test combined histogram"
+
+echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="ping"' > events/sched/sched_waking/trigger
+echo 'hist:keys=pid:waking_lat=common_timestamp.usecs-$ts0:onmatch(sched.sched_waking).waking_latency($waking_lat,pid) if comm=="ping"' > events/sched/sched_wakeup/trigger
+echo 'hist:keys=pid,lat:sort=pid,lat' > events/synthetic/waking_latency/trigger
+
+echo 'wakeup_latency u64 lat pid_t pid' >> synthetic_events
+echo 'hist:keys=pid:ts1=common_timestamp.usecs if comm=="ping"' >> events/sched/sched_wakeup/trigger
+echo 'hist:keys=next_pid:wakeup_lat=common_timestamp.usecs-$ts1:onmatch(sched.sched_wakeup).wakeup_latency($wakeup_lat,next_pid) if next_comm=="ping"' > events/sched/sched_switch/trigger
+
+echo 'waking+wakeup_latency u64 lat; pid_t pid' >> synthetic_events
+echo 'hist:keys=pid,lat:sort=pid,lat:ww_lat=$waking_lat+$wakeup_lat:onmatch(synthetic.wakeup_latency).waking+wakeup_latency($ww_lat,pid)' >> events/synthetic/wakeup_latency/trigger
+echo 'hist:keys=pid,lat:sort=pid,lat' >> events/synthetic/waking+wakeup_latency/trigger
+
+ping localhost -c 3
+if ! grep -q "pid:" events/synthetic/waking+wakeup_latency/hist; then
+    fail "Failed to create combined histogram"
+fi
+
+do_reset
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-multi-actions-accept.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-multi-actions-accept.tc
new file mode 100644
index 000000000..c193dce61
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-multi-actions-accept.tc
@@ -0,0 +1,44 @@
+#!/bin/sh
+# description: event trigger - test multiple actions on hist trigger
+
+
+do_reset() {
+    reset_trigger
+    echo > set_event
+    clear_trace
+}
+
+fail() { #msg
+    do_reset
+    echo $1
+    exit_fail
+}
+
+if [ ! -f set_event ]; then
+    echo "event tracing is not supported"
+    exit_unsupported
+fi
+
+if [ ! -f synthetic_events ]; then
+    echo "synthetic event is not supported"
+    exit_unsupported
+fi
+
+clear_synthetic_events
+reset_tracer
+do_reset
+
+echo "Test multiple actions on hist trigger"
+echo 'wakeup_latency u64 lat; pid_t pid' >> synthetic_events
+TRIGGER1=events/sched/sched_wakeup/trigger
+TRIGGER2=events/sched/sched_switch/trigger
+
+echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="cyclictest"' > $TRIGGER1
+echo 'hist:keys=next_pid:wakeup_lat=common_timestamp.usecs-$ts0 if next_comm=="cyclictest"' >> $TRIGGER2
+echo 'hist:keys=next_pid:onmatch(sched.sched_wakeup).wakeup_latency(sched.sched_switch.$wakeup_lat,next_pid) if next_comm=="cyclictest"' >> $TRIGGER2
+echo 'hist:keys=next_pid:onmatch(sched.sched_wakeup).wakeup_latency(sched.sched_switch.$wakeup_lat,prev_pid) if next_comm=="cyclictest"' >> $TRIGGER2
+echo 'hist:keys=next_pid if next_comm=="cyclictest"' >> $TRIGGER2
+
+do_reset
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-action-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-action-hist.tc
new file mode 100644
index 000000000..e84e7d048
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-action-hist.tc
@@ -0,0 +1,50 @@
+#!/bin/sh
+# description: event trigger - test inter-event histogram trigger onmatch action
+
+do_reset() {
+    reset_trigger
+    echo > set_event
+    clear_trace
+}
+
+fail() { #msg
+    do_reset
+    echo $1
+    exit_fail
+}
+
+if [ ! -f set_event ]; then
+    echo "event tracing is not supported"
+    exit_unsupported
+fi
+
+if [ ! -f synthetic_events ]; then
+    echo "synthetic event is not supported"
+    exit_unsupported
+fi
+
+clear_synthetic_events
+reset_tracer
+do_reset
+
+echo "Test create synthetic event"
+
+echo 'wakeup_latency  u64 lat pid_t pid char comm[16]' > synthetic_events
+if [ ! -d events/synthetic/wakeup_latency ]; then
+    fail "Failed to create wakeup_latency synthetic event"
+fi
+
+echo "Test create histogram for synthetic event"
+echo "Test histogram variables,simple expression support and onmatch action"
+
+echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="ping"' > events/sched/sched_wakeup/trigger
+echo 'hist:keys=next_pid:wakeup_lat=common_timestamp.usecs-$ts0:onmatch(sched.sched_wakeup).wakeup_latency($wakeup_lat,next_pid,next_comm) if next_comm=="ping"' > events/sched/sched_switch/trigger
+echo 'hist:keys=comm,pid,lat:wakeup_lat=lat:sort=lat' > events/synthetic/wakeup_latency/trigger
+ping localhost -c 5
+if ! grep -q "ping" events/synthetic/wakeup_latency/hist; then
+    fail "Failed to create onmatch action inter-event histogram"
+fi
+
+do_reset
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-onmax-action-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-onmax-action-hist.tc
new file mode 100644
index 000000000..7907d8aac
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-onmax-action-hist.tc
@@ -0,0 +1,50 @@
+#!/bin/sh
+# description: event trigger - test inter-event histogram trigger onmatch-onmax action
+
+do_reset() {
+    reset_trigger
+    echo > set_event
+    clear_trace
+}
+
+fail() { #msg
+    do_reset
+    echo $1
+    exit_fail
+}
+
+if [ ! -f set_event ]; then
+    echo "event tracing is not supported"
+    exit_unsupported
+fi
+
+if [ ! -f synthetic_events ]; then
+    echo "synthetic event is not supported"
+    exit_unsupported
+fi
+
+clear_synthetic_events
+reset_tracer
+do_reset
+
+echo "Test create synthetic event"
+
+echo 'wakeup_latency  u64 lat pid_t pid char comm[16]' > synthetic_events
+if [ ! -d events/synthetic/wakeup_latency ]; then
+    fail "Failed to create wakeup_latency synthetic event"
+fi
+
+echo "Test create histogram for synthetic event"
+echo "Test histogram variables,simple expression support and onmatch-onmax action"
+
+echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="ping"' > events/sched/sched_wakeup/trigger
+echo 'hist:keys=next_pid:wakeup_lat=common_timestamp.usecs-$ts0:onmatch(sched.sched_wakeup).wakeup_latency($wakeup_lat,next_pid,next_comm):onmax($wakeup_lat).save(next_comm,prev_pid,prev_prio,prev_comm) if next_comm=="ping"' >> events/sched/sched_switch/trigger
+echo 'hist:keys=comm,pid,lat:wakeup_lat=lat:sort=lat' > events/synthetic/wakeup_latency/trigger
+ping localhost -c 5
+if [ ! grep -q "ping" events/synthetic/wakeup_latency/hist -o ! grep -q "max:" events/sched/sched_switch/hist]; then
+    fail "Failed to create onmatch-onmax action inter-event histogram"
+fi
+
+do_reset
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmax-action-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmax-action-hist.tc
new file mode 100644
index 000000000..38b7ed624
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmax-action-hist.tc
@@ -0,0 +1,48 @@
+#!/bin/sh
+# description: event trigger - test inter-event histogram trigger onmax action
+
+do_reset() {
+    reset_trigger
+    echo > set_event
+    clear_trace
+}
+
+fail() { #msg
+    do_reset
+    echo $1
+    exit_fail
+}
+
+if [ ! -f set_event ]; then
+    echo "event tracing is not supported"
+    exit_unsupported
+fi
+
+if [ ! -f synthetic_events ]; then
+    echo "synthetic event is not supported"
+    exit_unsupported
+fi
+
+clear_synthetic_events
+reset_tracer
+do_reset
+
+echo "Test create synthetic event"
+
+echo 'wakeup_latency  u64 lat pid_t pid char comm[16]' > synthetic_events
+if [ ! -d events/synthetic/wakeup_latency ]; then
+    fail "Failed to create wakeup_latency synthetic event"
+fi
+
+echo "Test onmax action"
+
+echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="ping"' >> events/sched/sched_waking/trigger
+echo 'hist:keys=next_pid:wakeup_lat=common_timestamp.usecs-$ts0:onmax($wakeup_lat).save(next_comm,prev_pid,prev_prio,prev_comm) if next_comm=="ping"' >> events/sched/sched_switch/trigger
+ping localhost -c 3
+if ! grep -q "max:" events/sched/sched_switch/hist; then
+    fail "Failed to create onmax action inter-event histogram"
+fi
+
+do_reset
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-createremove.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-createremove.tc
new file mode 100644
index 000000000..c604438df
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-createremove.tc
@@ -0,0 +1,54 @@
+#!/bin/sh
+# description: event trigger - test synthetic event create remove
+do_reset() {
+    reset_trigger
+    echo > set_event
+    clear_trace
+}
+
+fail() { #msg
+    do_reset
+    echo $1
+    exit_fail
+}
+
+if [ ! -f set_event ]; then
+    echo "event tracing is not supported"
+    exit_unsupported
+fi
+
+if [ ! -f synthetic_events ]; then
+    echo "synthetic event is not supported"
+    exit_unsupported
+fi
+
+clear_synthetic_events
+reset_tracer
+do_reset
+
+echo "Test create synthetic event"
+
+echo 'wakeup_latency  u64 lat pid_t pid char comm[16]' > synthetic_events
+if [ ! -d events/synthetic/wakeup_latency ]; then
+    fail "Failed to create wakeup_latency synthetic event"
+fi
+
+reset_trigger
+
+echo "Test remove synthetic event"
+echo '!wakeup_latency  u64 lat pid_t pid char comm[16]' >> synthetic_events
+if [ -d events/synthetic/wakeup_latency ]; then
+    fail "Failed to delete wakeup_latency synthetic event"
+fi
+
+reset_trigger
+
+echo "Test create synthetic event with an error"
+echo 'wakeup_latency  u64 lat pid_t pid char' > synthetic_events > /dev/null
+if [ -d events/synthetic/wakeup_latency ]; then
+    fail "Created wakeup_latency synthetic event with an invalid format"
+fi
+
+do_reset
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-syntax.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-syntax.tc
new file mode 100644
index 000000000..88e6c3f43
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-syntax.tc
@@ -0,0 +1,80 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: event trigger - test synthetic_events syntax parser
+
+do_reset() {
+    reset_trigger
+    echo > set_event
+    clear_trace
+}
+
+fail() { #msg
+    do_reset
+    echo $1
+    exit_fail
+}
+
+if [ ! -f set_event ]; then
+    echo "event tracing is not supported"
+    exit_unsupported
+fi
+
+if [ ! -f synthetic_events ]; then
+    echo "synthetic event is not supported"
+    exit_unsupported
+fi
+
+reset_tracer
+do_reset
+
+echo "Test synthetic_events syntax parser"
+
+echo > synthetic_events
+
+# synthetic event must have a field
+! echo "myevent" >> synthetic_events
+echo "myevent u64 var1" >> synthetic_events
+
+# synthetic event must be found in synthetic_events
+grep "myevent[[:space:]]u64 var1" synthetic_events
+
+# it is not possible to add same name event
+! echo "myevent u64 var2" >> synthetic_events
+
+# Non-append open will cleanup all events and add new one
+echo "myevent u64 var2" > synthetic_events
+
+# multiple fields with different spaces
+echo "myevent u64 var1; u64 var2;" > synthetic_events
+grep "myevent[[:space:]]u64 var1; u64 var2" synthetic_events
+echo "myevent u64 var1 ; u64 var2 ;" > synthetic_events
+grep "myevent[[:space:]]u64 var1; u64 var2" synthetic_events
+echo "myevent u64 var1 ;u64 var2" > synthetic_events
+grep "myevent[[:space:]]u64 var1; u64 var2" synthetic_events
+
+# test field types
+echo "myevent u32 var" > synthetic_events
+echo "myevent u16 var" > synthetic_events
+echo "myevent u8 var" > synthetic_events
+echo "myevent s64 var" > synthetic_events
+echo "myevent s32 var" > synthetic_events
+echo "myevent s16 var" > synthetic_events
+echo "myevent s8 var" > synthetic_events
+
+echo "myevent char var" > synthetic_events
+echo "myevent int var" > synthetic_events
+echo "myevent long var" > synthetic_events
+echo "myevent pid_t var" > synthetic_events
+
+echo "myevent unsigned char var" > synthetic_events
+echo "myevent unsigned int var" > synthetic_events
+echo "myevent unsigned long var" > synthetic_events
+grep "myevent[[:space:]]unsigned long var" synthetic_events
+
+# test string type
+echo "myevent char var[10]" > synthetic_events
+grep "myevent[[:space:]]char\[10\] var" synthetic_events
+
+do_reset
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-eventonoff.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-eventonoff.tc
new file mode 100644
index 000000000..28cc355a3
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-eventonoff.tc
@@ -0,0 +1,66 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: event trigger - test event enable/disable trigger
+# flags: instance
+
+do_reset() {
+    reset_trigger
+    echo > set_event
+    clear_trace
+}
+
+fail() { #msg
+    do_reset
+    echo $1
+    exit_fail
+}
+
+if [ ! -f set_event -o ! -d events/sched ]; then
+    echo "event tracing is not supported"
+    exit_unsupported
+fi
+
+if [ ! -f events/sched/sched_process_fork/trigger ]; then
+    echo "event trigger is not supported"
+    exit_unsupported
+fi
+
+reset_tracer
+do_reset
+
+FEATURE=`grep enable_event events/sched/sched_process_fork/trigger`
+if [ -z "$FEATURE" ]; then
+    echo "event enable/disable trigger is not supported"
+    exit_unsupported
+fi
+
+echo "Test enable_event trigger"
+echo 0 > events/sched/sched_switch/enable
+echo 'enable_event:sched:sched_switch' > events/sched/sched_process_fork/trigger
+( echo "forked")
+if [ `cat events/sched/sched_switch/enable` != '1*' ]; then
+    fail "enable_event trigger on sched_process_fork did not work"
+fi
+
+reset_trigger
+
+echo "Test disable_event trigger"
+echo 1 > events/sched/sched_switch/enable
+echo 'disable_event:sched:sched_switch' > events/sched/sched_process_fork/trigger
+( echo "forked")
+if [ `cat events/sched/sched_switch/enable` != '0*' ]; then
+    fail "disable_event trigger on sched_process_fork did not work"
+fi
+
+reset_trigger
+
+echo "Test semantic error for event enable/disable trigger"
+! echo 'enable_event:nogroup:noevent' > events/sched/sched_process_fork/trigger
+! echo 'disable_event+1' > events/sched/sched_process_fork/trigger
+echo 'enable_event:sched:sched_switch' > events/sched/sched_process_fork/trigger
+! echo 'enable_event:sched:sched_switch' > events/sched/sched_process_fork/trigger
+! echo 'disable_event:sched:sched_switch' > events/sched/sched_process_fork/trigger
+
+do_reset
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-filter.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-filter.tc
new file mode 100644
index 000000000..a48e23eb8
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-filter.tc
@@ -0,0 +1,61 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: event trigger - test trigger filter
+# flags: instance
+
+do_reset() {
+    reset_trigger
+    echo > set_event
+    clear_trace
+}
+
+fail() { #msg
+    do_reset
+    echo $1
+    exit_fail
+}
+
+if [ ! -f set_event -o ! -d events/sched ]; then
+    echo "event tracing is not supported"
+    exit_unsupported
+fi
+
+if [ ! -f events/sched/sched_process_fork/trigger ]; then
+    echo "event trigger is not supported"
+    exit_unsupported
+fi
+
+reset_tracer
+do_reset
+
+echo "Test trigger filter"
+echo 1 > tracing_on
+echo 'traceoff if child_pid == 0' > events/sched/sched_process_fork/trigger
+( echo "forked")
+if [ `cat tracing_on` -ne 1 ]; then
+    fail "traceoff trigger on sched_process_fork did not work"
+fi
+
+reset_trigger
+
+echo "Test semantic error for trigger filter"
+! echo 'traceoff if a' > events/sched/sched_process_fork/trigger
+! echo 'traceoff if common_pid=0' > events/sched/sched_process_fork/trigger
+! echo 'traceoff if common_pid==b' > events/sched/sched_process_fork/trigger
+echo 'traceoff if common_pid == 0' > events/sched/sched_process_fork/trigger
+echo '!traceoff' > events/sched/sched_process_fork/trigger
+! echo 'traceoff if common_pid == child_pid' > events/sched/sched_process_fork/trigger
+echo 'traceoff if common_pid <= 0' > events/sched/sched_process_fork/trigger
+echo '!traceoff' > events/sched/sched_process_fork/trigger
+echo 'traceoff if common_pid >= 0' > events/sched/sched_process_fork/trigger
+echo '!traceoff' > events/sched/sched_process_fork/trigger
+echo 'traceoff if parent_pid >= 0 && child_pid >= 0' > events/sched/sched_process_fork/trigger
+echo '!traceoff' > events/sched/sched_process_fork/trigger
+echo 'traceoff if parent_pid >= 0 || child_pid >= 0' > events/sched/sched_process_fork/trigger
+echo '!traceoff' > events/sched/sched_process_fork/trigger
+
+
+
+do_reset
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-mod.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-mod.tc
new file mode 100644
index 000000000..8da80efc4
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-mod.tc
@@ -0,0 +1,76 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: event trigger - test histogram modifiers
+# flags: instance
+
+do_reset() {
+    reset_trigger
+    echo > set_event
+    clear_trace
+}
+
+fail() { #msg
+    do_reset
+    echo $1
+    exit_fail
+}
+
+if [ ! -f set_event -o ! -d events/sched ]; then
+    echo "event tracing is not supported"
+    exit_unsupported
+fi
+
+if [ ! -f events/sched/sched_process_fork/trigger ]; then
+    echo "event trigger is not supported"
+    exit_unsupported
+fi
+
+if [ ! -f events/sched/sched_process_fork/hist ]; then
+    echo "hist trigger is not supported"
+    exit_unsupported
+fi
+
+reset_tracer
+do_reset
+
+echo "Test histogram with execname modifier"
+
+echo 'hist:keys=common_pid.execname' > events/sched/sched_process_fork/trigger
+for i in `seq 1 10` ; do ( echo "forked" > /dev/null); done
+COMM=`cat /proc/$$/comm`
+grep "common_pid: $COMM" events/sched/sched_process_fork/hist > /dev/null || \
+    fail "execname modifier on sched_process_fork did not work"
+
+reset_trigger
+
+echo "Test histogram with hex modifier"
+
+echo 'hist:keys=parent_pid.hex' > events/sched/sched_process_fork/trigger
+for i in `seq 1 10` ; do ( echo "forked" > /dev/null); done
+# Note that $$ is the parent pid. $PID is current PID.
+HEX=`printf %x $PID`
+grep "parent_pid: $HEX" events/sched/sched_process_fork/hist > /dev/null || \
+    fail "hex modifier on sched_process_fork did not work"
+
+reset_trigger
+
+echo "Test histogram with syscall modifier"
+
+echo 'hist:keys=id.syscall' > events/raw_syscalls/sys_exit/trigger
+for i in `seq 1 10` ; do ( echo "forked" > /dev/null); done
+grep "id: \(unknown_\|sys_\)" events/raw_syscalls/sys_exit/hist > /dev/null || \
+    fail "syscall modifier on raw_syscalls/sys_exit did not work"
+
+
+reset_trigger
+
+echo "Test histgram with log2 modifier"
+
+echo 'hist:keys=bytes_req.log2' > events/kmem/kmalloc/trigger
+for i in `seq 1 10` ; do ( echo "forked" > /dev/null); done
+grep 'bytes_req: ~ 2^[0-9]*' events/kmem/kmalloc/hist > /dev/null || \
+    fail "log2 modifier on kmem/kmalloc did not work"
+
+do_reset
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist.tc
new file mode 100644
index 000000000..449fe9ff9
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist.tc
@@ -0,0 +1,84 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: event trigger - test histogram trigger
+# flags: instance
+
+do_reset() {
+    reset_trigger
+    echo > set_event
+    clear_trace
+}
+
+fail() { #msg
+    do_reset
+    echo $1
+    exit_fail
+}
+
+if [ ! -f set_event -o ! -d events/sched ]; then
+    echo "event tracing is not supported"
+    exit_unsupported
+fi
+
+if [ ! -f events/sched/sched_process_fork/trigger ]; then
+    echo "event trigger is not supported"
+    exit_unsupported
+fi
+
+if [ ! -f events/sched/sched_process_fork/hist ]; then
+    echo "hist trigger is not supported"
+    exit_unsupported
+fi
+
+reset_tracer
+do_reset
+
+echo "Test histogram basic tigger"
+
+echo 'hist:keys=parent_pid:vals=child_pid' > events/sched/sched_process_fork/trigger
+for i in `seq 1 10` ; do ( echo "forked" > /dev/null); done
+grep parent_pid events/sched/sched_process_fork/hist > /dev/null || \
+    fail "hist trigger on sched_process_fork did not work"
+grep child events/sched/sched_process_fork/hist > /dev/null || \
+    fail "hist trigger on sched_process_fork did not work"
+
+reset_trigger
+
+echo "Test histogram with compound keys"
+
+echo 'hist:keys=parent_pid,child_pid' > events/sched/sched_process_fork/trigger
+for i in `seq 1 10` ; do ( echo "forked" > /dev/null); done
+grep '^{ parent_pid:.*, child_pid:.*}' events/sched/sched_process_fork/hist > /dev/null || \
+    fail "compound keys on sched_process_fork did not work"
+
+reset_trigger
+
+echo "Test histogram with string key"
+
+echo 'hist:keys=parent_comm' > events/sched/sched_process_fork/trigger
+for i in `seq 1 10` ; do ( echo "forked" > /dev/null); done
+COMM=`cat /proc/$$/comm`
+grep "parent_comm: $COMM" events/sched/sched_process_fork/hist > /dev/null || \
+    fail "string key on sched_process_fork did not work"
+
+reset_trigger
+
+echo "Test histogram with sort key"
+
+echo 'hist:keys=parent_pid,child_pid:sort=child_pid.ascending' > events/sched/sched_process_fork/trigger
+for i in `seq 1 10` ; do ( echo "forked" > /dev/null); done
+
+check_inc() {
+    while [ $# -gt 1 ]; do
+        [ $1 -gt $2 ] && return 1
+        shift 1
+    done
+    return 0
+}
+check_inc `grep -o "child_pid:[[:space:]]*[[:digit:]]*" \
+    events/sched/sched_process_fork/hist | cut -d: -f2 ` ||
+    fail "sort param on sched_process_fork did not work"
+
+do_reset
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-multihist.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-multihist.tc
new file mode 100644
index 000000000..c5ef8b9d0
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-multihist.tc
@@ -0,0 +1,74 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: event trigger - test multiple histogram triggers
+# flags: instance
+
+do_reset() {
+    reset_trigger
+    echo > set_event
+    clear_trace
+}
+
+fail() { #msg
+    do_reset
+    echo $1
+    exit_fail
+}
+
+if [ ! -f set_event -o ! -d events/sched ]; then
+    echo "event tracing is not supported"
+    exit_unsupported
+fi
+
+if [ ! -f events/sched/sched_process_fork/trigger ]; then
+    echo "event trigger is not supported"
+    exit_unsupported
+fi
+
+if [ ! -f events/sched/sched_process_fork/hist ]; then
+    echo "hist trigger is not supported"
+    exit_unsupported
+fi
+
+reset_tracer
+do_reset
+
+reset_trigger
+
+echo "Test histogram multiple tiggers"
+
+echo 'hist:keys=parent_pid:vals=child_pid' > events/sched/sched_process_fork/trigger
+echo 'hist:keys=parent_comm:vals=child_pid' >> events/sched/sched_process_fork/trigger
+for i in `seq 1 10` ; do ( echo "forked" > /dev/null); done
+grep parent_pid events/sched/sched_process_fork/hist > /dev/null || \
+    fail "hist trigger on sched_process_fork did not work"
+grep child events/sched/sched_process_fork/hist > /dev/null || \
+    fail "hist trigger on sched_process_fork did not work"
+COMM=`cat /proc/$$/comm`
+grep "parent_comm: $COMM" events/sched/sched_process_fork/hist > /dev/null || \
+    fail "string key on sched_process_fork did not work"
+
+reset_trigger
+
+echo "Test histogram with its name"
+
+echo 'hist:name=test_hist:keys=common_pid' > events/sched/sched_process_fork/trigger
+for i in `seq 1 10` ; do ( echo "forked" > /dev/null); done
+grep test_hist events/sched/sched_process_fork/hist > /dev/null || \
+    fail "named event on sched_process_fork did not work"
+
+echo "Test same named histogram on different events"
+
+echo 'hist:name=test_hist:keys=common_pid' > events/sched/sched_process_exit/trigger
+for i in `seq 1 10` ; do ( echo "forked" > /dev/null); done
+grep test_hist events/sched/sched_process_exit/hist > /dev/null || \
+    fail "named event on sched_process_fork did not work"
+
+diffs=`diff events/sched/sched_process_exit/hist events/sched/sched_process_fork/hist | wc -l`
+test $diffs -eq 0 || fail "Same name histograms are not same"
+
+reset_trigger
+
+do_reset
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-snapshot.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-snapshot.tc
new file mode 100644
index 000000000..ed38f0050
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-snapshot.tc
@@ -0,0 +1,62 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: event trigger - test snapshot-trigger
+
+do_reset() {
+    reset_trigger
+    echo > set_event
+    clear_trace
+}
+
+fail() { #msg
+    do_reset
+    echo $1
+    exit_fail
+}
+
+if [ ! -f set_event -o ! -d events/sched ]; then
+    echo "event tracing is not supported"
+    exit_unsupported
+fi
+
+if [ ! -f events/sched/sched_process_fork/trigger ]; then
+    echo "event trigger is not supported"
+    exit_unsupported
+fi
+
+if [ ! -f snapshot ]; then
+    echo "snapshot is not supported"
+    exit_unsupported
+fi
+
+reset_tracer
+do_reset
+
+FEATURE=`grep snapshot events/sched/sched_process_fork/trigger`
+if [ -z "$FEATURE" ]; then
+    echo "snapshot trigger is not supported"
+    exit_unsupported
+fi
+
+echo "Test snapshot tigger"
+echo 0 > snapshot
+echo 1 > events/sched/sched_process_fork/enable
+( echo "forked")
+echo 'snapshot:1' > events/sched/sched_process_fork/trigger
+( echo "forked")
+grep sched_process_fork snapshot > /dev/null || \
+    fail "snapshot trigger on sched_process_fork did not work"
+
+reset_trigger
+echo 0 > snapshot
+echo 0 > events/sched/sched_process_fork/enable
+
+echo "Test snapshot semantic errors"
+
+! echo "snapshot+1" > events/sched/sched_process_fork/trigger
+echo "snapshot" > events/sched/sched_process_fork/trigger
+! echo "snapshot" > events/sched/sched_process_fork/trigger
+
+do_reset
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-stacktrace.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-stacktrace.tc
new file mode 100644
index 000000000..3121d795a
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-stacktrace.tc
@@ -0,0 +1,54 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: event trigger - test stacktrace-trigger
+
+do_reset() {
+    reset_trigger
+    echo > set_event
+    clear_trace
+}
+
+fail() { #msg
+    do_reset
+    echo $1
+    exit_fail
+}
+
+if [ ! -f set_event -o ! -d events/sched ]; then
+    echo "event tracing is not supported"
+    exit_unsupported
+fi
+
+if [ ! -f events/sched/sched_process_fork/trigger ]; then
+    echo "event trigger is not supported"
+    exit_unsupported
+fi
+
+reset_tracer
+do_reset
+
+FEATURE=`grep stacktrace events/sched/sched_process_fork/trigger`
+if [ -z "$FEATURE" ]; then
+    echo "stacktrace trigger is not supported"
+    exit_unsupported
+fi
+
+echo "Test stacktrace tigger"
+echo 0 > trace
+echo 0 > options/stacktrace
+echo 'stacktrace' > events/sched/sched_process_fork/trigger
+( echo "forked")
+grep "<stack trace>" trace > /dev/null || \
+    fail "stacktrace trigger on sched_process_fork did not work"
+
+reset_trigger
+
+echo "Test stacktrace semantic errors"
+
+! echo "stacktrace:foo" > events/sched/sched_process_fork/trigger
+echo "stacktrace" > events/sched/sched_process_fork/trigger
+! echo "stacktrace" > events/sched/sched_process_fork/trigger
+
+do_reset
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-hist.tc
new file mode 100644
index 000000000..2acbfe2c0
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-hist.tc
@@ -0,0 +1,49 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: trace_marker trigger - test histogram trigger
+# flags: instance
+
+do_reset() {
+    reset_trigger
+    echo > set_event
+    clear_trace
+}
+
+fail() { #msg
+    do_reset
+    echo $1
+    exit_fail
+}
+
+if [ ! -f set_event ]; then
+    echo "event tracing is not supported"
+    exit_unsupported
+fi
+
+if [ ! -d events/ftrace/print ]; then
+    echo "event trace_marker is not supported"
+    exit_unsupported
+fi
+
+if [ ! -f events/ftrace/print/trigger ]; then
+    echo "event trigger is not supported"
+    exit_unsupported
+fi
+
+if [ ! -f events/ftrace/print/hist ]; then
+    echo "hist trigger is not supported"
+    exit_unsupported
+fi
+
+do_reset
+
+echo "Test histogram trace_marker tigger"
+
+echo 'hist:keys=common_pid' > events/ftrace/print/trigger
+for i in `seq 1 10` ; do echo "hello" > trace_marker; done
+grep 'hitcount: *10$' events/ftrace/print/hist > /dev/null || \
+    fail "hist trigger did not trigger correct times on trace_marker"
+
+do_reset
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-snapshot.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-snapshot.tc
new file mode 100644
index 000000000..6748e8cb4
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-snapshot.tc
@@ -0,0 +1,74 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: trace_marker trigger - test snapshot trigger
+# flags: instance
+
+do_reset() {
+    reset_trigger
+    echo > set_event
+    echo 0 > snapshot
+    clear_trace
+}
+
+fail() { #msg
+    do_reset
+    echo $1
+    exit_fail
+}
+
+if [ ! -f set_event ]; then
+    echo "event tracing is not supported"
+    exit_unsupported
+fi
+
+if [ ! -f snapshot ]; then
+    echo "snapshot is not supported"
+    exit_unsupported
+fi
+
+if [ ! -d events/ftrace/print ]; then
+    echo "event trace_marker is not supported"
+    exit_unsupported
+fi
+
+if [ ! -f events/ftrace/print/trigger ]; then
+    echo "event trigger is not supported"
+    exit_unsupported
+fi
+
+test_trace() {
+    file=$1
+    x=$2
+
+    cat $file | while read line; do
+	comment=`echo $line | sed -e 's/^#//'`
+	if [ "$line" != "$comment" ]; then
+	    continue
+	fi
+	echo "testing $line for >$x<"
+	match=`echo $line | sed -e "s/>$x<//"`
+	if [ "$line" == "$match" ]; then
+	    fail "$line does not have >$x< in it"
+	fi
+	let x=$x+2
+    done
+}
+
+do_reset
+
+echo "Test snapshot trace_marker tigger"
+
+echo 'snapshot' > events/ftrace/print/trigger
+
+# make sure the snapshot is allocated
+
+grep -q 'Snapshot is allocated' snapshot
+
+for i in `seq 1 10` ; do echo "hello >$i<" > trace_marker; done
+
+test_trace trace 1
+test_trace snapshot 2
+
+do_reset
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-synthetic-kernel.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-synthetic-kernel.tc
new file mode 100644
index 000000000..0a69c5d1c
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-synthetic-kernel.tc
@@ -0,0 +1,68 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: trace_marker trigger - test histogram with synthetic event against kernel event
+# flags:
+
+do_reset() {
+    reset_trigger
+    echo > set_event
+    echo > synthetic_events
+    clear_trace
+}
+
+fail() { #msg
+    do_reset
+    echo $1
+    exit_fail
+}
+
+if [ ! -f set_event ]; then
+    echo "event tracing is not supported"
+    exit_unsupported
+fi
+
+if [ ! -f synthetic_events ]; then
+    echo "synthetic events not supported"
+    exit_unsupported
+fi
+
+if [ ! -d events/ftrace/print ]; then
+    echo "event trace_marker is not supported"
+    exit_unsupported
+fi
+
+if [ ! -d events/sched/sched_waking ]; then
+    echo "event sched_waking is not supported"
+    exit_unsupported
+fi
+
+if [ ! -f events/ftrace/print/trigger ]; then
+    echo "event trigger is not supported"
+    exit_unsupported
+fi
+
+if [ ! -f events/ftrace/print/hist ]; then
+    echo "hist trigger is not supported"
+    exit_unsupported
+fi
+
+do_reset
+
+echo "Test histogram kernel event to trace_marker latency histogram trigger"
+
+echo 'latency u64 lat' > synthetic_events
+echo 'hist:keys=pid:ts0=common_timestamp.usecs' > events/sched/sched_waking/trigger
+echo 'hist:keys=common_pid:lat=common_timestamp.usecs-$ts0:onmatch(sched.sched_waking).latency($lat)' > events/ftrace/print/trigger
+echo 'hist:keys=common_pid,lat:sort=lat' > events/synthetic/latency/trigger
+sleep 1
+echo "hello" > trace_marker
+
+grep 'hitcount: *1$' events/ftrace/print/hist > /dev/null || \
+    fail "hist trigger did not trigger correct times on trace_marker"
+
+grep 'hitcount: *1$' events/synthetic/latency/hist > /dev/null || \
+    fail "hist trigger did not trigger "
+
+do_reset
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-synthetic.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-synthetic.tc
new file mode 100644
index 000000000..3666dd6ab
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-synthetic.tc
@@ -0,0 +1,66 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: trace_marker trigger - test histogram with synthetic event
+# flags:
+
+do_reset() {
+    reset_trigger
+    echo > set_event
+    echo > synthetic_events
+    clear_trace
+}
+
+fail() { #msg
+    do_reset
+    echo $1
+    exit_fail
+}
+
+if [ ! -f set_event ]; then
+    echo "event tracing is not supported"
+    exit_unsupported
+fi
+
+if [ ! -f synthetic_events ]; then
+    echo "synthetic events not supported"
+    exit_unsupported
+fi
+
+if [ ! -d events/ftrace/print ]; then
+    echo "event trace_marker is not supported"
+    exit_unsupported
+fi
+
+if [ ! -f events/ftrace/print/trigger ]; then
+    echo "event trigger is not supported"
+    exit_unsupported
+fi
+
+if [ ! -f events/ftrace/print/hist ]; then
+    echo "hist trigger is not supported"
+    exit_unsupported
+fi
+
+do_reset
+
+echo "Test histogram trace_marker to trace_marker latency histogram trigger"
+
+echo 'latency u64 lat' > synthetic_events
+echo 'hist:keys=common_pid:ts0=common_timestamp.usecs if buf == "start"' > events/ftrace/print/trigger
+echo 'hist:keys=common_pid:lat=common_timestamp.usecs-$ts0:onmatch(ftrace.print).latency($lat) if buf == "end"' >> events/ftrace/print/trigger
+echo 'hist:keys=common_pid,lat:sort=lat' > events/synthetic/latency/trigger
+echo -n "start" > trace_marker
+echo -n "end" > trace_marker
+
+cnt=`grep 'hitcount: *1$' events/ftrace/print/hist | wc -l`
+
+if [ $cnt -ne 2 ]; then
+    fail "hist trace_marker trigger did not trigger correctly"
+fi
+
+grep 'hitcount: *1$' events/synthetic/latency/hist > /dev/null || \
+    fail "hist trigger did not trigger "
+
+do_reset
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-traceonoff.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-traceonoff.tc
new file mode 100644
index 000000000..c59d9eb54
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-traceonoff.tc
@@ -0,0 +1,59 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: event trigger - test traceon/off trigger
+
+do_reset() {
+    reset_trigger
+    echo > set_event
+    clear_trace
+}
+
+fail() { #msg
+    do_reset
+    echo $1
+    exit_fail
+}
+
+if [ ! -f set_event -o ! -d events/sched ]; then
+    echo "event tracing is not supported"
+    exit_unsupported
+fi
+
+if [ ! -f events/sched/sched_process_fork/trigger ]; then
+    echo "event trigger is not supported"
+    exit_unsupported
+fi
+
+reset_tracer
+do_reset
+
+echo "Test traceoff trigger"
+echo 1 > tracing_on
+echo 'traceoff' > events/sched/sched_process_fork/trigger
+( echo "forked")
+if [ `cat tracing_on` -ne 0 ]; then
+    fail "traceoff trigger on sched_process_fork did not work"
+fi
+
+reset_trigger
+
+echo "Test traceon trigger"
+echo 0 > tracing_on
+echo 'traceon' > events/sched/sched_process_fork/trigger
+( echo "forked")
+if [ `cat tracing_on` -ne 1 ]; then
+    fail "traceoff trigger on sched_process_fork did not work"
+fi
+
+reset_trigger
+
+echo "Test semantic error for traceoff/on trigger"
+! echo 'traceoff:badparam' > events/sched/sched_process_fork/trigger
+! echo 'traceoff+0' > events/sched/sched_process_fork/trigger
+echo 'traceon' > events/sched/sched_process_fork/trigger
+! echo 'traceon' > events/sched/sched_process_fork/trigger
+! echo 'traceoff' > events/sched/sched_process_fork/trigger
+
+do_reset
+
+exit 0
diff --git a/tools/testing/selftests/futex/Makefile b/tools/testing/selftests/futex/Makefile
new file mode 100644
index 000000000..11e157d75
--- /dev/null
+++ b/tools/testing/selftests/futex/Makefile
@@ -0,0 +1,37 @@
+# SPDX-License-Identifier: GPL-2.0
+SUBDIRS := functional
+
+TEST_PROGS := run.sh
+
+.PHONY: all clean
+
+include ../lib.mk
+
+all:
+	@for DIR in $(SUBDIRS); do		\
+		BUILD_TARGET=$(OUTPUT)/$$DIR;	\
+		mkdir $$BUILD_TARGET  -p;	\
+		$(MAKE) OUTPUT=$$BUILD_TARGET -C $$DIR $@;\
+		if [ -e $$DIR/$(TEST_PROGS) ]; then \
+			rsync -a $$DIR/$(TEST_PROGS) $$BUILD_TARGET/; \
+		fi \
+	done
+
+override define INSTALL_RULE
+	mkdir -p $(INSTALL_PATH)
+	install -t $(INSTALL_PATH) $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES)
+
+	@for SUBDIR in $(SUBDIRS); do \
+		BUILD_TARGET=$(OUTPUT)/$$SUBDIR;	\
+		mkdir $$BUILD_TARGET  -p;	\
+		$(MAKE) OUTPUT=$$BUILD_TARGET -C $$SUBDIR INSTALL_PATH=$(INSTALL_PATH)/$$SUBDIR install; \
+	done;
+endef
+
+override define CLEAN
+	@for DIR in $(SUBDIRS); do		\
+		BUILD_TARGET=$(OUTPUT)/$$DIR;	\
+		mkdir $$BUILD_TARGET  -p;	\
+		$(MAKE) OUTPUT=$$BUILD_TARGET -C $$DIR $@;\
+	done
+endef
diff --git a/tools/testing/selftests/futex/README b/tools/testing/selftests/futex/README
new file mode 100644
index 000000000..f3926c33e
--- /dev/null
+++ b/tools/testing/selftests/futex/README
@@ -0,0 +1,62 @@
+Futex Test
+==========
+Futex Test is intended to thoroughly test the Linux kernel futex system call
+API.
+
+Functional tests shall test the documented behavior of the futex operation
+code under test. This includes checking for proper behavior under normal use,
+odd corner cases, regression tests, and abject abuse and misuse.
+
+Futextest will also provide example implementation of mutual exclusion
+primitives. These can be used as is in user applications or can serve as
+examples for system libraries. These will likely be added to either a new lib/
+directory or purely as header files under include/, I'm leaning toward the
+latter.
+
+Quick Start
+-----------
+# make
+# ./run.sh
+
+Design and Implementation Goals
+-------------------------------
+o Tests should be as self contained as is practical so as to facilitate sharing
+  the individual tests on mailing list discussions and bug reports.
+o The build system shall remain as simple as possible, avoiding any archive or
+  shared object building and linking.
+o Where possible, any helper functions or other package-wide code shall be
+  implemented in header files, avoiding the need to compile intermediate object
+  files.
+o External dependencies shall remain as minimal as possible. Currently gcc
+  and glibc are the only dependencies.
+o Tests return 0 for success and < 0 for failure.
+
+Output Formatting
+-----------------
+Test output shall be easily parsable by both human and machine. Title and
+results are printed to stdout, while intermediate ERROR or FAIL messages are
+sent to stderr. Tests shall support the -c option to print PASS, FAIL, and
+ERROR strings in color for easy visual parsing. Output shall conform to the
+following format:
+
+test_name: Description of the test
+	Arguments: arg1=val1 #units specified for clarity where appropriate
+	ERROR: Description of unexpected error
+	 FAIL: Reason for test failure
+	# FIXME: Perhaps an " INFO: informational message" option would be
+	#        useful here. Using -v to toggle it them on and off, as with -c.
+	# there may be multiple ERROR or FAIL messages
+Result: (PASS|FAIL|ERROR)
+
+Naming
+------
+o FIXME: decide on a sane test naming scheme.  Currently the tests are named
+  based on the primary futex operation they test. Eventually this will become a
+  problem as we intend to write multiple tests which collide in this namespace.
+  Perhaps something like "wait-wake-1" "wait-wake-2" is adequate, leaving the
+  detailed description in the test source and the output.
+
+Coding Style
+------------
+o The Futex Test project adheres to the coding standards set forth by Linux
+  kernel as defined in the Linux source Documentation/process/coding-style.rst.
diff --git a/tools/testing/selftests/futex/functional/.gitignore b/tools/testing/selftests/futex/functional/.gitignore
new file mode 100644
index 000000000..a09f57061
--- /dev/null
+++ b/tools/testing/selftests/futex/functional/.gitignore
@@ -0,0 +1,7 @@
+futex_requeue_pi
+futex_requeue_pi_mismatched_ops
+futex_requeue_pi_signal_restart
+futex_wait_private_mapped_file
+futex_wait_timeout
+futex_wait_uninitialized_heap
+futex_wait_wouldblock
diff --git a/tools/testing/selftests/futex/functional/Makefile b/tools/testing/selftests/futex/functional/Makefile
new file mode 100644
index 000000000..30996306c
--- /dev/null
+++ b/tools/testing/selftests/futex/functional/Makefile
@@ -0,0 +1,25 @@
+# SPDX-License-Identifier: GPL-2.0
+INCLUDES := -I../include -I../../
+CFLAGS := $(CFLAGS) -g -O2 -Wall -D_GNU_SOURCE -pthread $(INCLUDES)
+LDFLAGS := $(LDFLAGS) -pthread -lrt
+
+HEADERS := \
+	../include/futextest.h \
+	../include/atomic.h \
+	../include/logging.h
+TEST_GEN_FILES := \
+	futex_wait_timeout \
+	futex_wait_wouldblock \
+	futex_requeue_pi \
+	futex_requeue_pi_signal_restart \
+	futex_requeue_pi_mismatched_ops \
+	futex_wait_uninitialized_heap \
+	futex_wait_private_mapped_file
+
+TEST_PROGS := run.sh
+
+top_srcdir = ../../../../..
+KSFT_KHDR_INSTALL := 1
+include ../../lib.mk
+
+$(TEST_GEN_FILES): $(HEADERS)
diff --git a/tools/testing/selftests/futex/functional/futex_requeue_pi.c b/tools/testing/selftests/futex/functional/futex_requeue_pi.c
new file mode 100644
index 000000000..54cd5c414
--- /dev/null
+++ b/tools/testing/selftests/futex/functional/futex_requeue_pi.c
@@ -0,0 +1,412 @@
+/******************************************************************************
+ *
+ *   Copyright © International Business Machines  Corp., 2006-2008
+ *
+ *   This program is free software;  you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation; either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ * DESCRIPTION
+ *      This test excercises the futex syscall op codes needed for requeuing
+ *      priority inheritance aware POSIX condition variables and mutexes.
+ *
+ * AUTHORS
+ *      Sripathi Kodi <sripathik@in.ibm.com>
+ *      Darren Hart <dvhart@linux.intel.com>
+ *
+ * HISTORY
+ *      2008-Jan-13: Initial version by Sripathi Kodi <sripathik@in.ibm.com>
+ *      2009-Nov-6: futex test adaptation by Darren Hart <dvhart@linux.intel.com>
+ *
+ *****************************************************************************/
+
+#include <errno.h>
+#include <limits.h>
+#include <pthread.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <string.h>
+#include "atomic.h"
+#include "futextest.h"
+#include "logging.h"
+
+#define TEST_NAME "futex-requeue-pi"
+#define MAX_WAKE_ITERS 1000
+#define THREAD_MAX 10
+#define SIGNAL_PERIOD_US 100
+
+atomic_t waiters_blocked = ATOMIC_INITIALIZER;
+atomic_t waiters_woken = ATOMIC_INITIALIZER;
+
+futex_t f1 = FUTEX_INITIALIZER;
+futex_t f2 = FUTEX_INITIALIZER;
+futex_t wake_complete = FUTEX_INITIALIZER;
+
+/* Test option defaults */
+static long timeout_ns;
+static int broadcast;
+static int owner;
+static int locked;
+
+struct thread_arg {
+	long id;
+	struct timespec *timeout;
+	int lock;
+	int ret;
+};
+#define THREAD_ARG_INITIALIZER { 0, NULL, 0, 0 }
+
+void usage(char *prog)
+{
+	printf("Usage: %s\n", prog);
+	printf("  -b	Broadcast wakeup (all waiters)\n");
+	printf("  -c	Use color\n");
+	printf("  -h	Display this help message\n");
+	printf("  -l	Lock the pi futex across requeue\n");
+	printf("  -o	Use a third party pi futex owner during requeue (cancels -l)\n");
+	printf("  -t N	Timeout in nanoseconds (default: 0)\n");
+	printf("  -v L	Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n",
+	       VQUIET, VCRITICAL, VINFO);
+}
+
+int create_rt_thread(pthread_t *pth, void*(*func)(void *), void *arg,
+		     int policy, int prio)
+{
+	int ret;
+	struct sched_param schedp;
+	pthread_attr_t attr;
+
+	pthread_attr_init(&attr);
+	memset(&schedp, 0, sizeof(schedp));
+
+	ret = pthread_attr_setinheritsched(&attr, PTHREAD_EXPLICIT_SCHED);
+	if (ret) {
+		error("pthread_attr_setinheritsched\n", ret);
+		return -1;
+	}
+
+	ret = pthread_attr_setschedpolicy(&attr, policy);
+	if (ret) {
+		error("pthread_attr_setschedpolicy\n", ret);
+		return -1;
+	}
+
+	schedp.sched_priority = prio;
+	ret = pthread_attr_setschedparam(&attr, &schedp);
+	if (ret) {
+		error("pthread_attr_setschedparam\n", ret);
+		return -1;
+	}
+
+	ret = pthread_create(pth, &attr, func, arg);
+	if (ret) {
+		error("pthread_create\n", ret);
+		return -1;
+	}
+	return 0;
+}
+
+
+void *waiterfn(void *arg)
+{
+	struct thread_arg *args = (struct thread_arg *)arg;
+	futex_t old_val;
+
+	info("Waiter %ld: running\n", args->id);
+	/* Each thread sleeps for a different amount of time
+	 * This is to avoid races, because we don't lock the
+	 * external mutex here */
+	usleep(1000 * (long)args->id);
+
+	old_val = f1;
+	atomic_inc(&waiters_blocked);
+	info("Calling futex_wait_requeue_pi: %p (%u) -> %p\n",
+	     &f1, f1, &f2);
+	args->ret = futex_wait_requeue_pi(&f1, old_val, &f2, args->timeout,
+					  FUTEX_PRIVATE_FLAG);
+
+	info("waiter %ld woke with %d %s\n", args->id, args->ret,
+	     args->ret < 0 ? strerror(errno) : "");
+	atomic_inc(&waiters_woken);
+	if (args->ret < 0) {
+		if (args->timeout && errno == ETIMEDOUT)
+			args->ret = 0;
+		else {
+			args->ret = RET_ERROR;
+			error("futex_wait_requeue_pi\n", errno);
+		}
+		futex_lock_pi(&f2, NULL, 0, FUTEX_PRIVATE_FLAG);
+	}
+	futex_unlock_pi(&f2, FUTEX_PRIVATE_FLAG);
+
+	info("Waiter %ld: exiting with %d\n", args->id, args->ret);
+	pthread_exit((void *)&args->ret);
+}
+
+void *broadcast_wakerfn(void *arg)
+{
+	struct thread_arg *args = (struct thread_arg *)arg;
+	int nr_requeue = INT_MAX;
+	int task_count = 0;
+	futex_t old_val;
+	int nr_wake = 1;
+	int i = 0;
+
+	info("Waker: waiting for waiters to block\n");
+	while (waiters_blocked.val < THREAD_MAX)
+		usleep(1000);
+	usleep(1000);
+
+	info("Waker: Calling broadcast\n");
+	if (args->lock) {
+		info("Calling FUTEX_LOCK_PI on mutex=%x @ %p\n", f2, &f2);
+		futex_lock_pi(&f2, NULL, 0, FUTEX_PRIVATE_FLAG);
+	}
+ continue_requeue:
+	old_val = f1;
+	args->ret = futex_cmp_requeue_pi(&f1, old_val, &f2, nr_wake, nr_requeue,
+				   FUTEX_PRIVATE_FLAG);
+	if (args->ret < 0) {
+		args->ret = RET_ERROR;
+		error("FUTEX_CMP_REQUEUE_PI failed\n", errno);
+	} else if (++i < MAX_WAKE_ITERS) {
+		task_count += args->ret;
+		if (task_count < THREAD_MAX - waiters_woken.val)
+			goto continue_requeue;
+	} else {
+		error("max broadcast iterations (%d) reached with %d/%d tasks woken or requeued\n",
+		       0, MAX_WAKE_ITERS, task_count, THREAD_MAX);
+		args->ret = RET_ERROR;
+	}
+
+	futex_wake(&wake_complete, 1, FUTEX_PRIVATE_FLAG);
+
+	if (args->lock)
+		futex_unlock_pi(&f2, FUTEX_PRIVATE_FLAG);
+
+	if (args->ret > 0)
+		args->ret = task_count;
+
+	info("Waker: exiting with %d\n", args->ret);
+	pthread_exit((void *)&args->ret);
+}
+
+void *signal_wakerfn(void *arg)
+{
+	struct thread_arg *args = (struct thread_arg *)arg;
+	unsigned int old_val;
+	int nr_requeue = 0;
+	int task_count = 0;
+	int nr_wake = 1;
+	int i = 0;
+
+	info("Waker: waiting for waiters to block\n");
+	while (waiters_blocked.val < THREAD_MAX)
+		usleep(1000);
+	usleep(1000);
+
+	while (task_count < THREAD_MAX && waiters_woken.val < THREAD_MAX) {
+		info("task_count: %d, waiters_woken: %d\n",
+		     task_count, waiters_woken.val);
+		if (args->lock) {
+			info("Calling FUTEX_LOCK_PI on mutex=%x @ %p\n",
+			     f2, &f2);
+			futex_lock_pi(&f2, NULL, 0, FUTEX_PRIVATE_FLAG);
+		}
+		info("Waker: Calling signal\n");
+		/* cond_signal */
+		old_val = f1;
+		args->ret = futex_cmp_requeue_pi(&f1, old_val, &f2,
+						 nr_wake, nr_requeue,
+						 FUTEX_PRIVATE_FLAG);
+		if (args->ret < 0)
+			args->ret = -errno;
+		info("futex: %x\n", f2);
+		if (args->lock) {
+			info("Calling FUTEX_UNLOCK_PI on mutex=%x @ %p\n",
+			     f2, &f2);
+			futex_unlock_pi(&f2, FUTEX_PRIVATE_FLAG);
+		}
+		info("futex: %x\n", f2);
+		if (args->ret < 0) {
+			error("FUTEX_CMP_REQUEUE_PI failed\n", errno);
+			args->ret = RET_ERROR;
+			break;
+		}
+
+		task_count += args->ret;
+		usleep(SIGNAL_PERIOD_US);
+		i++;
+		/* we have to loop at least THREAD_MAX times */
+		if (i > MAX_WAKE_ITERS + THREAD_MAX) {
+			error("max signaling iterations (%d) reached, giving up on pending waiters.\n",
+			      0, MAX_WAKE_ITERS + THREAD_MAX);
+			args->ret = RET_ERROR;
+			break;
+		}
+	}
+
+	futex_wake(&wake_complete, 1, FUTEX_PRIVATE_FLAG);
+
+	if (args->ret >= 0)
+		args->ret = task_count;
+
+	info("Waker: exiting with %d\n", args->ret);
+	info("Waker: waiters_woken: %d\n", waiters_woken.val);
+	pthread_exit((void *)&args->ret);
+}
+
+void *third_party_blocker(void *arg)
+{
+	struct thread_arg *args = (struct thread_arg *)arg;
+	int ret2 = 0;
+
+	args->ret = futex_lock_pi(&f2, NULL, 0, FUTEX_PRIVATE_FLAG);
+	if (args->ret)
+		goto out;
+	args->ret = futex_wait(&wake_complete, wake_complete, NULL,
+			       FUTEX_PRIVATE_FLAG);
+	ret2 = futex_unlock_pi(&f2, FUTEX_PRIVATE_FLAG);
+
+ out:
+	if (args->ret || ret2) {
+		error("third_party_blocker() futex error", 0);
+		args->ret = RET_ERROR;
+	}
+
+	pthread_exit((void *)&args->ret);
+}
+
+int unit_test(int broadcast, long lock, int third_party_owner, long timeout_ns)
+{
+	void *(*wakerfn)(void *) = signal_wakerfn;
+	struct thread_arg blocker_arg = THREAD_ARG_INITIALIZER;
+	struct thread_arg waker_arg = THREAD_ARG_INITIALIZER;
+	pthread_t waiter[THREAD_MAX], waker, blocker;
+	struct timespec ts, *tsp = NULL;
+	struct thread_arg args[THREAD_MAX];
+	int *waiter_ret;
+	int i, ret = RET_PASS;
+
+	if (timeout_ns) {
+		time_t secs;
+
+		info("timeout_ns = %ld\n", timeout_ns);
+		ret = clock_gettime(CLOCK_MONOTONIC, &ts);
+		secs = (ts.tv_nsec + timeout_ns) / 1000000000;
+		ts.tv_nsec = ((int64_t)ts.tv_nsec + timeout_ns) % 1000000000;
+		ts.tv_sec += secs;
+		info("ts.tv_sec  = %ld\n", ts.tv_sec);
+		info("ts.tv_nsec = %ld\n", ts.tv_nsec);
+		tsp = &ts;
+	}
+
+	if (broadcast)
+		wakerfn = broadcast_wakerfn;
+
+	if (third_party_owner) {
+		if (create_rt_thread(&blocker, third_party_blocker,
+				     (void *)&blocker_arg, SCHED_FIFO, 1)) {
+			error("Creating third party blocker thread failed\n",
+			      errno);
+			ret = RET_ERROR;
+			goto out;
+		}
+	}
+
+	atomic_set(&waiters_woken, 0);
+	for (i = 0; i < THREAD_MAX; i++) {
+		args[i].id = i;
+		args[i].timeout = tsp;
+		info("Starting thread %d\n", i);
+		if (create_rt_thread(&waiter[i], waiterfn, (void *)&args[i],
+				     SCHED_FIFO, 1)) {
+			error("Creating waiting thread failed\n", errno);
+			ret = RET_ERROR;
+			goto out;
+		}
+	}
+	waker_arg.lock = lock;
+	if (create_rt_thread(&waker, wakerfn, (void *)&waker_arg,
+			     SCHED_FIFO, 1)) {
+		error("Creating waker thread failed\n", errno);
+		ret = RET_ERROR;
+		goto out;
+	}
+
+	/* Wait for threads to finish */
+	/* Store the first error or failure encountered in waiter_ret */
+	waiter_ret = &args[0].ret;
+	for (i = 0; i < THREAD_MAX; i++)
+		pthread_join(waiter[i],
+			     *waiter_ret ? NULL : (void **)&waiter_ret);
+
+	if (third_party_owner)
+		pthread_join(blocker, NULL);
+	pthread_join(waker, NULL);
+
+out:
+	if (!ret) {
+		if (*waiter_ret)
+			ret = *waiter_ret;
+		else if (waker_arg.ret < 0)
+			ret = waker_arg.ret;
+		else if (blocker_arg.ret)
+			ret = blocker_arg.ret;
+	}
+
+	return ret;
+}
+
+int main(int argc, char *argv[])
+{
+	int c, ret;
+
+	while ((c = getopt(argc, argv, "bchlot:v:")) != -1) {
+		switch (c) {
+		case 'b':
+			broadcast = 1;
+			break;
+		case 'c':
+			log_color(1);
+			break;
+		case 'h':
+			usage(basename(argv[0]));
+			exit(0);
+		case 'l':
+			locked = 1;
+			break;
+		case 'o':
+			owner = 1;
+			locked = 0;
+			break;
+		case 't':
+			timeout_ns = atoi(optarg);
+			break;
+		case 'v':
+			log_verbosity(atoi(optarg));
+			break;
+		default:
+			usage(basename(argv[0]));
+			exit(1);
+		}
+	}
+
+	ksft_print_header();
+	ksft_print_msg("%s: Test requeue functionality\n", basename(argv[0]));
+	ksft_print_msg(
+		"\tArguments: broadcast=%d locked=%d owner=%d timeout=%ldns\n",
+		broadcast, locked, owner, timeout_ns);
+
+	/*
+	 * FIXME: unit_test is obsolete now that we parse options and the
+	 * various style of runs are done by run.sh - simplify the code and move
+	 * unit_test into main()
+	 */
+	ret = unit_test(broadcast, locked, owner, timeout_ns);
+
+	print_result(TEST_NAME, ret);
+	return ret;
+}
diff --git a/tools/testing/selftests/futex/functional/futex_requeue_pi_mismatched_ops.c b/tools/testing/selftests/futex/functional/futex_requeue_pi_mismatched_ops.c
new file mode 100644
index 000000000..08187a165
--- /dev/null
+++ b/tools/testing/selftests/futex/functional/futex_requeue_pi_mismatched_ops.c
@@ -0,0 +1,138 @@
+/******************************************************************************
+ *
+ *   Copyright © International Business Machines  Corp., 2009
+ *
+ *   This program is free software;  you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation; either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ * DESCRIPTION
+ *      1. Block a thread using FUTEX_WAIT
+ *      2. Attempt to use FUTEX_CMP_REQUEUE_PI on the futex from 1.
+ *      3. The kernel must detect the mismatch and return -EINVAL.
+ *
+ * AUTHOR
+ *      Darren Hart <dvhart@linux.intel.com>
+ *
+ * HISTORY
+ *      2009-Nov-9: Initial version by Darren Hart <dvhart@linux.intel.com>
+ *
+ *****************************************************************************/
+
+#include <errno.h>
+#include <getopt.h>
+#include <pthread.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include "futextest.h"
+#include "logging.h"
+
+#define TEST_NAME "futex-requeue-pi-mismatched-ops"
+
+futex_t f1 = FUTEX_INITIALIZER;
+futex_t f2 = FUTEX_INITIALIZER;
+int child_ret = 0;
+
+void usage(char *prog)
+{
+	printf("Usage: %s\n", prog);
+	printf("  -c	Use color\n");
+	printf("  -h	Display this help message\n");
+	printf("  -v L	Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n",
+	       VQUIET, VCRITICAL, VINFO);
+}
+
+void *blocking_child(void *arg)
+{
+	child_ret = futex_wait(&f1, f1, NULL, FUTEX_PRIVATE_FLAG);
+	if (child_ret < 0) {
+		child_ret = -errno;
+		error("futex_wait\n", errno);
+	}
+	return (void *)&child_ret;
+}
+
+int main(int argc, char *argv[])
+{
+	int ret = RET_PASS;
+	pthread_t child;
+	int c;
+
+	while ((c = getopt(argc, argv, "chv:")) != -1) {
+		switch (c) {
+		case 'c':
+			log_color(1);
+			break;
+		case 'h':
+			usage(basename(argv[0]));
+			exit(0);
+		case 'v':
+			log_verbosity(atoi(optarg));
+			break;
+		default:
+			usage(basename(argv[0]));
+			exit(1);
+		}
+	}
+
+	ksft_print_header();
+	ksft_print_msg("%s: Detect mismatched requeue_pi operations\n",
+	       basename(argv[0]));
+
+	if (pthread_create(&child, NULL, blocking_child, NULL)) {
+		error("pthread_create\n", errno);
+		ret = RET_ERROR;
+		goto out;
+	}
+	/* Allow the child to block in the kernel. */
+	sleep(1);
+
+	/*
+	 * The kernel should detect the waiter did not setup the
+	 * q->requeue_pi_key and return -EINVAL. If it does not,
+	 * it likely gave the lock to the child, which is now hung
+	 * in the kernel.
+	 */
+	ret = futex_cmp_requeue_pi(&f1, f1, &f2, 1, 0, FUTEX_PRIVATE_FLAG);
+	if (ret < 0) {
+		if (errno == EINVAL) {
+			/*
+			 * The kernel correctly detected the mismatched
+			 * requeue_pi target and aborted. Wake the child with
+			 * FUTEX_WAKE.
+			 */
+			ret = futex_wake(&f1, 1, FUTEX_PRIVATE_FLAG);
+			if (ret == 1) {
+				ret = RET_PASS;
+			} else if (ret < 0) {
+				error("futex_wake\n", errno);
+				ret = RET_ERROR;
+			} else {
+				error("futex_wake did not wake the child\n", 0);
+				ret = RET_ERROR;
+			}
+		} else {
+			error("futex_cmp_requeue_pi\n", errno);
+			ret = RET_ERROR;
+		}
+	} else if (ret > 0) {
+		fail("futex_cmp_requeue_pi failed to detect the mismatch\n");
+		ret = RET_FAIL;
+	} else {
+		error("futex_cmp_requeue_pi found no waiters\n", 0);
+		ret = RET_ERROR;
+	}
+
+	pthread_join(child, NULL);
+
+	if (!ret)
+		ret = child_ret;
+
+ out:
+	/* If the kernel crashes, we shouldn't return at all. */
+	print_result(TEST_NAME, ret);
+	return ret;
+}
diff --git a/tools/testing/selftests/futex/functional/futex_requeue_pi_signal_restart.c b/tools/testing/selftests/futex/functional/futex_requeue_pi_signal_restart.c
new file mode 100644
index 000000000..f0542a344
--- /dev/null
+++ b/tools/testing/selftests/futex/functional/futex_requeue_pi_signal_restart.c
@@ -0,0 +1,225 @@
+/******************************************************************************
+ *
+ *   Copyright © International Business Machines  Corp., 2006-2008
+ *
+ *   This program is free software;  you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation; either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ * DESCRIPTION
+ *      This test exercises the futex_wait_requeue_pi() signal handling both
+ *      before and after the requeue. The first should be restarted by the
+ *      kernel. The latter should return EWOULDBLOCK to the waiter.
+ *
+ * AUTHORS
+ *      Darren Hart <dvhart@linux.intel.com>
+ *
+ * HISTORY
+ *      2008-May-5: Initial version by Darren Hart <dvhart@linux.intel.com>
+ *
+ *****************************************************************************/
+
+#include <errno.h>
+#include <getopt.h>
+#include <limits.h>
+#include <pthread.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "atomic.h"
+#include "futextest.h"
+#include "logging.h"
+
+#define TEST_NAME "futex-requeue-pi-signal-restart"
+#define DELAY_US 100
+
+futex_t f1 = FUTEX_INITIALIZER;
+futex_t f2 = FUTEX_INITIALIZER;
+atomic_t requeued = ATOMIC_INITIALIZER;
+
+int waiter_ret = 0;
+
+void usage(char *prog)
+{
+	printf("Usage: %s\n", prog);
+	printf("  -c	Use color\n");
+	printf("  -h	Display this help message\n");
+	printf("  -v L	Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n",
+	       VQUIET, VCRITICAL, VINFO);
+}
+
+int create_rt_thread(pthread_t *pth, void*(*func)(void *), void *arg,
+		     int policy, int prio)
+{
+	struct sched_param schedp;
+	pthread_attr_t attr;
+	int ret;
+
+	pthread_attr_init(&attr);
+	memset(&schedp, 0, sizeof(schedp));
+
+	ret = pthread_attr_setinheritsched(&attr, PTHREAD_EXPLICIT_SCHED);
+	if (ret) {
+		error("pthread_attr_setinheritsched\n", ret);
+		return -1;
+	}
+
+	ret = pthread_attr_setschedpolicy(&attr, policy);
+	if (ret) {
+		error("pthread_attr_setschedpolicy\n", ret);
+		return -1;
+	}
+
+	schedp.sched_priority = prio;
+	ret = pthread_attr_setschedparam(&attr, &schedp);
+	if (ret) {
+		error("pthread_attr_setschedparam\n", ret);
+		return -1;
+	}
+
+	ret = pthread_create(pth, &attr, func, arg);
+	if (ret) {
+		error("pthread_create\n", ret);
+		return -1;
+	}
+	return 0;
+}
+
+void handle_signal(int signo)
+{
+	info("signal received %s requeue\n",
+	     requeued.val ? "after" : "prior to");
+}
+
+void *waiterfn(void *arg)
+{
+	unsigned int old_val;
+	int res;
+
+	waiter_ret = RET_PASS;
+
+	info("Waiter running\n");
+	info("Calling FUTEX_LOCK_PI on f2=%x @ %p\n", f2, &f2);
+	old_val = f1;
+	res = futex_wait_requeue_pi(&f1, old_val, &(f2), NULL,
+				    FUTEX_PRIVATE_FLAG);
+	if (!requeued.val || errno != EWOULDBLOCK) {
+		fail("unexpected return from futex_wait_requeue_pi: %d (%s)\n",
+		     res, strerror(errno));
+		info("w2:futex: %x\n", f2);
+		if (!res)
+			futex_unlock_pi(&f2, FUTEX_PRIVATE_FLAG);
+		waiter_ret = RET_FAIL;
+	}
+
+	info("Waiter exiting with %d\n", waiter_ret);
+	pthread_exit(NULL);
+}
+
+
+int main(int argc, char *argv[])
+{
+	unsigned int old_val;
+	struct sigaction sa;
+	pthread_t waiter;
+	int c, res, ret = RET_PASS;
+
+	while ((c = getopt(argc, argv, "chv:")) != -1) {
+		switch (c) {
+		case 'c':
+			log_color(1);
+			break;
+		case 'h':
+			usage(basename(argv[0]));
+			exit(0);
+		case 'v':
+			log_verbosity(atoi(optarg));
+			break;
+		default:
+			usage(basename(argv[0]));
+			exit(1);
+		}
+	}
+
+	ksft_print_header();
+	ksft_print_msg("%s: Test signal handling during requeue_pi\n",
+	       basename(argv[0]));
+	ksft_print_msg("\tArguments: <none>\n");
+
+	sa.sa_handler = handle_signal;
+	sigemptyset(&sa.sa_mask);
+	sa.sa_flags = 0;
+	if (sigaction(SIGUSR1, &sa, NULL)) {
+		error("sigaction\n", errno);
+		exit(1);
+	}
+
+	info("m1:f2: %x\n", f2);
+	info("Creating waiter\n");
+	res = create_rt_thread(&waiter, waiterfn, NULL, SCHED_FIFO, 1);
+	if (res) {
+		error("Creating waiting thread failed", res);
+		ret = RET_ERROR;
+		goto out;
+	}
+
+	info("Calling FUTEX_LOCK_PI on f2=%x @ %p\n", f2, &f2);
+	info("m2:f2: %x\n", f2);
+	futex_lock_pi(&f2, 0, 0, FUTEX_PRIVATE_FLAG);
+	info("m3:f2: %x\n", f2);
+
+	while (1) {
+		/*
+		 * signal the waiter before requeue, waiter should automatically
+		 * restart futex_wait_requeue_pi() in the kernel. Wait for the
+		 * waiter to block on f1 again.
+		 */
+		info("Issuing SIGUSR1 to waiter\n");
+		pthread_kill(waiter, SIGUSR1);
+		usleep(DELAY_US);
+
+		info("Requeueing waiter via FUTEX_CMP_REQUEUE_PI\n");
+		old_val = f1;
+		res = futex_cmp_requeue_pi(&f1, old_val, &(f2), 1, 0,
+					   FUTEX_PRIVATE_FLAG);
+		/*
+		 * If res is non-zero, we either requeued the waiter or hit an
+		 * error, break out and handle it. If it is zero, then the
+		 * signal may have hit before the the waiter was blocked on f1.
+		 * Try again.
+		 */
+		if (res > 0) {
+			atomic_set(&requeued, 1);
+			break;
+		} else if (res < 0) {
+			error("FUTEX_CMP_REQUEUE_PI failed\n", errno);
+			ret = RET_ERROR;
+			break;
+		}
+	}
+	info("m4:f2: %x\n", f2);
+
+	/*
+	 * Signal the waiter after requeue, waiter should return from
+	 * futex_wait_requeue_pi() with EWOULDBLOCK. Join the thread here so the
+	 * futex_unlock_pi() can't happen before the signal wakeup is detected
+	 * in the kernel.
+	 */
+	info("Issuing SIGUSR1 to waiter\n");
+	pthread_kill(waiter, SIGUSR1);
+	info("Waiting for waiter to return\n");
+	pthread_join(waiter, NULL);
+
+	info("Calling FUTEX_UNLOCK_PI on mutex=%x @ %p\n", f2, &f2);
+	futex_unlock_pi(&f2, FUTEX_PRIVATE_FLAG);
+	info("m5:f2: %x\n", f2);
+
+ out:
+	if (ret == RET_PASS && waiter_ret)
+		ret = waiter_ret;
+
+	print_result(TEST_NAME, ret);
+	return ret;
+}
diff --git a/tools/testing/selftests/futex/functional/futex_wait_private_mapped_file.c b/tools/testing/selftests/futex/functional/futex_wait_private_mapped_file.c
new file mode 100644
index 000000000..6216de828
--- /dev/null
+++ b/tools/testing/selftests/futex/functional/futex_wait_private_mapped_file.c
@@ -0,0 +1,128 @@
+/******************************************************************************
+ *
+ * Copyright FUJITSU LIMITED 2010
+ * Copyright KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
+ *
+ *   This program is free software;  you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation; either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ * DESCRIPTION
+ *      Internally, Futex has two handling mode, anon and file. The private file
+ *      mapping is special. At first it behave as file, but after write anything
+ *      it behave as anon. This test is intent to test such case.
+ *
+ * AUTHOR
+ *      KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
+ *
+ * HISTORY
+ *      2010-Jan-6: Initial version by KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
+ *
+ *****************************************************************************/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <syscall.h>
+#include <unistd.h>
+#include <errno.h>
+#include <linux/futex.h>
+#include <pthread.h>
+#include <libgen.h>
+#include <signal.h>
+
+#include "logging.h"
+#include "futextest.h"
+
+#define TEST_NAME "futex-wait-private-mapped-file"
+#define PAGE_SZ 4096
+
+char pad[PAGE_SZ] = {1};
+futex_t val = 1;
+char pad2[PAGE_SZ] = {1};
+
+#define WAKE_WAIT_US 3000000
+struct timespec wait_timeout = { .tv_sec = 5, .tv_nsec = 0};
+
+void usage(char *prog)
+{
+	printf("Usage: %s\n", prog);
+	printf("  -c	Use color\n");
+	printf("  -h	Display this help message\n");
+	printf("  -v L	Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n",
+	       VQUIET, VCRITICAL, VINFO);
+}
+
+void *thr_futex_wait(void *arg)
+{
+	int ret;
+
+	info("futex wait\n");
+	ret = futex_wait(&val, 1, &wait_timeout, 0);
+	if (ret && errno != EWOULDBLOCK && errno != ETIMEDOUT) {
+		error("futex error.\n", errno);
+		print_result(TEST_NAME, RET_ERROR);
+		exit(RET_ERROR);
+	}
+
+	if (ret && errno == ETIMEDOUT)
+		fail("waiter timedout\n");
+
+	info("futex_wait: ret = %d, errno = %d\n", ret, errno);
+
+	return NULL;
+}
+
+int main(int argc, char **argv)
+{
+	pthread_t thr;
+	int ret = RET_PASS;
+	int res;
+	int c;
+
+	while ((c = getopt(argc, argv, "chv:")) != -1) {
+		switch (c) {
+		case 'c':
+			log_color(1);
+			break;
+		case 'h':
+			usage(basename(argv[0]));
+			exit(0);
+		case 'v':
+			log_verbosity(atoi(optarg));
+			break;
+		default:
+			usage(basename(argv[0]));
+			exit(1);
+		}
+	}
+
+	ksft_print_header();
+	ksft_print_msg(
+		"%s: Test the futex value of private file mappings in FUTEX_WAIT\n",
+		basename(argv[0]));
+
+	ret = pthread_create(&thr, NULL, thr_futex_wait, NULL);
+	if (ret < 0) {
+		fprintf(stderr, "pthread_create error\n");
+		ret = RET_ERROR;
+		goto out;
+	}
+
+	info("wait a while\n");
+	usleep(WAKE_WAIT_US);
+	val = 2;
+	res = futex_wake(&val, 1, 0);
+	info("futex_wake %d\n", res);
+	if (res != 1) {
+		fail("FUTEX_WAKE didn't find the waiting thread.\n");
+		ret = RET_FAIL;
+	}
+
+	info("join\n");
+	pthread_join(thr, NULL);
+
+ out:
+	print_result(TEST_NAME, ret);
+	return ret;
+}
diff --git a/tools/testing/selftests/futex/functional/futex_wait_timeout.c b/tools/testing/selftests/futex/functional/futex_wait_timeout.c
new file mode 100644
index 000000000..bab3dfe17
--- /dev/null
+++ b/tools/testing/selftests/futex/functional/futex_wait_timeout.c
@@ -0,0 +1,89 @@
+/******************************************************************************
+ *
+ *   Copyright © International Business Machines  Corp., 2009
+ *
+ *   This program is free software;  you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation; either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ * DESCRIPTION
+ *      Block on a futex and wait for timeout.
+ *
+ * AUTHOR
+ *      Darren Hart <dvhart@linux.intel.com>
+ *
+ * HISTORY
+ *      2009-Nov-6: Initial version by Darren Hart <dvhart@linux.intel.com>
+ *
+ *****************************************************************************/
+
+#include <errno.h>
+#include <getopt.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include "futextest.h"
+#include "logging.h"
+
+#define TEST_NAME "futex-wait-timeout"
+
+static long timeout_ns = 100000;	/* 100us default timeout */
+
+void usage(char *prog)
+{
+	printf("Usage: %s\n", prog);
+	printf("  -c	Use color\n");
+	printf("  -h	Display this help message\n");
+	printf("  -t N	Timeout in nanoseconds (default: 100,000)\n");
+	printf("  -v L	Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n",
+	       VQUIET, VCRITICAL, VINFO);
+}
+
+int main(int argc, char *argv[])
+{
+	futex_t f1 = FUTEX_INITIALIZER;
+	struct timespec to;
+	int res, ret = RET_PASS;
+	int c;
+
+	while ((c = getopt(argc, argv, "cht:v:")) != -1) {
+		switch (c) {
+		case 'c':
+			log_color(1);
+			break;
+		case 'h':
+			usage(basename(argv[0]));
+			exit(0);
+		case 't':
+			timeout_ns = atoi(optarg);
+			break;
+		case 'v':
+			log_verbosity(atoi(optarg));
+			break;
+		default:
+			usage(basename(argv[0]));
+			exit(1);
+		}
+	}
+
+	ksft_print_header();
+	ksft_print_msg("%s: Block on a futex and wait for timeout\n",
+	       basename(argv[0]));
+	ksft_print_msg("\tArguments: timeout=%ldns\n", timeout_ns);
+
+	/* initialize timeout */
+	to.tv_sec = 0;
+	to.tv_nsec = timeout_ns;
+
+	info("Calling futex_wait on f1: %u @ %p\n", f1, &f1);
+	res = futex_wait(&f1, f1, &to, FUTEX_PRIVATE_FLAG);
+	if (!res || errno != ETIMEDOUT) {
+		fail("futex_wait returned %d\n", ret < 0 ? errno : ret);
+		ret = RET_FAIL;
+	}
+
+	print_result(TEST_NAME, ret);
+	return ret;
+}
diff --git a/tools/testing/selftests/futex/functional/futex_wait_uninitialized_heap.c b/tools/testing/selftests/futex/functional/futex_wait_uninitialized_heap.c
new file mode 100644
index 000000000..269753225
--- /dev/null
+++ b/tools/testing/selftests/futex/functional/futex_wait_uninitialized_heap.c
@@ -0,0 +1,126 @@
+/******************************************************************************
+ *
+ * Copyright FUJITSU LIMITED 2010
+ * Copyright KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
+ *
+ *   This program is free software;  you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation; either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ * DESCRIPTION
+ *      Wait on uninitialized heap. It shold be zero and FUTEX_WAIT should
+ *      return immediately. This test is intent to test zero page handling in
+ *      futex.
+ *
+ * AUTHOR
+ *      KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
+ *
+ * HISTORY
+ *      2010-Jan-6: Initial version by KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
+ *
+ *****************************************************************************/
+
+#include <pthread.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+#include <syscall.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <errno.h>
+#include <linux/futex.h>
+#include <libgen.h>
+
+#include "logging.h"
+#include "futextest.h"
+
+#define TEST_NAME "futex-wait-uninitialized-heap"
+#define WAIT_US 5000000
+
+static int child_blocked = 1;
+static int child_ret;
+void *buf;
+
+void usage(char *prog)
+{
+	printf("Usage: %s\n", prog);
+	printf("  -c	Use color\n");
+	printf("  -h	Display this help message\n");
+	printf("  -v L	Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n",
+	       VQUIET, VCRITICAL, VINFO);
+}
+
+void *wait_thread(void *arg)
+{
+	int res;
+
+	child_ret = RET_PASS;
+	res = futex_wait(buf, 1, NULL, 0);
+	child_blocked = 0;
+
+	if (res != 0 && errno != EWOULDBLOCK) {
+		error("futex failure\n", errno);
+		child_ret = RET_ERROR;
+	}
+	pthread_exit(NULL);
+}
+
+int main(int argc, char **argv)
+{
+	int c, ret = RET_PASS;
+	long page_size;
+	pthread_t thr;
+
+	while ((c = getopt(argc, argv, "chv:")) != -1) {
+		switch (c) {
+		case 'c':
+			log_color(1);
+			break;
+		case 'h':
+			usage(basename(argv[0]));
+			exit(0);
+		case 'v':
+			log_verbosity(atoi(optarg));
+			break;
+		default:
+			usage(basename(argv[0]));
+			exit(1);
+		}
+	}
+
+	page_size = sysconf(_SC_PAGESIZE);
+
+	buf = mmap(NULL, page_size, PROT_READ|PROT_WRITE,
+		   MAP_PRIVATE|MAP_ANONYMOUS, 0, 0);
+	if (buf == (void *)-1) {
+		error("mmap\n", errno);
+		exit(1);
+	}
+
+	ksft_print_header();
+	ksft_print_msg("%s: Test the uninitialized futex value in FUTEX_WAIT\n",
+	       basename(argv[0]));
+
+
+	ret = pthread_create(&thr, NULL, wait_thread, NULL);
+	if (ret) {
+		error("pthread_create\n", errno);
+		ret = RET_ERROR;
+		goto out;
+	}
+
+	info("waiting %dus for child to return\n", WAIT_US);
+	usleep(WAIT_US);
+
+	ret = child_ret;
+	if (child_blocked) {
+		fail("child blocked in kernel\n");
+		ret = RET_FAIL;
+	}
+
+ out:
+	print_result(TEST_NAME, ret);
+	return ret;
+}
diff --git a/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c b/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c
new file mode 100644
index 000000000..da15a6326
--- /dev/null
+++ b/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c
@@ -0,0 +1,81 @@
+/******************************************************************************
+ *
+ *   Copyright © International Business Machines  Corp., 2009
+ *
+ *   This program is free software;  you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation; either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ * DESCRIPTION
+ *      Test if FUTEX_WAIT op returns -EWOULDBLOCK if the futex value differs
+ *      from the expected one.
+ *
+ * AUTHOR
+ *      Gowrishankar <gowrishankar.m@in.ibm.com>
+ *
+ * HISTORY
+ *      2009-Nov-14: Initial version by Gowrishankar <gowrishankar.m@in.ibm.com>
+ *
+ *****************************************************************************/
+
+#include <errno.h>
+#include <getopt.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include "futextest.h"
+#include "logging.h"
+
+#define TEST_NAME "futex-wait-wouldblock"
+#define timeout_ns 100000
+
+void usage(char *prog)
+{
+	printf("Usage: %s\n", prog);
+	printf("  -c	Use color\n");
+	printf("  -h	Display this help message\n");
+	printf("  -v L	Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n",
+	       VQUIET, VCRITICAL, VINFO);
+}
+
+int main(int argc, char *argv[])
+{
+	struct timespec to = {.tv_sec = 0, .tv_nsec = timeout_ns};
+	futex_t f1 = FUTEX_INITIALIZER;
+	int res, ret = RET_PASS;
+	int c;
+
+	while ((c = getopt(argc, argv, "cht:v:")) != -1) {
+		switch (c) {
+		case 'c':
+			log_color(1);
+			break;
+		case 'h':
+			usage(basename(argv[0]));
+			exit(0);
+		case 'v':
+			log_verbosity(atoi(optarg));
+			break;
+		default:
+			usage(basename(argv[0]));
+			exit(1);
+		}
+	}
+
+	ksft_print_header();
+	ksft_print_msg("%s: Test the unexpected futex value in FUTEX_WAIT\n",
+	       basename(argv[0]));
+
+	info("Calling futex_wait on f1: %u @ %p with val=%u\n", f1, &f1, f1+1);
+	res = futex_wait(&f1, f1+1, &to, FUTEX_PRIVATE_FLAG);
+	if (!res || errno != EWOULDBLOCK) {
+		fail("futex_wait returned: %d %s\n",
+		     res ? errno : res, res ? strerror(errno) : "");
+		ret = RET_FAIL;
+	}
+
+	print_result(TEST_NAME, ret);
+	return ret;
+}
diff --git a/tools/testing/selftests/futex/functional/run.sh b/tools/testing/selftests/futex/functional/run.sh
new file mode 100755
index 000000000..7ff002eed
--- /dev/null
+++ b/tools/testing/selftests/futex/functional/run.sh
@@ -0,0 +1,79 @@
+#!/bin/sh
+
+###############################################################################
+#
+#   Copyright © International Business Machines  Corp., 2009
+#
+#   This program is free software;  you can redistribute it and/or modify
+#   it under the terms of the GNU General Public License as published by
+#   the Free Software Foundation; either version 2 of the License, or
+#   (at your option) any later version.
+#
+# DESCRIPTION
+#      Run tests in the current directory.
+#
+# AUTHOR
+#      Darren Hart <dvhart@linux.intel.com>
+#
+# HISTORY
+#      2009-Nov-9: Initial version by Darren Hart <dvhart@linux.intel.com>
+#      2010-Jan-6: Add futex_wait_uninitialized_heap and futex_wait_private_mapped_file
+#                  by KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
+#
+###############################################################################
+
+# Test for a color capable console
+if [ -z "$USE_COLOR" ]; then
+    tput setf 7 || tput setaf 7
+    if [ $? -eq 0 ]; then
+        USE_COLOR=1
+        tput sgr0
+    fi
+fi
+if [ "$USE_COLOR" -eq 1 ]; then
+    COLOR="-c"
+fi
+
+
+echo
+# requeue pi testing
+# without timeouts
+./futex_requeue_pi $COLOR
+./futex_requeue_pi $COLOR -b
+./futex_requeue_pi $COLOR -b -l
+./futex_requeue_pi $COLOR -b -o
+./futex_requeue_pi $COLOR -l
+./futex_requeue_pi $COLOR -o
+# with timeouts
+./futex_requeue_pi $COLOR -b -l -t 5000
+./futex_requeue_pi $COLOR -l -t 5000
+./futex_requeue_pi $COLOR -b -l -t 500000
+./futex_requeue_pi $COLOR -l -t 500000
+./futex_requeue_pi $COLOR -b -t 5000
+./futex_requeue_pi $COLOR -t 5000
+./futex_requeue_pi $COLOR -b -t 500000
+./futex_requeue_pi $COLOR -t 500000
+./futex_requeue_pi $COLOR -b -o -t 5000
+./futex_requeue_pi $COLOR -l -t 5000
+./futex_requeue_pi $COLOR -b -o -t 500000
+./futex_requeue_pi $COLOR -l -t 500000
+# with long timeout
+./futex_requeue_pi $COLOR -b -l -t 2000000000
+./futex_requeue_pi $COLOR -l -t 2000000000
+
+
+echo
+./futex_requeue_pi_mismatched_ops $COLOR
+
+echo
+./futex_requeue_pi_signal_restart $COLOR
+
+echo
+./futex_wait_timeout $COLOR
+
+echo
+./futex_wait_wouldblock $COLOR
+
+echo
+./futex_wait_uninitialized_heap $COLOR
+./futex_wait_private_mapped_file $COLOR
diff --git a/tools/testing/selftests/futex/include/atomic.h b/tools/testing/selftests/futex/include/atomic.h
new file mode 100644
index 000000000..f861da3e3
--- /dev/null
+++ b/tools/testing/selftests/futex/include/atomic.h
@@ -0,0 +1,83 @@
+/******************************************************************************
+ *
+ *   Copyright © International Business Machines  Corp., 2009
+ *
+ *   This program is free software;  you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation; either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ * DESCRIPTION
+ *      GCC atomic builtin wrappers
+ *      http://gcc.gnu.org/onlinedocs/gcc-4.1.0/gcc/Atomic-Builtins.html
+ *
+ * AUTHOR
+ *      Darren Hart <dvhart@linux.intel.com>
+ *
+ * HISTORY
+ *      2009-Nov-17: Initial version by Darren Hart <dvhart@linux.intel.com>
+ *
+ *****************************************************************************/
+
+#ifndef _ATOMIC_H
+#define _ATOMIC_H
+
+typedef struct {
+	volatile int val;
+} atomic_t;
+
+#define ATOMIC_INITIALIZER { 0 }
+
+/**
+ * atomic_cmpxchg() - Atomic compare and exchange
+ * @uaddr:	The address of the futex to be modified
+ * @oldval:	The expected value of the futex
+ * @newval:	The new value to try and assign the futex
+ *
+ * Return the old value of addr->val.
+ */
+static inline int
+atomic_cmpxchg(atomic_t *addr, int oldval, int newval)
+{
+	return __sync_val_compare_and_swap(&addr->val, oldval, newval);
+}
+
+/**
+ * atomic_inc() - Atomic incrememnt
+ * @addr:	Address of the variable to increment
+ *
+ * Return the new value of addr->val.
+ */
+static inline int
+atomic_inc(atomic_t *addr)
+{
+	return __sync_add_and_fetch(&addr->val, 1);
+}
+
+/**
+ * atomic_dec() - Atomic decrement
+ * @addr:	Address of the variable to decrement
+ *
+ * Return the new value of addr-val.
+ */
+static inline int
+atomic_dec(atomic_t *addr)
+{
+	return __sync_sub_and_fetch(&addr->val, 1);
+}
+
+/**
+ * atomic_set() - Atomic set
+ * @addr:	Address of the variable to set
+ * @newval:	New value for the atomic_t
+ *
+ * Return the new value of addr->val.
+ */
+static inline int
+atomic_set(atomic_t *addr, int newval)
+{
+	addr->val = newval;
+	return newval;
+}
+
+#endif
diff --git a/tools/testing/selftests/futex/include/futextest.h b/tools/testing/selftests/futex/include/futextest.h
new file mode 100644
index 000000000..b98c3aba7
--- /dev/null
+++ b/tools/testing/selftests/futex/include/futextest.h
@@ -0,0 +1,266 @@
+/******************************************************************************
+ *
+ *   Copyright © International Business Machines  Corp., 2009
+ *
+ *   This program is free software;  you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation; either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ * DESCRIPTION
+ *      Glibc independent futex library for testing kernel functionality.
+ *
+ * AUTHOR
+ *      Darren Hart <dvhart@linux.intel.com>
+ *
+ * HISTORY
+ *      2009-Nov-6: Initial version by Darren Hart <dvhart@linux.intel.com>
+ *
+ *****************************************************************************/
+
+#ifndef _FUTEXTEST_H
+#define _FUTEXTEST_H
+
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <linux/futex.h>
+
+typedef volatile u_int32_t futex_t;
+#define FUTEX_INITIALIZER 0
+
+/* Define the newer op codes if the system header file is not up to date. */
+#ifndef FUTEX_WAIT_BITSET
+#define FUTEX_WAIT_BITSET		9
+#endif
+#ifndef FUTEX_WAKE_BITSET
+#define FUTEX_WAKE_BITSET		10
+#endif
+#ifndef FUTEX_WAIT_REQUEUE_PI
+#define FUTEX_WAIT_REQUEUE_PI		11
+#endif
+#ifndef FUTEX_CMP_REQUEUE_PI
+#define FUTEX_CMP_REQUEUE_PI		12
+#endif
+#ifndef FUTEX_WAIT_REQUEUE_PI_PRIVATE
+#define FUTEX_WAIT_REQUEUE_PI_PRIVATE	(FUTEX_WAIT_REQUEUE_PI | \
+					 FUTEX_PRIVATE_FLAG)
+#endif
+#ifndef FUTEX_REQUEUE_PI_PRIVATE
+#define FUTEX_CMP_REQUEUE_PI_PRIVATE	(FUTEX_CMP_REQUEUE_PI | \
+					 FUTEX_PRIVATE_FLAG)
+#endif
+
+/**
+ * futex() - SYS_futex syscall wrapper
+ * @uaddr:	address of first futex
+ * @op:		futex op code
+ * @val:	typically expected value of uaddr, but varies by op
+ * @timeout:	typically an absolute struct timespec (except where noted
+ *              otherwise). Overloaded by some ops
+ * @uaddr2:	address of second futex for some ops\
+ * @val3:	varies by op
+ * @opflags:	flags to be bitwise OR'd with op, such as FUTEX_PRIVATE_FLAG
+ *
+ * futex() is used by all the following futex op wrappers. It can also be
+ * used for misuse and abuse testing. Generally, the specific op wrappers
+ * should be used instead. It is a macro instead of an static inline function as
+ * some of the types over overloaded (timeout is used for nr_requeue for
+ * example).
+ *
+ * These argument descriptions are the defaults for all
+ * like-named arguments in the following wrappers except where noted below.
+ */
+#define futex(uaddr, op, val, timeout, uaddr2, val3, opflags) \
+	syscall(SYS_futex, uaddr, op | opflags, val, timeout, uaddr2, val3)
+
+/**
+ * futex_wait() - block on uaddr with optional timeout
+ * @timeout:	relative timeout
+ */
+static inline int
+futex_wait(futex_t *uaddr, futex_t val, struct timespec *timeout, int opflags)
+{
+	return futex(uaddr, FUTEX_WAIT, val, timeout, NULL, 0, opflags);
+}
+
+/**
+ * futex_wake() - wake one or more tasks blocked on uaddr
+ * @nr_wake:	wake up to this many tasks
+ */
+static inline int
+futex_wake(futex_t *uaddr, int nr_wake, int opflags)
+{
+	return futex(uaddr, FUTEX_WAKE, nr_wake, NULL, NULL, 0, opflags);
+}
+
+/**
+ * futex_wait_bitset() - block on uaddr with bitset
+ * @bitset:	bitset to be used with futex_wake_bitset
+ */
+static inline int
+futex_wait_bitset(futex_t *uaddr, futex_t val, struct timespec *timeout,
+		  u_int32_t bitset, int opflags)
+{
+	return futex(uaddr, FUTEX_WAIT_BITSET, val, timeout, NULL, bitset,
+		     opflags);
+}
+
+/**
+ * futex_wake_bitset() - wake one or more tasks blocked on uaddr with bitset
+ * @bitset:	bitset to compare with that used in futex_wait_bitset
+ */
+static inline int
+futex_wake_bitset(futex_t *uaddr, int nr_wake, u_int32_t bitset, int opflags)
+{
+	return futex(uaddr, FUTEX_WAKE_BITSET, nr_wake, NULL, NULL, bitset,
+		     opflags);
+}
+
+/**
+ * futex_lock_pi() - block on uaddr as a PI mutex
+ * @detect:	whether (1) or not (0) to perform deadlock detection
+ */
+static inline int
+futex_lock_pi(futex_t *uaddr, struct timespec *timeout, int detect,
+	      int opflags)
+{
+	return futex(uaddr, FUTEX_LOCK_PI, detect, timeout, NULL, 0, opflags);
+}
+
+/**
+ * futex_unlock_pi() - release uaddr as a PI mutex, waking the top waiter
+ */
+static inline int
+futex_unlock_pi(futex_t *uaddr, int opflags)
+{
+	return futex(uaddr, FUTEX_UNLOCK_PI, 0, NULL, NULL, 0, opflags);
+}
+
+/**
+ * futex_wake_op() - FIXME: COME UP WITH A GOOD ONE LINE DESCRIPTION
+ */
+static inline int
+futex_wake_op(futex_t *uaddr, futex_t *uaddr2, int nr_wake, int nr_wake2,
+	      int wake_op, int opflags)
+{
+	return futex(uaddr, FUTEX_WAKE_OP, nr_wake, nr_wake2, uaddr2, wake_op,
+		     opflags);
+}
+
+/**
+ * futex_requeue() - requeue without expected value comparison, deprecated
+ * @nr_wake:	wake up to this many tasks
+ * @nr_requeue:	requeue up to this many tasks
+ *
+ * Due to its inherently racy implementation, futex_requeue() is deprecated in
+ * favor of futex_cmp_requeue().
+ */
+static inline int
+futex_requeue(futex_t *uaddr, futex_t *uaddr2, int nr_wake, int nr_requeue,
+	      int opflags)
+{
+	return futex(uaddr, FUTEX_REQUEUE, nr_wake, nr_requeue, uaddr2, 0,
+		     opflags);
+}
+
+/**
+ * futex_cmp_requeue() - requeue tasks from uaddr to uaddr2
+ * @nr_wake:	wake up to this many tasks
+ * @nr_requeue:	requeue up to this many tasks
+ */
+static inline int
+futex_cmp_requeue(futex_t *uaddr, futex_t val, futex_t *uaddr2, int nr_wake,
+		  int nr_requeue, int opflags)
+{
+	return futex(uaddr, FUTEX_CMP_REQUEUE, nr_wake, nr_requeue, uaddr2,
+		     val, opflags);
+}
+
+/**
+ * futex_wait_requeue_pi() - block on uaddr and prepare to requeue to uaddr2
+ * @uaddr:	non-PI futex source
+ * @uaddr2:	PI futex target
+ *
+ * This is the first half of the requeue_pi mechanism. It shall always be
+ * paired with futex_cmp_requeue_pi().
+ */
+static inline int
+futex_wait_requeue_pi(futex_t *uaddr, futex_t val, futex_t *uaddr2,
+		      struct timespec *timeout, int opflags)
+{
+	return futex(uaddr, FUTEX_WAIT_REQUEUE_PI, val, timeout, uaddr2, 0,
+		     opflags);
+}
+
+/**
+ * futex_cmp_requeue_pi() - requeue tasks from uaddr to uaddr2 (PI aware)
+ * @uaddr:	non-PI futex source
+ * @uaddr2:	PI futex target
+ * @nr_wake:	wake up to this many tasks
+ * @nr_requeue:	requeue up to this many tasks
+ */
+static inline int
+futex_cmp_requeue_pi(futex_t *uaddr, futex_t val, futex_t *uaddr2, int nr_wake,
+		     int nr_requeue, int opflags)
+{
+	return futex(uaddr, FUTEX_CMP_REQUEUE_PI, nr_wake, nr_requeue, uaddr2,
+		     val, opflags);
+}
+
+/**
+ * futex_cmpxchg() - atomic compare and exchange
+ * @uaddr:	The address of the futex to be modified
+ * @oldval:	The expected value of the futex
+ * @newval:	The new value to try and assign the futex
+ *
+ * Implement cmpxchg using gcc atomic builtins.
+ * http://gcc.gnu.org/onlinedocs/gcc-4.1.0/gcc/Atomic-Builtins.html
+ *
+ * Return the old futex value.
+ */
+static inline u_int32_t
+futex_cmpxchg(futex_t *uaddr, u_int32_t oldval, u_int32_t newval)
+{
+	return __sync_val_compare_and_swap(uaddr, oldval, newval);
+}
+
+/**
+ * futex_dec() - atomic decrement of the futex value
+ * @uaddr:	The address of the futex to be modified
+ *
+ * Return the new futex value.
+ */
+static inline u_int32_t
+futex_dec(futex_t *uaddr)
+{
+	return __sync_sub_and_fetch(uaddr, 1);
+}
+
+/**
+ * futex_inc() - atomic increment of the futex value
+ * @uaddr:	the address of the futex to be modified
+ *
+ * Return the new futex value.
+ */
+static inline u_int32_t
+futex_inc(futex_t *uaddr)
+{
+	return __sync_add_and_fetch(uaddr, 1);
+}
+
+/**
+ * futex_set() - atomic decrement of the futex value
+ * @uaddr:	the address of the futex to be modified
+ * @newval:	New value for the atomic_t
+ *
+ * Return the new futex value.
+ */
+static inline u_int32_t
+futex_set(futex_t *uaddr, u_int32_t newval)
+{
+	*uaddr = newval;
+	return newval;
+}
+
+#endif
diff --git a/tools/testing/selftests/futex/include/logging.h b/tools/testing/selftests/futex/include/logging.h
new file mode 100644
index 000000000..01989644e
--- /dev/null
+++ b/tools/testing/selftests/futex/include/logging.h
@@ -0,0 +1,152 @@
+/******************************************************************************
+ *
+ *   Copyright © International Business Machines  Corp., 2009
+ *
+ *   This program is free software;  you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation; either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ * DESCRIPTION
+ *      Glibc independent futex library for testing kernel functionality.
+ *
+ * AUTHOR
+ *      Darren Hart <dvhart@linux.intel.com>
+ *
+ * HISTORY
+ *      2009-Nov-6: Initial version by Darren Hart <dvhart@linux.intel.com>
+ *
+ *****************************************************************************/
+
+#ifndef _LOGGING_H
+#define _LOGGING_H
+
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <linux/futex.h>
+#include "kselftest.h"
+
+/*
+ * Define PASS, ERROR, and FAIL strings with and without color escape
+ * sequences, default to no color.
+ */
+#define ESC 0x1B, '['
+#define BRIGHT '1'
+#define GREEN '3', '2'
+#define YELLOW '3', '3'
+#define RED '3', '1'
+#define ESCEND 'm'
+#define BRIGHT_GREEN ESC, BRIGHT, ';', GREEN, ESCEND
+#define BRIGHT_YELLOW ESC, BRIGHT, ';', YELLOW, ESCEND
+#define BRIGHT_RED ESC, BRIGHT, ';', RED, ESCEND
+#define RESET_COLOR ESC, '0', 'm'
+static const char PASS_COLOR[] = {BRIGHT_GREEN, ' ', 'P', 'A', 'S', 'S',
+				  RESET_COLOR, 0};
+static const char ERROR_COLOR[] = {BRIGHT_YELLOW, 'E', 'R', 'R', 'O', 'R',
+				   RESET_COLOR, 0};
+static const char FAIL_COLOR[] = {BRIGHT_RED, ' ', 'F', 'A', 'I', 'L',
+				  RESET_COLOR, 0};
+static const char INFO_NORMAL[] = " INFO";
+static const char PASS_NORMAL[] = " PASS";
+static const char ERROR_NORMAL[] = "ERROR";
+static const char FAIL_NORMAL[] = " FAIL";
+const char *INFO = INFO_NORMAL;
+const char *PASS = PASS_NORMAL;
+const char *ERROR = ERROR_NORMAL;
+const char *FAIL = FAIL_NORMAL;
+
+/* Verbosity setting for INFO messages */
+#define VQUIET    0
+#define VCRITICAL 1
+#define VINFO     2
+#define VMAX      VINFO
+int _verbose = VCRITICAL;
+
+/* Functional test return codes */
+#define RET_PASS   0
+#define RET_ERROR -1
+#define RET_FAIL  -2
+
+/**
+ * log_color() - Use colored output for PASS, ERROR, and FAIL strings
+ * @use_color:	use color (1) or not (0)
+ */
+void log_color(int use_color)
+{
+	if (use_color) {
+		PASS = PASS_COLOR;
+		ERROR = ERROR_COLOR;
+		FAIL = FAIL_COLOR;
+	} else {
+		PASS = PASS_NORMAL;
+		ERROR = ERROR_NORMAL;
+		FAIL = FAIL_NORMAL;
+	}
+}
+
+/**
+ * log_verbosity() - Set verbosity of test output
+ * @verbose:	Enable (1) verbose output or not (0)
+ *
+ * Currently setting verbose=1 will enable INFO messages and 0 will disable
+ * them. FAIL and ERROR messages are always displayed.
+ */
+void log_verbosity(int level)
+{
+	if (level > VMAX)
+		level = VMAX;
+	else if (level < 0)
+		level = 0;
+	_verbose = level;
+}
+
+/**
+ * print_result() - Print standard PASS | ERROR | FAIL results
+ * @ret:	the return value to be considered: 0 | RET_ERROR | RET_FAIL
+ *
+ * print_result() is primarily intended for functional tests.
+ */
+void print_result(const char *test_name, int ret)
+{
+	switch (ret) {
+	case RET_PASS:
+		ksft_test_result_pass("%s\n", test_name);
+		ksft_print_cnts();
+		return;
+	case RET_ERROR:
+		ksft_test_result_error("%s\n", test_name);
+		ksft_print_cnts();
+		return;
+	case RET_FAIL:
+		ksft_test_result_fail("%s\n", test_name);
+		ksft_print_cnts();
+		return;
+	}
+}
+
+/* log level macros */
+#define info(message, vargs...) \
+do { \
+	if (_verbose >= VINFO) \
+		fprintf(stderr, "\t%s: "message, INFO, ##vargs); \
+} while (0)
+
+#define error(message, err, args...) \
+do { \
+	if (_verbose >= VCRITICAL) {\
+		if (err) \
+			fprintf(stderr, "\t%s: %s: "message, \
+				ERROR, strerror(err), ##args); \
+		else \
+			fprintf(stderr, "\t%s: "message, ERROR, ##args); \
+	} \
+} while (0)
+
+#define fail(message, args...) \
+do { \
+	if (_verbose >= VCRITICAL) \
+		fprintf(stderr, "\t%s: "message, FAIL, ##args); \
+} while (0)
+
+#endif
diff --git a/tools/testing/selftests/futex/run.sh b/tools/testing/selftests/futex/run.sh
new file mode 100755
index 000000000..88bcb1767
--- /dev/null
+++ b/tools/testing/selftests/futex/run.sh
@@ -0,0 +1,33 @@
+#!/bin/sh
+
+###############################################################################
+#
+#   Copyright © International Business Machines  Corp., 2009
+#
+#   This program is free software;  you can redistribute it and/or modify
+#   it under the terms of the GNU General Public License as published by
+#   the Free Software Foundation; either version 2 of the License, or
+#   (at your option) any later version.
+#
+# DESCRIPTION
+#      Run all tests under the functional, performance, and stress directories.
+#      Format and summarize the results.
+#
+# AUTHOR
+#      Darren Hart <dvhart@linux.intel.com>
+#
+# HISTORY
+#      2009-Nov-9: Initial version by Darren Hart <dvhart@linux.intel.com>
+#
+###############################################################################
+
+# Test for a color capable shell and pass the result to the subdir scripts
+USE_COLOR=0
+tput setf 7 || tput setaf 7
+if [ $? -eq 0 ]; then
+    USE_COLOR=1
+    tput sgr0
+fi
+export USE_COLOR
+
+(cd functional; ./run.sh)
diff --git a/tools/testing/selftests/gen_kselftest_tar.sh b/tools/testing/selftests/gen_kselftest_tar.sh
new file mode 100755
index 000000000..a27e2eec3
--- /dev/null
+++ b/tools/testing/selftests/gen_kselftest_tar.sh
@@ -0,0 +1,53 @@
+#!/bin/bash
+#
+# SPDX-License-Identifier: GPL-2.0
+# gen_kselftest_tar
+# Generate kselftest tarball
+# Author: Shuah Khan <shuahkh@osg.samsung.com>
+# Copyright (C) 2015 Samsung Electronics Co., Ltd.
+
+# main
+main()
+{
+	if [ "$#" -eq 0 ]; then
+		echo "$0: Generating default compression gzip"
+		copts="cvzf"
+		ext=".tar.gz"
+	else
+		case "$1" in
+			tar)
+				copts="cvf"
+				ext=".tar"
+				;;
+			targz)
+				copts="cvzf"
+				ext=".tar.gz"
+				;;
+			tarbz2)
+				copts="cvjf"
+				ext=".tar.bz2"
+				;;
+			tarxz)
+				copts="cvJf"
+				ext=".tar.xz"
+				;;
+			*)
+			echo "Unknown tarball format $1"
+			exit 1
+			;;
+	esac
+	fi
+
+	install_dir=./kselftest
+
+# Run install using INSTALL_KSFT_PATH override to generate install
+# directory
+./kselftest_install.sh
+tar $copts kselftest${ext} $install_dir
+echo "Kselftest archive kselftest${ext} created!"
+
+# clean up install directory
+rm -rf kselftest
+}
+
+main "$@"
diff --git a/tools/testing/selftests/gpio/.gitignore b/tools/testing/selftests/gpio/.gitignore
new file mode 100644
index 000000000..7d14f743d
--- /dev/null
+++ b/tools/testing/selftests/gpio/.gitignore
@@ -0,0 +1 @@
+gpio-mockup-chardev
diff --git a/tools/testing/selftests/gpio/Makefile b/tools/testing/selftests/gpio/Makefile
new file mode 100644
index 000000000..59ea4c461
--- /dev/null
+++ b/tools/testing/selftests/gpio/Makefile
@@ -0,0 +1,29 @@
+# SPDX-License-Identifier: GPL-2.0
+
+TEST_PROGS := gpio-mockup.sh
+TEST_FILES := gpio-mockup-sysfs.sh $(BINARIES)
+BINARIES := gpio-mockup-chardev
+EXTRA_PROGS := ../gpiogpio-event-mon ../gpiogpio-hammer ../gpiolsgpio
+EXTRA_DIRS := ../gpioinclude/
+EXTRA_OBJS := ../gpiogpio-event-mon-in.o ../gpiogpio-event-mon.o
+EXTRA_OBJS += ../gpiogpio-hammer-in.o ../gpiogpio-utils.o ../gpiolsgpio-in.o
+EXTRA_OBJS += ../gpiolsgpio.o
+
+KSFT_KHDR_INSTALL := 1
+include ../lib.mk
+
+all: $(BINARIES)
+
+override define CLEAN
+	$(RM) $(BINARIES) $(EXTRA_PROGS) $(EXTRA_OBJS)
+	$(RM) -r $(EXTRA_DIRS)
+endef
+
+CFLAGS += -O2 -g -std=gnu99 -Wall -I../../../../usr/include/
+LDLIBS += -lmount -I/usr/include/libmount
+
+$(BINARIES):| khdr
+$(BINARIES): ../../../gpio/gpio-utils.o
+
+../../../gpio/gpio-utils.o:
+	make ARCH=$(ARCH) CROSS_COMPILE=$(CROSS_COMPILE) -C ../../../gpio
diff --git a/tools/testing/selftests/gpio/config b/tools/testing/selftests/gpio/config
new file mode 100644
index 000000000..abaa6902b
--- /dev/null
+++ b/tools/testing/selftests/gpio/config
@@ -0,0 +1,2 @@
+CONFIG_GPIOLIB=y
+CONFIG_GPIO_MOCKUP=m
diff --git a/tools/testing/selftests/gpio/gpio-mockup-chardev.c b/tools/testing/selftests/gpio/gpio-mockup-chardev.c
new file mode 100644
index 000000000..aaa1e9f08
--- /dev/null
+++ b/tools/testing/selftests/gpio/gpio-mockup-chardev.c
@@ -0,0 +1,327 @@
+/*
+ * GPIO chardev test helper
+ *
+ * Copyright (C) 2016 Bamvor Jian Zhang
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ */
+
+#define _GNU_SOURCE
+#include <unistd.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <errno.h>
+#include <string.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include <sys/ioctl.h>
+#include <libmount.h>
+#include <err.h>
+#include <dirent.h>
+#include <linux/gpio.h>
+#include "../../../gpio/gpio-utils.h"
+
+#define CONSUMER	"gpio-selftest"
+#define	GC_NUM		10
+enum direction {
+	OUT,
+	IN
+};
+
+static int get_debugfs(char **path)
+{
+	struct libmnt_context *cxt;
+	struct libmnt_table *tb;
+	struct libmnt_iter *itr = NULL;
+	struct libmnt_fs *fs;
+	int found = 0, ret;
+
+	cxt = mnt_new_context();
+	if (!cxt)
+		err(EXIT_FAILURE, "libmount context allocation failed");
+
+	itr = mnt_new_iter(MNT_ITER_FORWARD);
+	if (!itr)
+		err(EXIT_FAILURE, "failed to initialize libmount iterator");
+
+	if (mnt_context_get_mtab(cxt, &tb))
+		err(EXIT_FAILURE, "failed to read mtab");
+
+	while (mnt_table_next_fs(tb, itr, &fs) == 0) {
+		const char *type = mnt_fs_get_fstype(fs);
+
+		if (!strcmp(type, "debugfs")) {
+			found = 1;
+			break;
+		}
+	}
+	if (found) {
+		ret = asprintf(path, "%s/gpio", mnt_fs_get_target(fs));
+		if (ret < 0)
+			err(EXIT_FAILURE, "failed to format string");
+	}
+
+	mnt_free_iter(itr);
+	mnt_free_context(cxt);
+
+	if (!found)
+		return -1;
+
+	return 0;
+}
+
+static int gpio_debugfs_get(const char *consumer, int *dir, int *value)
+{
+	char *debugfs;
+	FILE *f;
+	char *line = NULL;
+	size_t len = 0;
+	char *cur;
+	int found = 0;
+
+	if (get_debugfs(&debugfs) != 0)
+		err(EXIT_FAILURE, "debugfs is not mounted");
+
+	f = fopen(debugfs, "r");
+	if (!f)
+		err(EXIT_FAILURE, "read from gpio debugfs failed");
+
+	/*
+	 * gpio-2   (                    |gpio-selftest               ) in  lo
+	 */
+	while (getline(&line, &len, f) != -1) {
+		cur = strstr(line, consumer);
+		if (cur == NULL)
+			continue;
+
+		cur = strchr(line, ')');
+		if (!cur)
+			continue;
+
+		cur += 2;
+		if (!strncmp(cur, "out", 3)) {
+			*dir = OUT;
+			cur += 4;
+		} else if (!strncmp(cur, "in", 2)) {
+			*dir = IN;
+			cur += 4;
+		}
+
+		if (!strncmp(cur, "hi", 2))
+			*value = 1;
+		else if (!strncmp(cur, "lo", 2))
+			*value = 0;
+
+		found = 1;
+		break;
+	}
+	free(debugfs);
+	fclose(f);
+	free(line);
+
+	if (!found)
+		return -1;
+
+	return 0;
+}
+
+static struct gpiochip_info *list_gpiochip(const char *gpiochip_name, int *ret)
+{
+	struct gpiochip_info *cinfo;
+	struct gpiochip_info *current;
+	const struct dirent *ent;
+	DIR *dp;
+	char *chrdev_name;
+	int fd;
+	int i = 0;
+
+	cinfo = calloc(sizeof(struct gpiochip_info) * 4, GC_NUM + 1);
+	if (!cinfo)
+		err(EXIT_FAILURE, "gpiochip_info allocation failed");
+
+	current = cinfo;
+	dp = opendir("/dev");
+	if (!dp) {
+		*ret = -errno;
+		goto error_out;
+	} else {
+		*ret = 0;
+	}
+
+	while (ent = readdir(dp), ent) {
+		if (check_prefix(ent->d_name, "gpiochip")) {
+			*ret = asprintf(&chrdev_name, "/dev/%s", ent->d_name);
+			if (*ret < 0)
+				goto error_out;
+
+			fd = open(chrdev_name, 0);
+			if (fd == -1) {
+				*ret = -errno;
+				fprintf(stderr, "Failed to open %s\n",
+					chrdev_name);
+				goto error_close_dir;
+			}
+			*ret = ioctl(fd, GPIO_GET_CHIPINFO_IOCTL, current);
+			if (*ret == -1) {
+				perror("Failed to issue CHIPINFO IOCTL\n");
+				goto error_close_dir;
+			}
+			close(fd);
+			if (strcmp(current->label, gpiochip_name) == 0
+			    || check_prefix(current->label, gpiochip_name)) {
+				*ret = 0;
+				current++;
+				i++;
+			}
+		}
+	}
+
+	if ((!*ret && i == 0) || *ret < 0) {
+		free(cinfo);
+		cinfo = NULL;
+	}
+	if (!*ret && i > 0) {
+		cinfo = realloc(cinfo, sizeof(struct gpiochip_info) * 4 * i);
+		*ret = i;
+	}
+
+error_close_dir:
+	closedir(dp);
+error_out:
+	if (*ret < 0)
+		err(EXIT_FAILURE, "list gpiochip failed: %s", strerror(*ret));
+
+	return cinfo;
+}
+
+int gpio_pin_test(struct gpiochip_info *cinfo, int line, int flag, int value)
+{
+	struct gpiohandle_data data;
+	unsigned int lines[] = {line};
+	int fd;
+	int debugfs_dir = IN;
+	int debugfs_value = 0;
+	int ret;
+
+	data.values[0] = value;
+	ret = gpiotools_request_linehandle(cinfo->name, lines, 1, flag, &data,
+					   CONSUMER);
+	if (ret < 0)
+		goto fail_out;
+	else
+		fd = ret;
+
+	ret = gpio_debugfs_get(CONSUMER, &debugfs_dir, &debugfs_value);
+	if (ret) {
+		ret = -EINVAL;
+		goto fail_out;
+	}
+	if (flag & GPIOHANDLE_REQUEST_INPUT) {
+		if (debugfs_dir != IN) {
+			errno = -EINVAL;
+			ret = -errno;
+		}
+	} else if (flag & GPIOHANDLE_REQUEST_OUTPUT) {
+		if (flag & GPIOHANDLE_REQUEST_ACTIVE_LOW)
+			debugfs_value = !debugfs_value;
+
+		if (!(debugfs_dir == OUT && value == debugfs_value)) {
+			errno = -EINVAL;
+			ret = -errno;
+		}
+	}
+	gpiotools_release_linehandle(fd);
+
+fail_out:
+	if (ret)
+		err(EXIT_FAILURE, "gpio<%s> line<%d> test flag<0x%x> value<%d>",
+		    cinfo->name, line, flag, value);
+
+	return ret;
+}
+
+void gpio_pin_tests(struct gpiochip_info *cinfo, unsigned int line)
+{
+	printf("line<%d>", line);
+	gpio_pin_test(cinfo, line, GPIOHANDLE_REQUEST_OUTPUT, 0);
+	printf(".");
+	gpio_pin_test(cinfo, line, GPIOHANDLE_REQUEST_OUTPUT, 1);
+	printf(".");
+	gpio_pin_test(cinfo, line,
+		      GPIOHANDLE_REQUEST_OUTPUT | GPIOHANDLE_REQUEST_ACTIVE_LOW,
+		      0);
+	printf(".");
+	gpio_pin_test(cinfo, line,
+		      GPIOHANDLE_REQUEST_OUTPUT | GPIOHANDLE_REQUEST_ACTIVE_LOW,
+		      1);
+	printf(".");
+	gpio_pin_test(cinfo, line, GPIOHANDLE_REQUEST_INPUT, 0);
+	printf(".");
+}
+
+/*
+ * ./gpio-mockup-chardev gpio_chip_name_prefix is_valid_gpio_chip
+ * Return 0 if successful or exit with EXIT_FAILURE if test failed.
+ * gpio_chip_name_prefix: The prefix of gpiochip you want to test. E.g.
+ *			  gpio-mockup
+ * is_valid_gpio_chip:	  Whether the gpio_chip is valid. 1 means valid,
+ *			  0 means invalid which could not be found by
+ *			  list_gpiochip.
+ */
+int main(int argc, char *argv[])
+{
+	char *prefix;
+	int valid;
+	struct gpiochip_info *cinfo;
+	struct gpiochip_info *current;
+	int i;
+	int ret;
+
+	if (argc < 3) {
+		printf("Usage: %s prefix is_valid", argv[0]);
+		exit(EXIT_FAILURE);
+	}
+
+	prefix = argv[1];
+	valid = strcmp(argv[2], "true") == 0 ? 1 : 0;
+
+	printf("Test gpiochip %s: ", prefix);
+	cinfo = list_gpiochip(prefix, &ret);
+	if (!cinfo) {
+		if (!valid && ret == 0) {
+			printf("Invalid test successful\n");
+			ret = 0;
+			goto out;
+		} else {
+			ret = -EINVAL;
+			goto out;
+		}
+	} else if (cinfo && !valid) {
+		ret = -EINVAL;
+		goto out;
+	}
+	current = cinfo;
+	for (i = 0; i < ret; i++) {
+		gpio_pin_tests(current, 0);
+		gpio_pin_tests(current, current->lines - 1);
+		gpio_pin_tests(current, random() % current->lines);
+		current++;
+	}
+	ret = 0;
+	printf("successful\n");
+
+out:
+	if (ret)
+		fprintf(stderr, "gpio<%s> test failed\n", prefix);
+
+	if (cinfo)
+		free(cinfo);
+
+	if (ret)
+		exit(EXIT_FAILURE);
+
+	return ret;
+}
diff --git a/tools/testing/selftests/gpio/gpio-mockup-sysfs.sh b/tools/testing/selftests/gpio/gpio-mockup-sysfs.sh
new file mode 100755
index 000000000..dd269d877
--- /dev/null
+++ b/tools/testing/selftests/gpio/gpio-mockup-sysfs.sh
@@ -0,0 +1,135 @@
+
+# SPDX-License-Identifier: GPL-2.0
+is_consistent()
+{
+	val=
+
+	active_low_sysfs=`cat $GPIO_SYSFS/gpio$nr/active_low`
+	val_sysfs=`cat $GPIO_SYSFS/gpio$nr/value`
+	dir_sysfs=`cat $GPIO_SYSFS/gpio$nr/direction`
+
+	gpio_this_debugfs=`cat $GPIO_DEBUGFS |grep "gpio-$nr" | sed "s/(.*)//g"`
+	dir_debugfs=`echo $gpio_this_debugfs | awk '{print $2}'`
+	val_debugfs=`echo $gpio_this_debugfs | awk '{print $3}'`
+	if [ $val_debugfs = "lo" ]; then
+		val=0
+	elif [ $val_debugfs = "hi" ]; then
+		val=1
+	fi
+
+	if [ $active_low_sysfs = "1" ]; then
+		if [ $val = "0" ]; then
+			val="1"
+		else
+			val="0"
+		fi
+	fi
+
+	if [ $val_sysfs = $val ] && [ $dir_sysfs = $dir_debugfs ]; then
+		echo -n "."
+	else
+		echo "test fail, exit"
+		die
+	fi
+}
+
+test_pin_logic()
+{
+	nr=$1
+	direction=$2
+	active_low=$3
+	value=$4
+
+	echo $direction > $GPIO_SYSFS/gpio$nr/direction
+	echo $active_low > $GPIO_SYSFS/gpio$nr/active_low
+	if [ $direction = "out" ]; then
+		echo $value > $GPIO_SYSFS/gpio$nr/value
+	fi
+	is_consistent $nr
+}
+
+test_one_pin()
+{
+	nr=$1
+
+	echo -n "test pin<$nr>"
+
+	echo $nr > $GPIO_SYSFS/export 2>/dev/null
+
+	if [ X$? != X0 ]; then
+		echo "test GPIO pin $nr failed"
+		die
+	fi
+
+	#"Checking if the sysfs is consistent with debugfs: "
+	is_consistent $nr
+
+	#"Checking the logic of active_low: "
+	test_pin_logic $nr out 1 1
+	test_pin_logic $nr out 1 0
+	test_pin_logic $nr out 0 1
+	test_pin_logic $nr out 0 0
+
+	#"Checking the logic of direction: "
+	test_pin_logic $nr in 1 1
+	test_pin_logic $nr out 1 0
+	test_pin_logic $nr low 0 1
+	test_pin_logic $nr high 0 0
+
+	echo $nr > $GPIO_SYSFS/unexport
+
+	echo "successful"
+}
+
+test_one_pin_fail()
+{
+	nr=$1
+
+	echo $nr > $GPIO_SYSFS/export 2>/dev/null
+
+	if [ X$? != X0 ]; then
+		echo "test invalid pin $nr successful"
+	else
+		echo "test invalid pin $nr failed"
+		echo $nr > $GPIO_SYSFS/unexport 2>/dev/null
+		die
+	fi
+}
+
+list_chip()
+{
+	echo `ls -d $GPIO_DRV_SYSFS/gpiochip* 2>/dev/null`
+}
+
+test_chip()
+{
+	chip=$1
+	name=`basename $chip`
+	base=`cat $chip/base`
+	ngpio=`cat $chip/ngpio`
+	printf "%-10s %-5s %-5s\n" $name $base $ngpio
+	if [ $ngpio = "0" ]; then
+		echo "number of gpio is zero is not allowed".
+	fi
+	test_one_pin $base
+	test_one_pin $(($base + $ngpio - 1))
+	test_one_pin $((( RANDOM % $ngpio )  + $base ))
+}
+
+test_chips_sysfs()
+{
+       gpiochip=`list_chip $module`
+       if [ X"$gpiochip" = X ]; then
+               if [ X"$valid" = Xfalse ]; then
+                       echo "successful"
+               else
+                       echo "fail"
+                       die
+               fi
+       else
+               for chip in $gpiochip; do
+                       test_chip $chip
+               done
+       fi
+}
+
diff --git a/tools/testing/selftests/gpio/gpio-mockup.sh b/tools/testing/selftests/gpio/gpio-mockup.sh
new file mode 100755
index 000000000..7f35b9880
--- /dev/null
+++ b/tools/testing/selftests/gpio/gpio-mockup.sh
@@ -0,0 +1,206 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+#exit status
+#1: Internal error
+#2: sysfs/debugfs not mount
+#3: insert module fail when gpio-mockup is a module.
+#4: Skip test including run as non-root user.
+#5: other reason.
+
+SYSFS=
+GPIO_SYSFS=
+GPIO_DRV_SYSFS=
+DEBUGFS=
+GPIO_DEBUGFS=
+dev_type=
+module=
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+usage()
+{
+	echo "Usage:"
+	echo "$0 [-f] [-m name] [-t type]"
+	echo "-f:  full test. It maybe conflict with existence gpio device."
+	echo "-m:  module name, default name is gpio-mockup. It could also test"
+	echo "     other gpio device."
+	echo "-t:  interface type: chardev(char device) and sysfs(being"
+	echo "     deprecated). The first one is default"
+	echo ""
+	echo "$0 -h"
+	echo "This usage"
+}
+
+prerequisite()
+{
+	msg="skip all tests:"
+	if [ $UID != 0 ]; then
+		echo $msg must be run as root >&2
+		exit $ksft_skip
+	fi
+	SYSFS=`mount -t sysfs | head -1 | awk '{ print $3 }'`
+	if [ ! -d "$SYSFS" ]; then
+		echo $msg sysfs is not mounted >&2
+		exit 2
+	fi
+	GPIO_SYSFS=`echo $SYSFS/class/gpio`
+	GPIO_DRV_SYSFS=`echo $SYSFS/devices/platform/$module/gpio`
+	DEBUGFS=`mount -t debugfs | head -1 | awk '{ print $3 }'`
+	if [ ! -d "$DEBUGFS" ]; then
+		echo $msg debugfs is not mounted >&2
+		exit 2
+	fi
+	GPIO_DEBUGFS=`echo $DEBUGFS/gpio`
+	source gpio-mockup-sysfs.sh
+}
+
+try_insert_module()
+{
+	if [ -d "$GPIO_DRV_SYSFS" ]; then
+		echo "$GPIO_DRV_SYSFS exist. Skip insert module"
+	else
+		modprobe -q $module $1
+		if [ X$? != X0 ]; then
+			echo $msg insmod $module failed >&2
+			exit 3
+		fi
+	fi
+}
+
+remove_module()
+{
+	modprobe -r -q $module
+}
+
+die()
+{
+	remove_module
+	exit 5
+}
+
+test_chips()
+{
+	if [ X$dev_type = Xsysfs ]; then
+		echo "WARNING: sysfs ABI of gpio is going to deprecated."
+		test_chips_sysfs $*
+	else
+		$BASE/gpio-mockup-chardev $*
+	fi
+}
+
+gpio_test()
+{
+	param=$1
+	valid=$2
+
+	if [ X"$param" = X ]; then
+		die
+	fi
+	try_insert_module "gpio_mockup_ranges=$param"
+	echo -n "GPIO $module test with ranges: <"
+	echo "$param>: "
+	printf "%-10s %s\n" $param
+	test_chips $module $valid
+	remove_module
+}
+
+BASE=`dirname $0`
+
+dev_type=
+TEMP=`getopt -o fhm:t: -n '$0' -- "$@"`
+
+if [ "$?" != "0" ]; then
+        echo "Parameter process failed, Terminating..." >&2
+        exit 1
+fi
+
+# Note the quotes around `$TEMP': they are essential!
+eval set -- "$TEMP"
+
+while true; do
+	case $1 in
+	-f)
+		full_test=true
+		shift
+		;;
+	-h)
+		usage
+		exit
+		;;
+	-m)
+		module=$2
+		shift 2
+		;;
+	-t)
+		dev_type=$2
+		shift 2
+		;;
+	--)
+		shift
+		break
+		;;
+	*)
+		echo "Internal error!"
+		exit 1
+		;;
+	esac
+done
+
+if [ X"$module" = X ]; then
+	module="gpio-mockup"
+fi
+
+if [ X$dev_type != Xsysfs ]; then
+	dev_type="chardev"
+fi
+
+prerequisite
+
+echo "1.  Test dynamic allocation of gpio successful means insert gpiochip and"
+echo "    manipulate gpio pin successful"
+gpio_test "-1,32" true
+gpio_test "-1,32,-1,32" true
+gpio_test "-1,32,-1,32,-1,32" true
+if [ X$full_test = Xtrue ]; then
+	gpio_test "-1,32,32,64" true
+	gpio_test "-1,32,40,64,-1,5" true
+	gpio_test "-1,32,32,64,-1,32" true
+	gpio_test "0,32,32,64,-1,32,-1,32" true
+	gpio_test "-1,32,-1,32,0,32,32,64" true
+	echo "2.  Do basic test: successful means insert gpiochip and"
+	echo "    manipulate gpio pin successful"
+	gpio_test "0,32" true
+	gpio_test "0,32,32,64" true
+	gpio_test "0,32,40,64,64,96" true
+fi
+echo "3.  Error test: successful means insert gpiochip failed"
+echo "3.1 Test number of gpio overflow"
+#Currently: The max number of gpio(1024) is defined in arm architecture.
+gpio_test "-1,32,-1,1024" false
+if [ X$full_test = Xtrue ]; then
+	echo "3.2 Test zero line of gpio"
+	gpio_test "0,0" false
+	echo "3.3 Test range overlap"
+	echo "3.3.1 Test corner case"
+	gpio_test "0,32,0,1" false
+	gpio_test "0,32,32,64,32,40" false
+	gpio_test "0,32,35,64,35,45" false
+	gpio_test "0,32,31,32" false
+	gpio_test "0,32,32,64,36,37" false
+	gpio_test "0,32,35,64,34,36" false
+	echo "3.3.2 Test inserting invalid second gpiochip"
+	gpio_test "0,32,30,35" false
+	gpio_test "0,32,1,5" false
+	gpio_test "10,32,9,14" false
+	gpio_test "10,32,30,35" false
+	echo "3.3.3 Test others"
+	gpio_test "0,32,40,56,39,45" false
+	gpio_test "0,32,40,56,30,33" false
+	gpio_test "0,32,40,56,30,41" false
+	gpio_test "0,32,40,56,20,21" false
+fi
+
+echo GPIO test PASS
+
diff --git a/tools/testing/selftests/ia64/.gitignore b/tools/testing/selftests/ia64/.gitignore
new file mode 100644
index 000000000..ab806edc8
--- /dev/null
+++ b/tools/testing/selftests/ia64/.gitignore
@@ -0,0 +1 @@
+aliasing-test
diff --git a/tools/testing/selftests/ia64/Makefile b/tools/testing/selftests/ia64/Makefile
new file mode 100644
index 000000000..4bce1a84b
--- /dev/null
+++ b/tools/testing/selftests/ia64/Makefile
@@ -0,0 +1,9 @@
+# SPDX-License-Identifier: GPL-2.0
+TEST_PROGS := aliasing-test
+
+all: $(TEST_PROGS)
+
+include ../lib.mk
+
+clean:
+	rm -fr $(TEST_PROGS)
diff --git a/tools/testing/selftests/ia64/aliasing-test.c b/tools/testing/selftests/ia64/aliasing-test.c
new file mode 100644
index 000000000..62a190d45
--- /dev/null
+++ b/tools/testing/selftests/ia64/aliasing-test.c
@@ -0,0 +1,263 @@
+/*
+ * Exercise /dev/mem mmap cases that have been troublesome in the past
+ *
+ * (c) Copyright 2007 Hewlett-Packard Development Company, L.P.
+ *	Bjorn Helgaas <bjorn.helgaas@hp.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <dirent.h>
+#include <fcntl.h>
+#include <fnmatch.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <linux/pci.h>
+
+int sum;
+
+static int map_mem(char *path, off_t offset, size_t length, int touch)
+{
+	int fd, rc;
+	void *addr;
+	int *c;
+
+	fd = open(path, O_RDWR);
+	if (fd == -1) {
+		perror(path);
+		return -1;
+	}
+
+	if (fnmatch("/proc/bus/pci/*", path, 0) == 0) {
+		rc = ioctl(fd, PCIIOC_MMAP_IS_MEM);
+		if (rc == -1)
+			perror("PCIIOC_MMAP_IS_MEM ioctl");
+	}
+
+	addr = mmap(NULL, length, PROT_READ|PROT_WRITE, MAP_SHARED, fd, offset);
+	if (addr == MAP_FAILED)
+		return 1;
+
+	if (touch) {
+		c = (int *) addr;
+		while (c < (int *) (addr + length))
+			sum += *c++;
+	}
+
+	rc = munmap(addr, length);
+	if (rc == -1) {
+		perror("munmap");
+		return -1;
+	}
+
+	close(fd);
+	return 0;
+}
+
+static int scan_tree(char *path, char *file, off_t offset, size_t length, int touch)
+{
+	struct dirent **namelist;
+	char *name, *path2;
+	int i, n, r, rc = 0, result = 0;
+	struct stat buf;
+
+	n = scandir(path, &namelist, 0, alphasort);
+	if (n < 0) {
+		perror("scandir");
+		return -1;
+	}
+
+	for (i = 0; i < n; i++) {
+		name = namelist[i]->d_name;
+
+		if (fnmatch(".", name, 0) == 0)
+			goto skip;
+		if (fnmatch("..", name, 0) == 0)
+			goto skip;
+
+		path2 = malloc(strlen(path) + strlen(name) + 3);
+		strcpy(path2, path);
+		strcat(path2, "/");
+		strcat(path2, name);
+
+		if (fnmatch(file, name, 0) == 0) {
+			rc = map_mem(path2, offset, length, touch);
+			if (rc == 0)
+				fprintf(stderr, "PASS: %s 0x%lx-0x%lx is %s\n", path2, offset, offset + length, touch ? "readable" : "mappable");
+			else if (rc > 0)
+				fprintf(stderr, "PASS: %s 0x%lx-0x%lx not mappable\n", path2, offset, offset + length);
+			else {
+				fprintf(stderr, "FAIL: %s 0x%lx-0x%lx not accessible\n", path2, offset, offset + length);
+				return rc;
+			}
+		} else {
+			r = lstat(path2, &buf);
+			if (r == 0 && S_ISDIR(buf.st_mode)) {
+				rc = scan_tree(path2, file, offset, length, touch);
+				if (rc < 0)
+					return rc;
+			}
+		}
+
+		result |= rc;
+		free(path2);
+
+skip:
+		free(namelist[i]);
+	}
+	free(namelist);
+	return result;
+}
+
+char buf[1024];
+
+static int read_rom(char *path)
+{
+	int fd, rc;
+	size_t size = 0;
+
+	fd = open(path, O_RDWR);
+	if (fd == -1) {
+		perror(path);
+		return -1;
+	}
+
+	rc = write(fd, "1", 2);
+	if (rc <= 0) {
+		close(fd);
+		perror("write");
+		return -1;
+	}
+
+	do {
+		rc = read(fd, buf, sizeof(buf));
+		if (rc > 0)
+			size += rc;
+	} while (rc > 0);
+
+	close(fd);
+	return size;
+}
+
+static int scan_rom(char *path, char *file)
+{
+	struct dirent **namelist;
+	char *name, *path2;
+	int i, n, r, rc = 0, result = 0;
+	struct stat buf;
+
+	n = scandir(path, &namelist, 0, alphasort);
+	if (n < 0) {
+		perror("scandir");
+		return -1;
+	}
+
+	for (i = 0; i < n; i++) {
+		name = namelist[i]->d_name;
+
+		if (fnmatch(".", name, 0) == 0)
+			goto skip;
+		if (fnmatch("..", name, 0) == 0)
+			goto skip;
+
+		path2 = malloc(strlen(path) + strlen(name) + 3);
+		strcpy(path2, path);
+		strcat(path2, "/");
+		strcat(path2, name);
+
+		if (fnmatch(file, name, 0) == 0) {
+			rc = read_rom(path2);
+
+			/*
+			 * It's OK if the ROM is unreadable.  Maybe there
+			 * is no ROM, or some other error occurred.  The
+			 * important thing is that no MCA happened.
+			 */
+			if (rc > 0)
+				fprintf(stderr, "PASS: %s read %d bytes\n", path2, rc);
+			else {
+				fprintf(stderr, "PASS: %s not readable\n", path2);
+				return rc;
+			}
+		} else {
+			r = lstat(path2, &buf);
+			if (r == 0 && S_ISDIR(buf.st_mode)) {
+				rc = scan_rom(path2, file);
+				if (rc < 0)
+					return rc;
+			}
+		}
+
+		result |= rc;
+		free(path2);
+
+skip:
+		free(namelist[i]);
+	}
+	free(namelist);
+	return result;
+}
+
+int main(void)
+{
+	int rc;
+
+	if (map_mem("/dev/mem", 0, 0xA0000, 1) == 0)
+		fprintf(stderr, "PASS: /dev/mem 0x0-0xa0000 is readable\n");
+	else
+		fprintf(stderr, "FAIL: /dev/mem 0x0-0xa0000 not accessible\n");
+
+	/*
+	 * It's not safe to blindly read the VGA frame buffer.  If you know
+	 * how to poke the card the right way, it should respond, but it's
+	 * not safe in general.  Many machines, e.g., Intel chipsets, cover
+	 * up a non-responding card by just returning -1, but others will
+	 * report the failure as a machine check.
+	 */
+	if (map_mem("/dev/mem", 0xA0000, 0x20000, 0) == 0)
+		fprintf(stderr, "PASS: /dev/mem 0xa0000-0xc0000 is mappable\n");
+	else
+		fprintf(stderr, "FAIL: /dev/mem 0xa0000-0xc0000 not accessible\n");
+
+	if (map_mem("/dev/mem", 0xC0000, 0x40000, 1) == 0)
+		fprintf(stderr, "PASS: /dev/mem 0xc0000-0x100000 is readable\n");
+	else
+		fprintf(stderr, "FAIL: /dev/mem 0xc0000-0x100000 not accessible\n");
+
+	/*
+	 * Often you can map all the individual pieces above (0-0xA0000,
+	 * 0xA0000-0xC0000, and 0xC0000-0x100000), but can't map the whole
+	 * thing at once.  This is because the individual pieces use different
+	 * attributes, and there's no single attribute supported over the
+	 * whole region.
+	 */
+	rc = map_mem("/dev/mem", 0, 1024*1024, 0);
+	if (rc == 0)
+		fprintf(stderr, "PASS: /dev/mem 0x0-0x100000 is mappable\n");
+	else if (rc > 0)
+		fprintf(stderr, "PASS: /dev/mem 0x0-0x100000 not mappable\n");
+	else
+		fprintf(stderr, "FAIL: /dev/mem 0x0-0x100000 not accessible\n");
+
+	scan_tree("/sys/class/pci_bus", "legacy_mem", 0, 0xA0000, 1);
+	scan_tree("/sys/class/pci_bus", "legacy_mem", 0xA0000, 0x20000, 0);
+	scan_tree("/sys/class/pci_bus", "legacy_mem", 0xC0000, 0x40000, 1);
+	scan_tree("/sys/class/pci_bus", "legacy_mem", 0, 1024*1024, 0);
+
+	scan_rom("/sys/devices", "rom");
+
+	scan_tree("/proc/bus/pci", "??.?", 0, 0xA0000, 1);
+	scan_tree("/proc/bus/pci", "??.?", 0xA0000, 0x20000, 0);
+	scan_tree("/proc/bus/pci", "??.?", 0xC0000, 0x40000, 1);
+	scan_tree("/proc/bus/pci", "??.?", 0, 1024*1024, 0);
+
+	return rc;
+}
diff --git a/tools/testing/selftests/intel_pstate/.gitignore b/tools/testing/selftests/intel_pstate/.gitignore
new file mode 100644
index 000000000..3bfcbae5f
--- /dev/null
+++ b/tools/testing/selftests/intel_pstate/.gitignore
@@ -0,0 +1,2 @@
+aperf
+msr
diff --git a/tools/testing/selftests/intel_pstate/Makefile b/tools/testing/selftests/intel_pstate/Makefile
new file mode 100644
index 000000000..7340fd6a9
--- /dev/null
+++ b/tools/testing/selftests/intel_pstate/Makefile
@@ -0,0 +1,16 @@
+# SPDX-License-Identifier: GPL-2.0
+CFLAGS := $(CFLAGS) -Wall -D_GNU_SOURCE
+LDLIBS := $(LDLIBS) -lm
+
+uname_M := $(shell uname -m 2>/dev/null || echo not)
+ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/)
+
+ifeq (x86,$(ARCH))
+TEST_GEN_FILES := msr aperf
+endif
+
+TEST_PROGS := run.sh
+
+include ../lib.mk
+
+$(TEST_GEN_FILES): $(HEADERS)
diff --git a/tools/testing/selftests/intel_pstate/aperf.c b/tools/testing/selftests/intel_pstate/aperf.c
new file mode 100644
index 000000000..f6cd03a87
--- /dev/null
+++ b/tools/testing/selftests/intel_pstate/aperf.c
@@ -0,0 +1,83 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <math.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <sys/timeb.h>
+#include <sched.h>
+#include <errno.h>
+#include <string.h>
+#include "../kselftest.h"
+
+void usage(char *name) {
+	printf ("Usage: %s cpunum\n", name);
+}
+
+int main(int argc, char **argv) {
+	unsigned int i, cpu, fd;
+	char msr_file_name[64];
+	long long tsc, old_tsc, new_tsc;
+	long long aperf, old_aperf, new_aperf;
+	long long mperf, old_mperf, new_mperf;
+	struct timeb before, after;
+	long long int start, finish, total;
+	cpu_set_t cpuset;
+
+	if (argc != 2) {
+		usage(argv[0]);
+		return 1;
+	}
+
+	errno = 0;
+	cpu = strtol(argv[1], (char **) NULL, 10);
+
+	if (errno) {
+		usage(argv[0]);
+		return 1;
+	}
+
+	sprintf(msr_file_name, "/dev/cpu/%d/msr", cpu);
+	fd = open(msr_file_name, O_RDONLY);
+
+	if (fd == -1) {
+		printf("/dev/cpu/%d/msr: %s\n", cpu, strerror(errno));
+		return KSFT_SKIP;
+	}
+
+	CPU_ZERO(&cpuset);
+	CPU_SET(cpu, &cpuset);
+
+	if (sched_setaffinity(0, sizeof(cpu_set_t), &cpuset)) {
+		perror("Failed to set cpu affinity");
+		return 1;
+	}
+
+	ftime(&before);
+	pread(fd, &old_tsc,  sizeof(old_tsc), 0x10);
+	pread(fd, &old_aperf,  sizeof(old_mperf), 0xe7);
+	pread(fd, &old_mperf,  sizeof(old_aperf), 0xe8);
+
+	for (i=0; i<0x8fffffff; i++) {
+		sqrt(i);
+	}
+
+	ftime(&after);
+	pread(fd, &new_tsc,  sizeof(new_tsc), 0x10);
+	pread(fd, &new_aperf,  sizeof(new_mperf), 0xe7);
+	pread(fd, &new_mperf,  sizeof(new_aperf), 0xe8);
+
+	tsc = new_tsc-old_tsc;
+	aperf = new_aperf-old_aperf;
+	mperf = new_mperf-old_mperf;
+
+	start = before.time*1000 + before.millitm;
+	finish = after.time*1000 + after.millitm;
+	total = finish - start;
+
+	printf("runTime: %4.2f\n", 1.0*total/1000);
+	printf("freq: %7.0f\n", tsc / (1.0*aperf / (1.0 * mperf)) / total);
+	return 0;
+}
diff --git a/tools/testing/selftests/intel_pstate/msr.c b/tools/testing/selftests/intel_pstate/msr.c
new file mode 100644
index 000000000..88fdd2a4b
--- /dev/null
+++ b/tools/testing/selftests/intel_pstate/msr.c
@@ -0,0 +1,40 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <math.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <sys/timeb.h>
+#include <sched.h>
+#include <errno.h>
+
+
+int main(int argc, char **argv) {
+	int cpu, fd;
+	long long msr;
+	char msr_file_name[64];
+
+	if (argc != 2)
+		return 1;
+
+	errno = 0;
+	cpu = strtol(argv[1], (char **) NULL, 10);
+
+	if (errno)
+		return 1;
+
+	sprintf(msr_file_name, "/dev/cpu/%d/msr", cpu);
+	fd = open(msr_file_name, O_RDONLY);
+
+	if (fd == -1) {
+		perror("Failed to open");
+		return 1;
+	}
+
+	pread(fd, &msr,  sizeof(msr), 0x199);
+
+	printf("msr 0x199: 0x%llx\n", msr);
+	return 0;
+}
diff --git a/tools/testing/selftests/intel_pstate/run.sh b/tools/testing/selftests/intel_pstate/run.sh
new file mode 100755
index 000000000..e7008f614
--- /dev/null
+++ b/tools/testing/selftests/intel_pstate/run.sh
@@ -0,0 +1,128 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# This test runs on Intel x86 based hardware which support the intel_pstate
+# driver.  The test checks the frequency settings from the maximum turbo
+# state to the minimum supported frequency, in decrements of 100MHz.  The
+# test runs the aperf.c program to put load on each processor.
+#
+# The results are displayed in a table which indicate the "Target" state,
+# or the requested frequency in MHz, the Actual frequency, as read from
+# /proc/cpuinfo, the difference between the Target and Actual frequencies,
+# and the value of MSR 0x199 (MSR_IA32_PERF_CTL) which indicates what
+# pstate the cpu is in, and the value of
+# /sys/devices/system/cpu/intel_pstate/max_perf_pct X maximum turbo state
+#
+# Notes: In some cases several frequency values may be placed in the
+# /tmp/result.X files.  This is done on purpose in order to catch cases
+# where the pstate driver may not be working at all.  There is the case
+# where, for example, several "similar" frequencies are in the file:
+#
+#
+#/tmp/result.3100:1:cpu MHz              : 2899.980
+#/tmp/result.3100:2:cpu MHz              : 2900.000
+#/tmp/result.3100:3:msr 0x199: 0x1e00
+#/tmp/result.3100:4:max_perf_pct 94
+#
+# and the test will error out in those cases.  The result.X file can be checked
+# for consistency and modified to remove the extra MHz values.  The result.X
+# files can be re-evaluated by setting EVALUATE_ONLY to 1 below.
+
+EVALUATE_ONLY=0
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+if ! uname -m | sed -e s/i.86/x86/ -e s/x86_64/x86/ | grep -q x86; then
+	echo "$0 # Skipped: Test can only run on x86 architectures."
+	exit $ksft_skip
+fi
+
+msg="skip all tests:"
+if [ $UID != 0 ] && [ $EVALUATE_ONLY == 0 ]; then
+    echo $msg please run this as root >&2
+    exit $ksft_skip
+fi
+
+max_cpus=$(($(nproc)-1))
+
+function run_test () {
+
+	file_ext=$1
+	for cpu in `seq 0 $max_cpus`
+	do
+		echo "launching aperf load on $cpu"
+		./aperf $cpu &
+	done
+
+	echo "sleeping for 5 seconds"
+	sleep 5
+	grep MHz /proc/cpuinfo | sort -u > /tmp/result.freqs
+	num_freqs=$(wc -l /tmp/result.freqs | awk ' { print $1 } ')
+	if [ $num_freqs -ge 2 ]; then
+		tail -n 1 /tmp/result.freqs > /tmp/result.$1
+	else
+		cp /tmp/result.freqs /tmp/result.$1
+	fi
+	./msr 0 >> /tmp/result.$1
+
+	max_perf_pct=$(cat /sys/devices/system/cpu/intel_pstate/max_perf_pct)
+	echo "max_perf_pct $max_perf_pct" >> /tmp/result.$1
+
+	for job in `jobs -p`
+	do
+		echo "waiting for job id $job"
+		wait $job
+	done
+}
+
+#
+# MAIN (ALL UNITS IN MHZ)
+#
+
+# Get the marketing frequency
+_mkt_freq=$(cat /proc/cpuinfo | grep -m 1 "model name" | awk '{print $NF}')
+_mkt_freq=$(echo $_mkt_freq | tr -d [:alpha:][:punct:])
+mkt_freq=${_mkt_freq}0
+
+# Get the ranges from cpupower
+_min_freq=$(cpupower frequency-info -l | tail -1 | awk ' { print $1 } ')
+min_freq=$(($_min_freq / 1000))
+_max_freq=$(cpupower frequency-info -l | tail -1 | awk ' { print $2 } ')
+max_freq=$(($_max_freq / 1000))
+
+
+[ $EVALUATE_ONLY -eq 0 ] && for freq in `seq $max_freq -100 $min_freq`
+do
+	echo "Setting maximum frequency to $freq"
+	cpupower frequency-set -g powersave --max=${freq}MHz >& /dev/null
+	run_test $freq
+done
+
+[ $EVALUATE_ONLY -eq 0 ] && cpupower frequency-set -g powersave --max=${max_freq}MHz >& /dev/null
+
+echo "========================================================================"
+echo "The marketing frequency of the cpu is $mkt_freq MHz"
+echo "The maximum frequency of the cpu is $max_freq MHz"
+echo "The minimum frequency of the cpu is $min_freq MHz"
+
+# make a pretty table
+echo "Target Actual Difference MSR(0x199) max_perf_pct" | tr " " "\n" > /tmp/result.tab
+for freq in `seq $max_freq -100 $min_freq`
+do
+	result_freq=$(cat /tmp/result.${freq} | grep "cpu MHz" | awk ' { print $4 } ' | awk -F "." ' { print $1 } ')
+	msr=$(cat /tmp/result.${freq} | grep "msr" | awk ' { print $3 } ')
+	max_perf_pct=$(cat /tmp/result.${freq} | grep "max_perf_pct" | awk ' { print $2 } ' )
+	cat >> /tmp/result.tab << EOF
+$freq
+$result_freq
+$((result_freq - freq))
+$msr
+$((max_perf_pct * max_freq))
+EOF
+done
+
+# print the table
+pr -aTt -5 < /tmp/result.tab
+
+exit 0
diff --git a/tools/testing/selftests/ipc/.gitignore b/tools/testing/selftests/ipc/.gitignore
new file mode 100644
index 000000000..9af04c935
--- /dev/null
+++ b/tools/testing/selftests/ipc/.gitignore
@@ -0,0 +1,2 @@
+msgque_test
+msgque
diff --git a/tools/testing/selftests/ipc/Makefile b/tools/testing/selftests/ipc/Makefile
new file mode 100644
index 000000000..1c4448a84
--- /dev/null
+++ b/tools/testing/selftests/ipc/Makefile
@@ -0,0 +1,18 @@
+# SPDX-License-Identifier: GPL-2.0
+uname_M := $(shell uname -m 2>/dev/null || echo not)
+ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/i386/)
+ifeq ($(ARCH),i386)
+        ARCH := x86
+	CFLAGS := -DCONFIG_X86_32 -D__i386__
+endif
+ifeq ($(ARCH),x86_64)
+	ARCH := x86
+	CFLAGS := -DCONFIG_X86_64 -D__x86_64__
+endif
+
+CFLAGS += -I../../../../usr/include/
+
+TEST_GEN_PROGS := msgque
+
+include ../lib.mk
+
diff --git a/tools/testing/selftests/ipc/config b/tools/testing/selftests/ipc/config
new file mode 100644
index 000000000..070244710
--- /dev/null
+++ b/tools/testing/selftests/ipc/config
@@ -0,0 +1,2 @@
+CONFIG_EXPERT=y
+CONFIG_CHECKPOINT_RESTORE=y
diff --git a/tools/testing/selftests/ipc/msgque.c b/tools/testing/selftests/ipc/msgque.c
new file mode 100644
index 000000000..5ec4d9e18
--- /dev/null
+++ b/tools/testing/selftests/ipc/msgque.c
@@ -0,0 +1,255 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <sys/msg.h>
+#include <fcntl.h>
+
+#include "../kselftest.h"
+
+#define MAX_MSG_SIZE		32
+
+struct msg1 {
+	int msize;
+	long mtype;
+	char mtext[MAX_MSG_SIZE];
+};
+
+#define TEST_STRING "Test sysv5 msg"
+#define MSG_TYPE 1
+
+#define ANOTHER_TEST_STRING "Yet another test sysv5 msg"
+#define ANOTHER_MSG_TYPE 26538
+
+struct msgque_data {
+	key_t key;
+	int msq_id;
+	int qbytes;
+	int qnum;
+	int mode;
+	struct msg1 *messages;
+};
+
+int restore_queue(struct msgque_data *msgque)
+{
+	int fd, ret, id, i;
+	char buf[32];
+
+	fd = open("/proc/sys/kernel/msg_next_id", O_WRONLY);
+	if (fd == -1) {
+		printf("Failed to open /proc/sys/kernel/msg_next_id\n");
+		return -errno;
+	}
+	sprintf(buf, "%d", msgque->msq_id);
+
+	ret = write(fd, buf, strlen(buf));
+	if (ret != strlen(buf)) {
+		printf("Failed to write to /proc/sys/kernel/msg_next_id\n");
+		return -errno;
+	}
+
+	id = msgget(msgque->key, msgque->mode | IPC_CREAT | IPC_EXCL);
+	if (id == -1) {
+		printf("Failed to create queue\n");
+		return -errno;
+	}
+
+	if (id != msgque->msq_id) {
+		printf("Restored queue has wrong id (%d instead of %d)\n",
+							id, msgque->msq_id);
+		ret = -EFAULT;
+		goto destroy;
+	}
+
+	for (i = 0; i < msgque->qnum; i++) {
+		if (msgsnd(msgque->msq_id, &msgque->messages[i].mtype,
+			   msgque->messages[i].msize, IPC_NOWAIT) != 0) {
+			printf("msgsnd failed (%m)\n");
+			ret = -errno;
+			goto destroy;
+		};
+	}
+	return 0;
+
+destroy:
+	if (msgctl(id, IPC_RMID, NULL))
+		printf("Failed to destroy queue: %d\n", -errno);
+	return ret;
+}
+
+int check_and_destroy_queue(struct msgque_data *msgque)
+{
+	struct msg1 message;
+	int cnt = 0, ret;
+
+	while (1) {
+		ret = msgrcv(msgque->msq_id, &message.mtype, MAX_MSG_SIZE,
+				0, IPC_NOWAIT);
+		if (ret < 0) {
+			if (errno == ENOMSG)
+				break;
+			printf("Failed to read IPC message: %m\n");
+			ret = -errno;
+			goto err;
+		}
+		if (ret != msgque->messages[cnt].msize) {
+			printf("Wrong message size: %d (expected %d)\n", ret,
+						msgque->messages[cnt].msize);
+			ret = -EINVAL;
+			goto err;
+		}
+		if (message.mtype != msgque->messages[cnt].mtype) {
+			printf("Wrong message type\n");
+			ret = -EINVAL;
+			goto err;
+		}
+		if (memcmp(message.mtext, msgque->messages[cnt].mtext, ret)) {
+			printf("Wrong message content\n");
+			ret = -EINVAL;
+			goto err;
+		}
+		cnt++;
+	}
+
+	if (cnt != msgque->qnum) {
+		printf("Wrong message number\n");
+		ret = -EINVAL;
+		goto err;
+	}
+
+	ret = 0;
+err:
+	if (msgctl(msgque->msq_id, IPC_RMID, NULL)) {
+		printf("Failed to destroy queue: %d\n", -errno);
+		return -errno;
+	}
+	return ret;
+}
+
+int dump_queue(struct msgque_data *msgque)
+{
+	struct msqid_ds ds;
+	int kern_id;
+	int i, ret;
+
+	for (kern_id = 0; kern_id < 256; kern_id++) {
+		ret = msgctl(kern_id, MSG_STAT, &ds);
+		if (ret < 0) {
+			if (errno == EINVAL)
+				continue;
+			printf("Failed to get stats for IPC queue with id %d\n",
+					kern_id);
+			return -errno;
+		}
+
+		if (ret == msgque->msq_id)
+			break;
+	}
+
+	msgque->messages = malloc(sizeof(struct msg1) * ds.msg_qnum);
+	if (msgque->messages == NULL) {
+		printf("Failed to get stats for IPC queue\n");
+		return -ENOMEM;
+	}
+
+	msgque->qnum = ds.msg_qnum;
+	msgque->mode = ds.msg_perm.mode;
+	msgque->qbytes = ds.msg_qbytes;
+
+	for (i = 0; i < msgque->qnum; i++) {
+		ret = msgrcv(msgque->msq_id, &msgque->messages[i].mtype,
+				MAX_MSG_SIZE, i, IPC_NOWAIT | MSG_COPY);
+		if (ret < 0) {
+			printf("Failed to copy IPC message: %m (%d)\n", errno);
+			return -errno;
+		}
+		msgque->messages[i].msize = ret;
+	}
+	return 0;
+}
+
+int fill_msgque(struct msgque_data *msgque)
+{
+	struct msg1 msgbuf;
+
+	msgbuf.mtype = MSG_TYPE;
+	memcpy(msgbuf.mtext, TEST_STRING, sizeof(TEST_STRING));
+	if (msgsnd(msgque->msq_id, &msgbuf.mtype, sizeof(TEST_STRING),
+				IPC_NOWAIT) != 0) {
+		printf("First message send failed (%m)\n");
+		return -errno;
+	};
+
+	msgbuf.mtype = ANOTHER_MSG_TYPE;
+	memcpy(msgbuf.mtext, ANOTHER_TEST_STRING, sizeof(ANOTHER_TEST_STRING));
+	if (msgsnd(msgque->msq_id, &msgbuf.mtype, sizeof(ANOTHER_TEST_STRING),
+				IPC_NOWAIT) != 0) {
+		printf("Second message send failed (%m)\n");
+		return -errno;
+	};
+	return 0;
+}
+
+int main(int argc, char **argv)
+{
+	int msg, pid, err;
+	struct msgque_data msgque;
+
+	if (getuid() != 0)
+		return ksft_exit_skip(
+				"Please run the test as root - Exiting.\n");
+
+	msgque.key = ftok(argv[0], 822155650);
+	if (msgque.key == -1) {
+		printf("Can't make key: %d\n", -errno);
+		return ksft_exit_fail();
+	}
+
+	msgque.msq_id = msgget(msgque.key, IPC_CREAT | IPC_EXCL | 0666);
+	if (msgque.msq_id == -1) {
+		err = -errno;
+		printf("Can't create queue: %d\n", err);
+		goto err_out;
+	}
+
+	err = fill_msgque(&msgque);
+	if (err) {
+		printf("Failed to fill queue: %d\n", err);
+		goto err_destroy;
+	}
+
+	err = dump_queue(&msgque);
+	if (err) {
+		printf("Failed to dump queue: %d\n", err);
+		goto err_destroy;
+	}
+
+	err = check_and_destroy_queue(&msgque);
+	if (err) {
+		printf("Failed to check and destroy queue: %d\n", err);
+		goto err_out;
+	}
+
+	err = restore_queue(&msgque);
+	if (err) {
+		printf("Failed to restore queue: %d\n", err);
+		goto err_destroy;
+	}
+
+	err = check_and_destroy_queue(&msgque);
+	if (err) {
+		printf("Failed to test queue: %d\n", err);
+		goto err_out;
+	}
+	return ksft_exit_pass();
+
+err_destroy:
+	if (msgctl(msgque.msq_id, IPC_RMID, NULL)) {
+		printf("Failed to destroy queue: %d\n", -errno);
+		return ksft_exit_fail();
+	}
+err_out:
+	return ksft_exit_fail();
+}
diff --git a/tools/testing/selftests/kcmp/.gitignore b/tools/testing/selftests/kcmp/.gitignore
new file mode 100644
index 000000000..5a9b3732b
--- /dev/null
+++ b/tools/testing/selftests/kcmp/.gitignore
@@ -0,0 +1,2 @@
+kcmp_test
+kcmp-test-file
diff --git a/tools/testing/selftests/kcmp/Makefile b/tools/testing/selftests/kcmp/Makefile
new file mode 100644
index 000000000..47aa9887f
--- /dev/null
+++ b/tools/testing/selftests/kcmp/Makefile
@@ -0,0 +1,8 @@
+CFLAGS += -I../../../../usr/include/
+
+TEST_GEN_PROGS := kcmp_test
+
+EXTRA_CLEAN := $(OUTPUT)/kcmp-test-file
+
+include ../lib.mk
+
diff --git a/tools/testing/selftests/kcmp/kcmp_test.c b/tools/testing/selftests/kcmp/kcmp_test.c
new file mode 100644
index 000000000..6ea7b9f37
--- /dev/null
+++ b/tools/testing/selftests/kcmp/kcmp_test.c
@@ -0,0 +1,166 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <limits.h>
+#include <unistd.h>
+#include <errno.h>
+#include <string.h>
+#include <fcntl.h>
+#include <linux/unistd.h>
+#include <linux/kcmp.h>
+
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/wait.h>
+#include <sys/epoll.h>
+
+#include "../kselftest.h"
+
+static long sys_kcmp(int pid1, int pid2, int type, unsigned long fd1, unsigned long fd2)
+{
+	return syscall(__NR_kcmp, pid1, pid2, type, fd1, fd2);
+}
+
+static const unsigned int duped_num = 64;
+
+int main(int argc, char **argv)
+{
+	const char kpath[] = "kcmp-test-file";
+	struct kcmp_epoll_slot epoll_slot;
+	struct epoll_event ev;
+	int pid1, pid2;
+	int pipefd[2];
+	int fd1, fd2;
+	int epollfd;
+	int status;
+	int fddup;
+
+	fd1 = open(kpath, O_RDWR | O_CREAT | O_TRUNC, 0644);
+	pid1 = getpid();
+
+	if (fd1 < 0) {
+		perror("Can't create file");
+		ksft_exit_fail();
+	}
+
+	if (pipe(pipefd)) {
+		perror("Can't create pipe");
+		ksft_exit_fail();
+	}
+
+	epollfd = epoll_create1(0);
+	if (epollfd < 0) {
+		perror("epoll_create1 failed");
+		ksft_exit_fail();
+	}
+
+	memset(&ev, 0xff, sizeof(ev));
+	ev.events = EPOLLIN | EPOLLOUT;
+
+	if (epoll_ctl(epollfd, EPOLL_CTL_ADD, pipefd[0], &ev)) {
+		perror("epoll_ctl failed");
+		ksft_exit_fail();
+	}
+
+	fddup = dup2(pipefd[1], duped_num);
+	if (fddup < 0) {
+		perror("dup2 failed");
+		ksft_exit_fail();
+	}
+
+	if (epoll_ctl(epollfd, EPOLL_CTL_ADD, fddup, &ev)) {
+		perror("epoll_ctl failed");
+		ksft_exit_fail();
+	}
+	close(fddup);
+
+	pid2 = fork();
+	if (pid2 < 0) {
+		perror("fork failed");
+		ksft_exit_fail();
+	}
+
+	if (!pid2) {
+		int pid2 = getpid();
+		int ret;
+
+		fd2 = open(kpath, O_RDWR, 0644);
+		if (fd2 < 0) {
+			perror("Can't open file");
+			ksft_exit_fail();
+		}
+
+		/* An example of output and arguments */
+		printf("pid1: %6d pid2: %6d FD: %2ld FILES: %2ld VM: %2ld "
+		       "FS: %2ld SIGHAND: %2ld IO: %2ld SYSVSEM: %2ld "
+		       "INV: %2ld\n",
+		       pid1, pid2,
+		       sys_kcmp(pid1, pid2, KCMP_FILE,		fd1, fd2),
+		       sys_kcmp(pid1, pid2, KCMP_FILES,		0, 0),
+		       sys_kcmp(pid1, pid2, KCMP_VM,		0, 0),
+		       sys_kcmp(pid1, pid2, KCMP_FS,		0, 0),
+		       sys_kcmp(pid1, pid2, KCMP_SIGHAND,	0, 0),
+		       sys_kcmp(pid1, pid2, KCMP_IO,		0, 0),
+		       sys_kcmp(pid1, pid2, KCMP_SYSVSEM,	0, 0),
+
+			/* This one should fail */
+		       sys_kcmp(pid1, pid2, KCMP_TYPES + 1,	0, 0));
+
+		/* This one should return same fd */
+		ret = sys_kcmp(pid1, pid2, KCMP_FILE, fd1, fd1);
+		if (ret) {
+			printf("FAIL: 0 expected but %d returned (%s)\n",
+				ret, strerror(errno));
+			ksft_inc_fail_cnt();
+			ret = -1;
+		} else {
+			printf("PASS: 0 returned as expected\n");
+			ksft_inc_pass_cnt();
+		}
+
+		/* Compare with self */
+		ret = sys_kcmp(pid1, pid1, KCMP_VM, 0, 0);
+		if (ret) {
+			printf("FAIL: 0 expected but %d returned (%s)\n",
+				ret, strerror(errno));
+			ksft_inc_fail_cnt();
+			ret = -1;
+		} else {
+			printf("PASS: 0 returned as expected\n");
+			ksft_inc_pass_cnt();
+		}
+
+		/* Compare epoll target */
+		epoll_slot = (struct kcmp_epoll_slot) {
+			.efd	= epollfd,
+			.tfd	= duped_num,
+			.toff	= 0,
+		};
+		ret = sys_kcmp(pid1, pid1, KCMP_EPOLL_TFD, pipefd[1],
+			       (unsigned long)(void *)&epoll_slot);
+		if (ret) {
+			printf("FAIL: 0 expected but %d returned (%s)\n",
+				ret, strerror(errno));
+			ksft_inc_fail_cnt();
+			ret = -1;
+		} else {
+			printf("PASS: 0 returned as expected\n");
+			ksft_inc_pass_cnt();
+		}
+
+		ksft_print_cnts();
+
+		if (ret)
+			ksft_exit_fail();
+		else
+			ksft_exit_pass();
+	}
+
+	waitpid(pid2, &status, P_ALL);
+
+	return ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/kmod/Makefile b/tools/testing/selftests/kmod/Makefile
new file mode 100644
index 000000000..fa2ccc5fb
--- /dev/null
+++ b/tools/testing/selftests/kmod/Makefile
@@ -0,0 +1,11 @@
+# Makefile for kmod loading selftests
+
+# No binaries, but make sure arg-less "make" doesn't trigger "run_tests"
+all:
+
+TEST_PROGS := kmod.sh
+
+include ../lib.mk
+
+# Nothing to clean up.
+clean:
diff --git a/tools/testing/selftests/kmod/config b/tools/testing/selftests/kmod/config
new file mode 100644
index 000000000..259f4fd6b
--- /dev/null
+++ b/tools/testing/selftests/kmod/config
@@ -0,0 +1,7 @@
+CONFIG_TEST_KMOD=m
+CONFIG_TEST_LKM=m
+CONFIG_XFS_FS=m
+
+# For the module parameter force_init_test is used
+CONFIG_TUN=m
+CONFIG_BTRFS_FS=m
diff --git a/tools/testing/selftests/kmod/kmod.sh b/tools/testing/selftests/kmod/kmod.sh
new file mode 100755
index 000000000..1f118916a
--- /dev/null
+++ b/tools/testing/selftests/kmod/kmod.sh
@@ -0,0 +1,623 @@
+#!/bin/bash
+#
+# Copyright (C) 2017 Luis R. Rodriguez <mcgrof@kernel.org>
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the Free
+# Software Foundation; either version 2 of the License, or at your option any
+# later version; or, when distributed separately from the Linux kernel or
+# when incorporated into other software packages, subject to the following
+# license:
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of copyleft-next (version 0.3.1 or later) as published
+# at http://copyleft-next.org/.
+
+# This is a stress test script for kmod, the kernel module loader. It uses
+# test_kmod which exposes a series of knobs for the API for us so we can
+# tweak each test in userspace rather than in kernelspace.
+#
+# The way kmod works is it uses the kernel's usermode helper API to eventually
+# call /sbin/modprobe. It has a limit of the number of concurrent calls
+# possible. The kernel interface to load modules is request_module(), however
+# mount uses get_fs_type(). Both behave slightly differently, but the
+# differences are important enough to test each call separately. For this
+# reason test_kmod starts by providing tests for both calls.
+#
+# The test driver test_kmod assumes a series of defaults which you can
+# override by exporting to your environment prior running this script.
+# For instance this script assumes you do not have xfs loaded upon boot.
+# If this is false, export DEFAULT_KMOD_FS="ext4" prior to running this
+# script if the filesyste module you don't have loaded upon bootup
+# is ext4 instead. Refer to allow_user_defaults() for a list of user
+# override variables possible.
+#
+# You'll want at least 4 GiB of RAM to expect to run these tests
+# without running out of memory on them. For other requirements refer
+# to test_reqs()
+
+set -e
+
+TEST_NAME="kmod"
+TEST_DRIVER="test_${TEST_NAME}"
+TEST_DIR=$(dirname $0)
+
+# This represents
+#
+# TEST_ID:TEST_COUNT:ENABLED
+#
+# TEST_ID: is the test id number
+# TEST_COUNT: number of times we should run the test
+# ENABLED: 1 if enabled, 0 otherwise
+#
+# Once these are enabled please leave them as-is. Write your own test,
+# we have tons of space.
+ALL_TESTS="0001:3:1"
+ALL_TESTS="$ALL_TESTS 0002:3:1"
+ALL_TESTS="$ALL_TESTS 0003:1:1"
+ALL_TESTS="$ALL_TESTS 0004:1:1"
+ALL_TESTS="$ALL_TESTS 0005:10:1"
+ALL_TESTS="$ALL_TESTS 0006:10:1"
+ALL_TESTS="$ALL_TESTS 0007:5:1"
+ALL_TESTS="$ALL_TESTS 0008:150:1"
+ALL_TESTS="$ALL_TESTS 0009:150:1"
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+test_modprobe()
+{
+       if [ ! -d $DIR ]; then
+               echo "$0: $DIR not present" >&2
+               echo "You must have the following enabled in your kernel:" >&2
+               cat $TEST_DIR/config >&2
+               exit $ksft_skip
+       fi
+}
+
+function allow_user_defaults()
+{
+	if [ -z $DEFAULT_KMOD_DRIVER ]; then
+		DEFAULT_KMOD_DRIVER="test_module"
+	fi
+
+	if [ -z $DEFAULT_KMOD_FS ]; then
+		DEFAULT_KMOD_FS="xfs"
+	fi
+
+	if [ -z $PROC_DIR ]; then
+		PROC_DIR="/proc/sys/kernel/"
+	fi
+
+	if [ -z $MODPROBE_LIMIT ]; then
+		MODPROBE_LIMIT=50
+	fi
+
+	if [ -z $DIR ]; then
+		DIR="/sys/devices/virtual/misc/${TEST_DRIVER}0/"
+	fi
+
+	if [ -z $DEFAULT_NUM_TESTS ]; then
+		DEFAULT_NUM_TESTS=150
+	fi
+
+	MODPROBE_LIMIT_FILE="${PROC_DIR}/kmod-limit"
+}
+
+test_reqs()
+{
+	if ! which modprobe 2> /dev/null > /dev/null; then
+		echo "$0: You need modprobe installed" >&2
+		exit $ksft_skip
+	fi
+
+	if ! which kmod 2> /dev/null > /dev/null; then
+		echo "$0: You need kmod installed" >&2
+		exit $ksft_skip
+	fi
+
+	# kmod 19 has a bad bug where it returns 0 when modprobe
+	# gets called *even* if the module was not loaded due to
+	# some bad heuristics. For details see:
+	#
+	# A work around is possible in-kernel but its rather
+	# complex.
+	KMOD_VERSION=$(kmod --version | awk '{print $3}')
+	if [[ $KMOD_VERSION  -le 19 ]]; then
+		echo "$0: You need at least kmod 20" >&2
+		echo "kmod <= 19 is buggy, for details see:" >&2
+		echo "http://git.kernel.org/cgit/utils/kernel/kmod/kmod.git/commit/libkmod/libkmod-module.c?id=fd44a98ae2eb5eb32161088954ab21e58e19dfc4" >&2
+		exit $ksft_skip
+	fi
+
+	uid=$(id -u)
+	if [ $uid -ne 0 ]; then
+		echo $msg must be run as root >&2
+		exit $ksft_skip
+	fi
+}
+
+function load_req_mod()
+{
+	trap "test_modprobe" EXIT
+
+	if [ ! -d $DIR ]; then
+		# Alanis: "Oh isn't it ironic?"
+		modprobe $TEST_DRIVER
+	fi
+}
+
+test_finish()
+{
+	echo "Test completed"
+}
+
+errno_name_to_val()
+{
+	case "$1" in
+	# kmod calls modprobe and upon of a module not found
+	# modprobe returns just 1... However in the kernel we
+	# *sometimes* see 256...
+	MODULE_NOT_FOUND)
+		echo 256;;
+	SUCCESS)
+		echo 0;;
+	-EPERM)
+		echo -1;;
+	-ENOENT)
+		echo -2;;
+	-EINVAL)
+		echo -22;;
+	-ERR_ANY)
+		echo -123456;;
+	*)
+		echo invalid;;
+	esac
+}
+
+errno_val_to_name()
+	case "$1" in
+	256)
+		echo MODULE_NOT_FOUND;;
+	0)
+		echo SUCCESS;;
+	-1)
+		echo -EPERM;;
+	-2)
+		echo -ENOENT;;
+	-22)
+		echo -EINVAL;;
+	-123456)
+		echo -ERR_ANY;;
+	*)
+		echo invalid;;
+	esac
+
+config_set_test_case_driver()
+{
+	if ! echo -n 1 >$DIR/config_test_case; then
+		echo "$0: Unable to set to test case to driver" >&2
+		exit 1
+	fi
+}
+
+config_set_test_case_fs()
+{
+	if ! echo -n 2 >$DIR/config_test_case; then
+		echo "$0: Unable to set to test case to fs" >&2
+		exit 1
+	fi
+}
+
+config_num_threads()
+{
+	if ! echo -n $1 >$DIR/config_num_threads; then
+		echo "$0: Unable to set to number of threads" >&2
+		exit 1
+	fi
+}
+
+config_get_modprobe_limit()
+{
+	if [[ -f ${MODPROBE_LIMIT_FILE} ]] ; then
+		MODPROBE_LIMIT=$(cat $MODPROBE_LIMIT_FILE)
+	fi
+	echo $MODPROBE_LIMIT
+}
+
+config_num_thread_limit_extra()
+{
+	MODPROBE_LIMIT=$(config_get_modprobe_limit)
+	let EXTRA_LIMIT=$MODPROBE_LIMIT+$1
+	config_num_threads $EXTRA_LIMIT
+}
+
+# For special characters use printf directly,
+# refer to kmod_test_0001
+config_set_driver()
+{
+	if ! echo -n $1 >$DIR/config_test_driver; then
+		echo "$0: Unable to set driver" >&2
+		exit 1
+	fi
+}
+
+config_set_fs()
+{
+	if ! echo -n $1 >$DIR/config_test_fs; then
+		echo "$0: Unable to set driver" >&2
+		exit 1
+	fi
+}
+
+config_get_driver()
+{
+	cat $DIR/config_test_driver
+}
+
+config_get_test_result()
+{
+	cat $DIR/test_result
+}
+
+config_reset()
+{
+	if ! echo -n "1" >"$DIR"/reset; then
+		echo "$0: reset shuld have worked" >&2
+		exit 1
+	fi
+}
+
+config_show_config()
+{
+	echo "----------------------------------------------------"
+	cat "$DIR"/config
+	echo "----------------------------------------------------"
+}
+
+config_trigger()
+{
+	if ! echo -n "1" >"$DIR"/trigger_config 2>/dev/null; then
+		echo "$1: FAIL - loading should have worked"
+		config_show_config
+		exit 1
+	fi
+	echo "$1: OK! - loading kmod test"
+}
+
+config_trigger_want_fail()
+{
+	if echo "1" > $DIR/trigger_config 2>/dev/null; then
+		echo "$1: FAIL - test case was expected to fail"
+		config_show_config
+		exit 1
+	fi
+	echo "$1: OK! - kmod test case failed as expected"
+}
+
+config_expect_result()
+{
+	RC=$(config_get_test_result)
+	RC_NAME=$(errno_val_to_name $RC)
+
+	ERRNO_NAME=$2
+	ERRNO=$(errno_name_to_val $ERRNO_NAME)
+
+	if [[ $ERRNO_NAME = "-ERR_ANY" ]]; then
+		if [[ $RC -ge 0 ]]; then
+			echo "$1: FAIL, test expects $ERRNO_NAME - got $RC_NAME ($RC)" >&2
+			config_show_config
+			exit 1
+		fi
+	elif [[ $RC != $ERRNO ]]; then
+		echo "$1: FAIL, test expects $ERRNO_NAME ($ERRNO) - got $RC_NAME ($RC)" >&2
+		config_show_config
+		exit 1
+	fi
+	echo "$1: OK! - Return value: $RC ($RC_NAME), expected $ERRNO_NAME"
+}
+
+kmod_defaults_driver()
+{
+	config_reset
+	modprobe -r $DEFAULT_KMOD_DRIVER
+	config_set_driver $DEFAULT_KMOD_DRIVER
+}
+
+kmod_defaults_fs()
+{
+	config_reset
+	modprobe -r $DEFAULT_KMOD_FS
+	config_set_fs $DEFAULT_KMOD_FS
+	config_set_test_case_fs
+}
+
+kmod_test_0001_driver()
+{
+	NAME='\000'
+
+	kmod_defaults_driver
+	config_num_threads 1
+	printf '\000' >"$DIR"/config_test_driver
+	config_trigger ${FUNCNAME[0]}
+	config_expect_result ${FUNCNAME[0]} MODULE_NOT_FOUND
+}
+
+kmod_test_0001_fs()
+{
+	NAME='\000'
+
+	kmod_defaults_fs
+	config_num_threads 1
+	printf '\000' >"$DIR"/config_test_fs
+	config_trigger ${FUNCNAME[0]}
+	config_expect_result ${FUNCNAME[0]} -EINVAL
+}
+
+kmod_test_0001()
+{
+	kmod_test_0001_driver
+	kmod_test_0001_fs
+}
+
+kmod_test_0002_driver()
+{
+	NAME="nope-$DEFAULT_KMOD_DRIVER"
+
+	kmod_defaults_driver
+	config_set_driver $NAME
+	config_num_threads 1
+	config_trigger ${FUNCNAME[0]}
+	config_expect_result ${FUNCNAME[0]} MODULE_NOT_FOUND
+}
+
+kmod_test_0002_fs()
+{
+	NAME="nope-$DEFAULT_KMOD_FS"
+
+	kmod_defaults_fs
+	config_set_fs $NAME
+	config_trigger ${FUNCNAME[0]}
+	config_expect_result ${FUNCNAME[0]} -EINVAL
+}
+
+kmod_test_0002()
+{
+	kmod_test_0002_driver
+	kmod_test_0002_fs
+}
+
+kmod_test_0003()
+{
+	kmod_defaults_fs
+	config_num_threads 1
+	config_trigger ${FUNCNAME[0]}
+	config_expect_result ${FUNCNAME[0]} SUCCESS
+}
+
+kmod_test_0004()
+{
+	kmod_defaults_fs
+	config_num_threads 2
+	config_trigger ${FUNCNAME[0]}
+	config_expect_result ${FUNCNAME[0]} SUCCESS
+}
+
+kmod_test_0005()
+{
+	kmod_defaults_driver
+	config_trigger ${FUNCNAME[0]}
+	config_expect_result ${FUNCNAME[0]} SUCCESS
+}
+
+kmod_test_0006()
+{
+	kmod_defaults_fs
+	config_trigger ${FUNCNAME[0]}
+	config_expect_result ${FUNCNAME[0]} SUCCESS
+}
+
+kmod_test_0007()
+{
+	kmod_test_0005
+	kmod_test_0006
+}
+
+kmod_test_0008()
+{
+	kmod_defaults_driver
+	MODPROBE_LIMIT=$(config_get_modprobe_limit)
+	let EXTRA=$MODPROBE_LIMIT/6
+	config_num_thread_limit_extra $EXTRA
+	config_trigger ${FUNCNAME[0]}
+	config_expect_result ${FUNCNAME[0]} SUCCESS
+}
+
+kmod_test_0009()
+{
+	kmod_defaults_fs
+	MODPROBE_LIMIT=$(config_get_modprobe_limit)
+	let EXTRA=$MODPROBE_LIMIT/4
+	config_num_thread_limit_extra $EXTRA
+	config_trigger ${FUNCNAME[0]}
+	config_expect_result ${FUNCNAME[0]} SUCCESS
+}
+
+list_tests()
+{
+	echo "Test ID list:"
+	echo
+	echo "TEST_ID x NUM_TEST"
+	echo "TEST_ID:   Test ID"
+	echo "NUM_TESTS: Number of recommended times to run the test"
+	echo
+	echo "0001 x $(get_test_count 0001) - Simple test - 1 thread  for empty string"
+	echo "0002 x $(get_test_count 0002) - Simple test - 1 thread  for modules/filesystems that do not exist"
+	echo "0003 x $(get_test_count 0003) - Simple test - 1 thread  for get_fs_type() only"
+	echo "0004 x $(get_test_count 0004) - Simple test - 2 threads for get_fs_type() only"
+	echo "0005 x $(get_test_count 0005) - multithreaded tests with default setup - request_module() only"
+	echo "0006 x $(get_test_count 0006) - multithreaded tests with default setup - get_fs_type() only"
+	echo "0007 x $(get_test_count 0007) - multithreaded tests with default setup test request_module() and get_fs_type()"
+	echo "0008 x $(get_test_count 0008) - multithreaded - push kmod_concurrent over max_modprobes for request_module()"
+	echo "0009 x $(get_test_count 0009) - multithreaded - push kmod_concurrent over max_modprobes for get_fs_type()"
+}
+
+usage()
+{
+	NUM_TESTS=$(grep -o ' ' <<<"$ALL_TESTS" | grep -c .)
+	let NUM_TESTS=$NUM_TESTS+1
+	MAX_TEST=$(printf "%04d\n" $NUM_TESTS)
+	echo "Usage: $0 [ -t <4-number-digit> ] | [ -w <4-number-digit> ] |"
+	echo "		 [ -s <4-number-digit> ] | [ -c <4-number-digit> <test- count>"
+	echo "           [ all ] [ -h | --help ] [ -l ]"
+	echo ""
+	echo "Valid tests: 0001-$MAX_TEST"
+	echo ""
+	echo "    all     Runs all tests (default)"
+	echo "    -t      Run test ID the number amount of times is recommended"
+	echo "    -w      Watch test ID run until it runs into an error"
+	echo "    -s      Run test ID once"
+	echo "    -c      Run test ID x test-count number of times"
+	echo "    -l      List all test ID list"
+	echo " -h|--help  Help"
+	echo
+	echo "If an error every occurs execution will immediately terminate."
+	echo "If you are adding a new test try using -w <test-ID> first to"
+	echo "make sure the test passes a series of tests."
+	echo
+	echo Example uses:
+	echo
+	echo "${TEST_NAME}.sh		-- executes all tests"
+	echo "${TEST_NAME}.sh -t 0008	-- Executes test ID 0008 number of times is recomended"
+	echo "${TEST_NAME}.sh -w 0008	-- Watch test ID 0008 run until an error occurs"
+	echo "${TEST_NAME}.sh -s 0008	-- Run test ID 0008 once"
+	echo "${TEST_NAME}.sh -c 0008 3	-- Run test ID 0008 three times"
+	echo
+	list_tests
+	exit 1
+}
+
+function test_num()
+{
+	re='^[0-9]+$'
+	if ! [[ $1 =~ $re ]]; then
+		usage
+	fi
+}
+
+function get_test_data()
+{
+	test_num $1
+	local field_num=$(echo $1 | sed 's/^0*//')
+	echo $ALL_TESTS | awk '{print $'$field_num'}'
+}
+
+function get_test_count()
+{
+	TEST_DATA=$(get_test_data $1)
+	LAST_TWO=${TEST_DATA#*:*}
+	echo ${LAST_TWO%:*}
+}
+
+function get_test_enabled()
+{
+	TEST_DATA=$(get_test_data $1)
+	echo ${TEST_DATA#*:*:}
+}
+
+function run_all_tests()
+{
+	for i in $ALL_TESTS ; do
+		TEST_ID=${i%:*:*}
+		ENABLED=$(get_test_enabled $TEST_ID)
+		TEST_COUNT=$(get_test_count $TEST_ID)
+		if [[ $ENABLED -eq "1" ]]; then
+			test_case $TEST_ID $TEST_COUNT
+		fi
+	done
+}
+
+function watch_log()
+{
+	if [ $# -ne 3 ]; then
+		clear
+	fi
+	date
+	echo "Running test: $2 - run #$1"
+}
+
+function watch_case()
+{
+	i=0
+	while [ 1 ]; do
+
+		if [ $# -eq 1 ]; then
+			test_num $1
+			watch_log $i ${TEST_NAME}_test_$1
+			${TEST_NAME}_test_$1
+		else
+			watch_log $i all
+			run_all_tests
+		fi
+		let i=$i+1
+	done
+}
+
+function test_case()
+{
+	NUM_TESTS=$DEFAULT_NUM_TESTS
+	if [ $# -eq 2 ]; then
+		NUM_TESTS=$2
+	fi
+
+	i=0
+	while [ $i -lt $NUM_TESTS ]; do
+		test_num $1
+		watch_log $i ${TEST_NAME}_test_$1 noclear
+		RUN_TEST=${TEST_NAME}_test_$1
+		$RUN_TEST
+		let i=$i+1
+	done
+}
+
+function parse_args()
+{
+	if [ $# -eq 0 ]; then
+		run_all_tests
+	else
+		if [[ "$1" = "all" ]]; then
+			run_all_tests
+		elif [[ "$1" = "-w" ]]; then
+			shift
+			watch_case $@
+		elif [[ "$1" = "-t" ]]; then
+			shift
+			test_num $1
+			test_case $1 $(get_test_count $1)
+		elif [[ "$1" = "-c" ]]; then
+			shift
+			test_num $1
+			test_num $2
+			test_case $1 $2
+		elif [[ "$1" = "-s" ]]; then
+			shift
+			test_case $1 1
+		elif [[ "$1" = "-l" ]]; then
+			list_tests
+		elif [[ "$1" = "-h" || "$1" = "--help" ]]; then
+			usage
+		else
+			usage
+		fi
+	fi
+}
+
+test_reqs
+allow_user_defaults
+load_req_mod
+
+trap "test_finish" EXIT
+
+parse_args $@
+
+exit 0
diff --git a/tools/testing/selftests/kselftest.h b/tools/testing/selftests/kselftest.h
new file mode 100644
index 000000000..a3edb2c8e
--- /dev/null
+++ b/tools/testing/selftests/kselftest.h
@@ -0,0 +1,183 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * kselftest.h:	kselftest framework return codes to include from
+ *		selftests.
+ *
+ * Copyright (c) 2014 Shuah Khan <shuahkh@osg.samsung.com>
+ * Copyright (c) 2014 Samsung Electronics Co., Ltd.
+ *
+ */
+#ifndef __KSELFTEST_H
+#define __KSELFTEST_H
+
+#include <stdlib.h>
+#include <unistd.h>
+#include <stdarg.h>
+
+/* define kselftest exit codes */
+#define KSFT_PASS  0
+#define KSFT_FAIL  1
+#define KSFT_XFAIL 2
+#define KSFT_XPASS 3
+#define KSFT_SKIP  4
+
+/* counters */
+struct ksft_count {
+	unsigned int ksft_pass;
+	unsigned int ksft_fail;
+	unsigned int ksft_xfail;
+	unsigned int ksft_xpass;
+	unsigned int ksft_xskip;
+	unsigned int ksft_error;
+};
+
+static struct ksft_count ksft_cnt;
+
+static inline int ksft_test_num(void)
+{
+	return ksft_cnt.ksft_pass + ksft_cnt.ksft_fail +
+		ksft_cnt.ksft_xfail + ksft_cnt.ksft_xpass +
+		ksft_cnt.ksft_xskip + ksft_cnt.ksft_error;
+}
+
+static inline void ksft_inc_pass_cnt(void) { ksft_cnt.ksft_pass++; }
+static inline void ksft_inc_fail_cnt(void) { ksft_cnt.ksft_fail++; }
+static inline void ksft_inc_xfail_cnt(void) { ksft_cnt.ksft_xfail++; }
+static inline void ksft_inc_xpass_cnt(void) { ksft_cnt.ksft_xpass++; }
+static inline void ksft_inc_xskip_cnt(void) { ksft_cnt.ksft_xskip++; }
+static inline void ksft_inc_error_cnt(void) { ksft_cnt.ksft_error++; }
+
+static inline int ksft_get_pass_cnt(void) { return ksft_cnt.ksft_pass; }
+static inline int ksft_get_fail_cnt(void) { return ksft_cnt.ksft_fail; }
+static inline int ksft_get_xfail_cnt(void) { return ksft_cnt.ksft_xfail; }
+static inline int ksft_get_xpass_cnt(void) { return ksft_cnt.ksft_xpass; }
+static inline int ksft_get_xskip_cnt(void) { return ksft_cnt.ksft_xskip; }
+static inline int ksft_get_error_cnt(void) { return ksft_cnt.ksft_error; }
+
+static inline void ksft_print_header(void)
+{
+	if (!(getenv("KSFT_TAP_LEVEL")))
+		printf("TAP version 13\n");
+}
+
+static inline void ksft_print_cnts(void)
+{
+	printf("Pass %d Fail %d Xfail %d Xpass %d Skip %d Error %d\n",
+		ksft_cnt.ksft_pass, ksft_cnt.ksft_fail,
+		ksft_cnt.ksft_xfail, ksft_cnt.ksft_xpass,
+		ksft_cnt.ksft_xskip, ksft_cnt.ksft_error);
+	printf("1..%d\n", ksft_test_num());
+}
+
+static inline void ksft_print_msg(const char *msg, ...)
+{
+	va_list args;
+
+	va_start(args, msg);
+	printf("# ");
+	vprintf(msg, args);
+	va_end(args);
+}
+
+static inline void ksft_test_result_pass(const char *msg, ...)
+{
+	va_list args;
+
+	ksft_cnt.ksft_pass++;
+
+	va_start(args, msg);
+	printf("ok %d ", ksft_test_num());
+	vprintf(msg, args);
+	va_end(args);
+}
+
+static inline void ksft_test_result_fail(const char *msg, ...)
+{
+	va_list args;
+
+	ksft_cnt.ksft_fail++;
+
+	va_start(args, msg);
+	printf("not ok %d ", ksft_test_num());
+	vprintf(msg, args);
+	va_end(args);
+}
+
+static inline void ksft_test_result_skip(const char *msg, ...)
+{
+	va_list args;
+
+	ksft_cnt.ksft_xskip++;
+
+	va_start(args, msg);
+	printf("ok %d # skip ", ksft_test_num());
+	vprintf(msg, args);
+	va_end(args);
+}
+
+static inline void ksft_test_result_error(const char *msg, ...)
+{
+	va_list args;
+
+	ksft_cnt.ksft_error++;
+
+	va_start(args, msg);
+	printf("not ok %d # error ", ksft_test_num());
+	vprintf(msg, args);
+	va_end(args);
+}
+
+static inline int ksft_exit_pass(void)
+{
+	ksft_print_cnts();
+	exit(KSFT_PASS);
+}
+
+static inline int ksft_exit_fail(void)
+{
+	printf("Bail out!\n");
+	ksft_print_cnts();
+	exit(KSFT_FAIL);
+}
+
+static inline int ksft_exit_fail_msg(const char *msg, ...)
+{
+	va_list args;
+
+	va_start(args, msg);
+	printf("Bail out! ");
+	vprintf(msg, args);
+	va_end(args);
+
+	ksft_print_cnts();
+	exit(KSFT_FAIL);
+}
+
+static inline int ksft_exit_xfail(void)
+{
+	ksft_print_cnts();
+	exit(KSFT_XFAIL);
+}
+
+static inline int ksft_exit_xpass(void)
+{
+	ksft_print_cnts();
+	exit(KSFT_XPASS);
+}
+
+static inline int ksft_exit_skip(const char *msg, ...)
+{
+	if (msg) {
+		va_list args;
+
+		va_start(args, msg);
+		printf("1..%d # Skipped: ", ksft_test_num());
+		vprintf(msg, args);
+		va_end(args);
+	} else {
+		ksft_print_cnts();
+	}
+	exit(KSFT_SKIP);
+}
+
+#endif /* __KSELFTEST_H */
diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h
new file mode 100644
index 000000000..76d654ef3
--- /dev/null
+++ b/tools/testing/selftests/kselftest_harness.h
@@ -0,0 +1,779 @@
+/*
+ * Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
+ * Use of this source code is governed by the GPLv2 license.
+ *
+ * kselftest_harness.h: simple C unit test helper.
+ *
+ * See documentation in Documentation/dev-tools/kselftest.rst
+ *
+ * API inspired by code.google.com/p/googletest
+ */
+
+/**
+ * DOC: example
+ *
+ * .. code-block:: c
+ *
+ *    #include "../kselftest_harness.h"
+ *
+ *    TEST(standalone_test) {
+ *      do_some_stuff;
+ *      EXPECT_GT(10, stuff) {
+ *         stuff_state_t state;
+ *         enumerate_stuff_state(&state);
+ *         TH_LOG("expectation failed with state: %s", state.msg);
+ *      }
+ *      more_stuff;
+ *      ASSERT_NE(some_stuff, NULL) TH_LOG("how did it happen?!");
+ *      last_stuff;
+ *      EXPECT_EQ(0, last_stuff);
+ *    }
+ *
+ *    FIXTURE(my_fixture) {
+ *      mytype_t *data;
+ *      int awesomeness_level;
+ *    };
+ *    FIXTURE_SETUP(my_fixture) {
+ *      self->data = mytype_new();
+ *      ASSERT_NE(NULL, self->data);
+ *    }
+ *    FIXTURE_TEARDOWN(my_fixture) {
+ *      mytype_free(self->data);
+ *    }
+ *    TEST_F(my_fixture, data_is_good) {
+ *      EXPECT_EQ(1, is_my_data_good(self->data));
+ *    }
+ *
+ *    TEST_HARNESS_MAIN
+ */
+
+#ifndef __KSELFTEST_HARNESS_H
+#define __KSELFTEST_HARNESS_H
+
+#define _GNU_SOURCE
+#include <asm/types.h>
+#include <errno.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+
+/* Utilities exposed to the test definitions */
+#ifndef TH_LOG_STREAM
+#  define TH_LOG_STREAM stderr
+#endif
+
+#ifndef TH_LOG_ENABLED
+#  define TH_LOG_ENABLED 1
+#endif
+
+/**
+ * TH_LOG(fmt, ...)
+ *
+ * @fmt: format string
+ * @...: optional arguments
+ *
+ * .. code-block:: c
+ *
+ *     TH_LOG(format, ...)
+ *
+ * Optional debug logging function available for use in tests.
+ * Logging may be enabled or disabled by defining TH_LOG_ENABLED.
+ * E.g., #define TH_LOG_ENABLED 1
+ *
+ * If no definition is provided, logging is enabled by default.
+ *
+ * If there is no way to print an error message for the process running the
+ * test (e.g. not allowed to write to stderr), it is still possible to get the
+ * ASSERT_* number for which the test failed.  This behavior can be enabled by
+ * writing `_metadata->no_print = true;` before the check sequence that is
+ * unable to print.  When an error occur, instead of printing an error message
+ * and calling `abort(3)`, the test process call `_exit(2)` with the assert
+ * number as argument, which is then printed by the parent process.
+ */
+#define TH_LOG(fmt, ...) do { \
+	if (TH_LOG_ENABLED) \
+		__TH_LOG(fmt, ##__VA_ARGS__); \
+} while (0)
+
+/* Unconditional logger for internal use. */
+#define __TH_LOG(fmt, ...) \
+		fprintf(TH_LOG_STREAM, "%s:%d:%s:" fmt "\n", \
+			__FILE__, __LINE__, _metadata->name, ##__VA_ARGS__)
+
+/**
+ * XFAIL(statement, fmt, ...)
+ *
+ * @statement: statement to run after reporting XFAIL
+ * @fmt: format string
+ * @...: optional arguments
+ *
+ * This forces a "pass" after reporting a failure with an XFAIL prefix,
+ * and runs "statement", which is usually "return" or "goto skip".
+ */
+#define XFAIL(statement, fmt, ...) do { \
+	if (TH_LOG_ENABLED) { \
+		fprintf(TH_LOG_STREAM, "[  XFAIL!  ] " fmt "\n", \
+			##__VA_ARGS__); \
+	} \
+	/* TODO: find a way to pass xfail to test runner process. */ \
+	_metadata->passed = 1; \
+	_metadata->trigger = 0; \
+	statement; \
+} while (0)
+
+/**
+ * TEST(test_name) - Defines the test function and creates the registration
+ * stub
+ *
+ * @test_name: test name
+ *
+ * .. code-block:: c
+ *
+ *     TEST(name) { implementation }
+ *
+ * Defines a test by name.
+ * Names must be unique and tests must not be run in parallel.  The
+ * implementation containing block is a function and scoping should be treated
+ * as such.  Returning early may be performed with a bare "return;" statement.
+ *
+ * EXPECT_* and ASSERT_* are valid in a TEST() { } context.
+ */
+#define TEST(test_name) __TEST_IMPL(test_name, -1)
+
+/**
+ * TEST_SIGNAL(test_name, signal)
+ *
+ * @test_name: test name
+ * @signal: signal number
+ *
+ * .. code-block:: c
+ *
+ *     TEST_SIGNAL(name, signal) { implementation }
+ *
+ * Defines a test by name and the expected term signal.
+ * Names must be unique and tests must not be run in parallel.  The
+ * implementation containing block is a function and scoping should be treated
+ * as such.  Returning early may be performed with a bare "return;" statement.
+ *
+ * EXPECT_* and ASSERT_* are valid in a TEST() { } context.
+ */
+#define TEST_SIGNAL(test_name, signal) __TEST_IMPL(test_name, signal)
+
+#define __TEST_IMPL(test_name, _signal) \
+	static void test_name(struct __test_metadata *_metadata); \
+	static struct __test_metadata _##test_name##_object = \
+		{ name: "global." #test_name, \
+		  fn: &test_name, termsig: _signal }; \
+	static void __attribute__((constructor)) _register_##test_name(void) \
+	{ \
+		__register_test(&_##test_name##_object); \
+	} \
+	static void test_name( \
+		struct __test_metadata __attribute__((unused)) *_metadata)
+
+/**
+ * FIXTURE_DATA(datatype_name) - Wraps the struct name so we have one less
+ * argument to pass around
+ *
+ * @datatype_name: datatype name
+ *
+ * .. code-block:: c
+ *
+ *     FIXTURE_DATA(datatype name)
+ *
+ * This call may be used when the type of the fixture data
+ * is needed.  In general, this should not be needed unless
+ * the *self* is being passed to a helper directly.
+ */
+#define FIXTURE_DATA(datatype_name) struct _test_data_##datatype_name
+
+/**
+ * FIXTURE(fixture_name) - Called once per fixture to setup the data and
+ * register
+ *
+ * @fixture_name: fixture name
+ *
+ * .. code-block:: c
+ *
+ *     FIXTURE(datatype name) {
+ *       type property1;
+ *       ...
+ *     };
+ *
+ * Defines the data provided to TEST_F()-defined tests as *self*.  It should be
+ * populated and cleaned up using FIXTURE_SETUP() and FIXTURE_TEARDOWN().
+ */
+#define FIXTURE(fixture_name) \
+	static void __attribute__((constructor)) \
+	_register_##fixture_name##_data(void) \
+	{ \
+		__fixture_count++; \
+	} \
+	FIXTURE_DATA(fixture_name)
+
+/**
+ * FIXTURE_SETUP(fixture_name) - Prepares the setup function for the fixture.
+ * *_metadata* is included so that EXPECT_* and ASSERT_* work correctly.
+ *
+ * @fixture_name: fixture name
+ *
+ * .. code-block:: c
+ *
+ *     FIXTURE_SETUP(fixture name) { implementation }
+ *
+ * Populates the required "setup" function for a fixture.  An instance of the
+ * datatype defined with FIXTURE_DATA() will be exposed as *self* for the
+ * implementation.
+ *
+ * ASSERT_* are valid for use in this context and will prempt the execution
+ * of any dependent fixture tests.
+ *
+ * A bare "return;" statement may be used to return early.
+ */
+#define FIXTURE_SETUP(fixture_name) \
+	void fixture_name##_setup( \
+		struct __test_metadata __attribute__((unused)) *_metadata, \
+		FIXTURE_DATA(fixture_name) __attribute__((unused)) *self)
+/**
+ * FIXTURE_TEARDOWN(fixture_name)
+ * *_metadata* is included so that EXPECT_* and ASSERT_* work correctly.
+ *
+ * @fixture_name: fixture name
+ *
+ * .. code-block:: c
+ *
+ *     FIXTURE_TEARDOWN(fixture name) { implementation }
+ *
+ * Populates the required "teardown" function for a fixture.  An instance of the
+ * datatype defined with FIXTURE_DATA() will be exposed as *self* for the
+ * implementation to clean up.
+ *
+ * A bare "return;" statement may be used to return early.
+ */
+#define FIXTURE_TEARDOWN(fixture_name) \
+	void fixture_name##_teardown( \
+		struct __test_metadata __attribute__((unused)) *_metadata, \
+		FIXTURE_DATA(fixture_name) __attribute__((unused)) *self)
+
+/**
+ * TEST_F(fixture_name, test_name) - Emits test registration and helpers for
+ * fixture-based test cases
+ *
+ * @fixture_name: fixture name
+ * @test_name: test name
+ *
+ * .. code-block:: c
+ *
+ *     TEST_F(fixture, name) { implementation }
+ *
+ * Defines a test that depends on a fixture (e.g., is part of a test case).
+ * Very similar to TEST() except that *self* is the setup instance of fixture's
+ * datatype exposed for use by the implementation.
+ *
+ * Warning: use of ASSERT_* here will skip TEARDOWN.
+ */
+/* TODO(wad) register fixtures on dedicated test lists. */
+#define TEST_F(fixture_name, test_name) \
+	__TEST_F_IMPL(fixture_name, test_name, -1)
+
+#define TEST_F_SIGNAL(fixture_name, test_name, signal) \
+	__TEST_F_IMPL(fixture_name, test_name, signal)
+
+#define __TEST_F_IMPL(fixture_name, test_name, signal) \
+	static void fixture_name##_##test_name( \
+		struct __test_metadata *_metadata, \
+		FIXTURE_DATA(fixture_name) *self); \
+	static inline void wrapper_##fixture_name##_##test_name( \
+		struct __test_metadata *_metadata) \
+	{ \
+		/* fixture data is alloced, setup, and torn down per call. */ \
+		FIXTURE_DATA(fixture_name) self; \
+		memset(&self, 0, sizeof(FIXTURE_DATA(fixture_name))); \
+		fixture_name##_setup(_metadata, &self); \
+		/* Let setup failure terminate early. */ \
+		if (!_metadata->passed) \
+			return; \
+		fixture_name##_##test_name(_metadata, &self); \
+		fixture_name##_teardown(_metadata, &self); \
+	} \
+	static struct __test_metadata \
+		      _##fixture_name##_##test_name##_object = { \
+		name: #fixture_name "." #test_name, \
+		fn: &wrapper_##fixture_name##_##test_name, \
+		termsig: signal, \
+	 }; \
+	static void __attribute__((constructor)) \
+			_register_##fixture_name##_##test_name(void) \
+	{ \
+		__register_test(&_##fixture_name##_##test_name##_object); \
+	} \
+	static void fixture_name##_##test_name( \
+		struct __test_metadata __attribute__((unused)) *_metadata, \
+		FIXTURE_DATA(fixture_name) __attribute__((unused)) *self)
+
+/**
+ * TEST_HARNESS_MAIN - Simple wrapper to run the test harness
+ *
+ * .. code-block:: c
+ *
+ *     TEST_HARNESS_MAIN
+ *
+ * Use once to append a main() to the test file.
+ */
+#define TEST_HARNESS_MAIN \
+	static void __attribute__((constructor)) \
+	__constructor_order_last(void) \
+	{ \
+		if (!__constructor_order) \
+			__constructor_order = _CONSTRUCTOR_ORDER_BACKWARD; \
+	} \
+	int main(int argc, char **argv) { \
+		return test_harness_run(argc, argv); \
+	}
+
+/**
+ * DOC: operators
+ *
+ * Operators for use in TEST() and TEST_F().
+ * ASSERT_* calls will stop test execution immediately.
+ * EXPECT_* calls will emit a failure warning, note it, and continue.
+ */
+
+/**
+ * ASSERT_EQ(expected, seen)
+ *
+ * @expected: expected value
+ * @seen: measured value
+ *
+ * ASSERT_EQ(expected, measured): expected == measured
+ */
+#define ASSERT_EQ(expected, seen) \
+	__EXPECT(expected, #expected, seen, #seen, ==, 1)
+
+/**
+ * ASSERT_NE(expected, seen)
+ *
+ * @expected: expected value
+ * @seen: measured value
+ *
+ * ASSERT_NE(expected, measured): expected != measured
+ */
+#define ASSERT_NE(expected, seen) \
+	__EXPECT(expected, #expected, seen, #seen, !=, 1)
+
+/**
+ * ASSERT_LT(expected, seen)
+ *
+ * @expected: expected value
+ * @seen: measured value
+ *
+ * ASSERT_LT(expected, measured): expected < measured
+ */
+#define ASSERT_LT(expected, seen) \
+	__EXPECT(expected, #expected, seen, #seen, <, 1)
+
+/**
+ * ASSERT_LE(expected, seen)
+ *
+ * @expected: expected value
+ * @seen: measured value
+ *
+ * ASSERT_LE(expected, measured): expected <= measured
+ */
+#define ASSERT_LE(expected, seen) \
+	__EXPECT(expected, #expected, seen, #seen, <=, 1)
+
+/**
+ * ASSERT_GT(expected, seen)
+ *
+ * @expected: expected value
+ * @seen: measured value
+ *
+ * ASSERT_GT(expected, measured): expected > measured
+ */
+#define ASSERT_GT(expected, seen) \
+	__EXPECT(expected, #expected, seen, #seen, >, 1)
+
+/**
+ * ASSERT_GE(expected, seen)
+ *
+ * @expected: expected value
+ * @seen: measured value
+ *
+ * ASSERT_GE(expected, measured): expected >= measured
+ */
+#define ASSERT_GE(expected, seen) \
+	__EXPECT(expected, #expected, seen, #seen, >=, 1)
+
+/**
+ * ASSERT_NULL(seen)
+ *
+ * @seen: measured value
+ *
+ * ASSERT_NULL(measured): NULL == measured
+ */
+#define ASSERT_NULL(seen) \
+	__EXPECT(NULL, "NULL", seen, #seen, ==, 1)
+
+/**
+ * ASSERT_TRUE(seen)
+ *
+ * @seen: measured value
+ *
+ * ASSERT_TRUE(measured): measured != 0
+ */
+#define ASSERT_TRUE(seen) \
+	__EXPECT(0, "0", seen, #seen, !=, 1)
+
+/**
+ * ASSERT_FALSE(seen)
+ *
+ * @seen: measured value
+ *
+ * ASSERT_FALSE(measured): measured == 0
+ */
+#define ASSERT_FALSE(seen) \
+	__EXPECT(0, "0", seen, #seen, ==, 1)
+
+/**
+ * ASSERT_STREQ(expected, seen)
+ *
+ * @expected: expected value
+ * @seen: measured value
+ *
+ * ASSERT_STREQ(expected, measured): !strcmp(expected, measured)
+ */
+#define ASSERT_STREQ(expected, seen) \
+	__EXPECT_STR(expected, seen, ==, 1)
+
+/**
+ * ASSERT_STRNE(expected, seen)
+ *
+ * @expected: expected value
+ * @seen: measured value
+ *
+ * ASSERT_STRNE(expected, measured): strcmp(expected, measured)
+ */
+#define ASSERT_STRNE(expected, seen) \
+	__EXPECT_STR(expected, seen, !=, 1)
+
+/**
+ * EXPECT_EQ(expected, seen)
+ *
+ * @expected: expected value
+ * @seen: measured value
+ *
+ * EXPECT_EQ(expected, measured): expected == measured
+ */
+#define EXPECT_EQ(expected, seen) \
+	__EXPECT(expected, #expected, seen, #seen, ==, 0)
+
+/**
+ * EXPECT_NE(expected, seen)
+ *
+ * @expected: expected value
+ * @seen: measured value
+ *
+ * EXPECT_NE(expected, measured): expected != measured
+ */
+#define EXPECT_NE(expected, seen) \
+	__EXPECT(expected, #expected, seen, #seen, !=, 0)
+
+/**
+ * EXPECT_LT(expected, seen)
+ *
+ * @expected: expected value
+ * @seen: measured value
+ *
+ * EXPECT_LT(expected, measured): expected < measured
+ */
+#define EXPECT_LT(expected, seen) \
+	__EXPECT(expected, #expected, seen, #seen, <, 0)
+
+/**
+ * EXPECT_LE(expected, seen)
+ *
+ * @expected: expected value
+ * @seen: measured value
+ *
+ * EXPECT_LE(expected, measured): expected <= measured
+ */
+#define EXPECT_LE(expected, seen) \
+	__EXPECT(expected, #expected, seen, #seen, <=, 0)
+
+/**
+ * EXPECT_GT(expected, seen)
+ *
+ * @expected: expected value
+ * @seen: measured value
+ *
+ * EXPECT_GT(expected, measured): expected > measured
+ */
+#define EXPECT_GT(expected, seen) \
+	__EXPECT(expected, #expected, seen, #seen, >, 0)
+
+/**
+ * EXPECT_GE(expected, seen)
+ *
+ * @expected: expected value
+ * @seen: measured value
+ *
+ * EXPECT_GE(expected, measured): expected >= measured
+ */
+#define EXPECT_GE(expected, seen) \
+	__EXPECT(expected, #expected, seen, #seen, >=, 0)
+
+/**
+ * EXPECT_NULL(seen)
+ *
+ * @seen: measured value
+ *
+ * EXPECT_NULL(measured): NULL == measured
+ */
+#define EXPECT_NULL(seen) \
+	__EXPECT(NULL, "NULL", seen, #seen, ==, 0)
+
+/**
+ * EXPECT_TRUE(seen)
+ *
+ * @seen: measured value
+ *
+ * EXPECT_TRUE(measured): 0 != measured
+ */
+#define EXPECT_TRUE(seen) \
+	__EXPECT(0, "0", seen, #seen, !=, 0)
+
+/**
+ * EXPECT_FALSE(seen)
+ *
+ * @seen: measured value
+ *
+ * EXPECT_FALSE(measured): 0 == measured
+ */
+#define EXPECT_FALSE(seen) \
+	__EXPECT(0, "0", seen, #seen, ==, 0)
+
+/**
+ * EXPECT_STREQ(expected, seen)
+ *
+ * @expected: expected value
+ * @seen: measured value
+ *
+ * EXPECT_STREQ(expected, measured): !strcmp(expected, measured)
+ */
+#define EXPECT_STREQ(expected, seen) \
+	__EXPECT_STR(expected, seen, ==, 0)
+
+/**
+ * EXPECT_STRNE(expected, seen)
+ *
+ * @expected: expected value
+ * @seen: measured value
+ *
+ * EXPECT_STRNE(expected, measured): strcmp(expected, measured)
+ */
+#define EXPECT_STRNE(expected, seen) \
+	__EXPECT_STR(expected, seen, !=, 0)
+
+#define ARRAY_SIZE(a)	(sizeof(a) / sizeof(a[0]))
+
+/* Support an optional handler after and ASSERT_* or EXPECT_*.  The approach is
+ * not thread-safe, but it should be fine in most sane test scenarios.
+ *
+ * Using __bail(), which optionally abort()s, is the easiest way to early
+ * return while still providing an optional block to the API consumer.
+ */
+#define OPTIONAL_HANDLER(_assert) \
+	for (; _metadata->trigger; _metadata->trigger = \
+			__bail(_assert, _metadata->no_print, _metadata->step))
+
+#define __INC_STEP(_metadata) \
+	if (_metadata->passed && _metadata->step < 255) \
+		_metadata->step++;
+
+#define __EXPECT(_expected, _expected_str, _seen, _seen_str, _t, _assert) do { \
+	/* Avoid multiple evaluation of the cases */ \
+	__typeof__(_expected) __exp = (_expected); \
+	__typeof__(_seen) __seen = (_seen); \
+	if (_assert) __INC_STEP(_metadata); \
+	if (!(__exp _t __seen)) { \
+		unsigned long long __exp_print = (uintptr_t)__exp; \
+		unsigned long long __seen_print = (uintptr_t)__seen; \
+		__TH_LOG("Expected %s (%llu) %s %s (%llu)", \
+			 _expected_str, __exp_print, #_t, \
+			 _seen_str, __seen_print); \
+		_metadata->passed = 0; \
+		/* Ensure the optional handler is triggered */ \
+		_metadata->trigger = 1; \
+	} \
+} while (0); OPTIONAL_HANDLER(_assert)
+
+#define __EXPECT_STR(_expected, _seen, _t, _assert) do { \
+	const char *__exp = (_expected); \
+	const char *__seen = (_seen); \
+	if (_assert) __INC_STEP(_metadata); \
+	if (!(strcmp(__exp, __seen) _t 0))  { \
+		__TH_LOG("Expected '%s' %s '%s'.", __exp, #_t, __seen); \
+		_metadata->passed = 0; \
+		_metadata->trigger = 1; \
+	} \
+} while (0); OPTIONAL_HANDLER(_assert)
+
+/* Contains all the information for test execution and status checking. */
+struct __test_metadata {
+	const char *name;
+	void (*fn)(struct __test_metadata *);
+	int termsig;
+	int passed;
+	int trigger; /* extra handler after the evaluation */
+	__u8 step;
+	bool no_print; /* manual trigger when TH_LOG_STREAM is not available */
+	struct __test_metadata *prev, *next;
+};
+
+/* Storage for the (global) tests to be run. */
+static struct __test_metadata *__test_list;
+static unsigned int __test_count;
+static unsigned int __fixture_count;
+static int __constructor_order;
+
+#define _CONSTRUCTOR_ORDER_FORWARD   1
+#define _CONSTRUCTOR_ORDER_BACKWARD -1
+
+/*
+ * Since constructors are called in reverse order, reverse the test
+ * list so tests are run in source declaration order.
+ * https://gcc.gnu.org/onlinedocs/gccint/Initialization.html
+ * However, it seems not all toolchains do this correctly, so use
+ * __constructor_order to detect which direction is called first
+ * and adjust list building logic to get things running in the right
+ * direction.
+ */
+static inline void __register_test(struct __test_metadata *t)
+{
+	__test_count++;
+	/* Circular linked list where only prev is circular. */
+	if (__test_list == NULL) {
+		__test_list = t;
+		t->next = NULL;
+		t->prev = t;
+		return;
+	}
+	if (__constructor_order == _CONSTRUCTOR_ORDER_FORWARD) {
+		t->next = NULL;
+		t->prev = __test_list->prev;
+		t->prev->next = t;
+		__test_list->prev = t;
+	} else {
+		t->next = __test_list;
+		t->next->prev = t;
+		t->prev = t;
+		__test_list = t;
+	}
+}
+
+static inline int __bail(int for_realz, bool no_print, __u8 step)
+{
+	if (for_realz) {
+		if (no_print)
+			_exit(step);
+		abort();
+	}
+	return 0;
+}
+
+void __run_test(struct __test_metadata *t)
+{
+	pid_t child_pid;
+	int status;
+
+	t->passed = 1;
+	t->trigger = 0;
+	printf("[ RUN      ] %s\n", t->name);
+	child_pid = fork();
+	if (child_pid < 0) {
+		printf("ERROR SPAWNING TEST CHILD\n");
+		t->passed = 0;
+	} else if (child_pid == 0) {
+		t->fn(t);
+		/* return the step that failed or 0 */
+		_exit(t->passed ? 0 : t->step);
+	} else {
+		/* TODO(wad) add timeout support. */
+		waitpid(child_pid, &status, 0);
+		if (WIFEXITED(status)) {
+			t->passed = t->termsig == -1 ? !WEXITSTATUS(status) : 0;
+			if (t->termsig != -1) {
+				fprintf(TH_LOG_STREAM,
+					"%s: Test exited normally "
+					"instead of by signal (code: %d)\n",
+					t->name,
+					WEXITSTATUS(status));
+			} else if (!t->passed) {
+				fprintf(TH_LOG_STREAM,
+					"%s: Test failed at step #%d\n",
+					t->name,
+					WEXITSTATUS(status));
+			}
+		} else if (WIFSIGNALED(status)) {
+			t->passed = 0;
+			if (WTERMSIG(status) == SIGABRT) {
+				fprintf(TH_LOG_STREAM,
+					"%s: Test terminated by assertion\n",
+					t->name);
+			} else if (WTERMSIG(status) == t->termsig) {
+				t->passed = 1;
+			} else {
+				fprintf(TH_LOG_STREAM,
+					"%s: Test terminated unexpectedly "
+					"by signal %d\n",
+					t->name,
+					WTERMSIG(status));
+			}
+		} else {
+			fprintf(TH_LOG_STREAM,
+				"%s: Test ended in some other way [%u]\n",
+				t->name,
+				status);
+		}
+	}
+	printf("[     %4s ] %s\n", (t->passed ? "OK" : "FAIL"), t->name);
+}
+
+static int test_harness_run(int __attribute__((unused)) argc,
+			    char __attribute__((unused)) **argv)
+{
+	struct __test_metadata *t;
+	int ret = 0;
+	unsigned int count = 0;
+	unsigned int pass_count = 0;
+
+	/* TODO(wad) add optional arguments similar to gtest. */
+	printf("[==========] Running %u tests from %u test cases.\n",
+	       __test_count, __fixture_count + 1);
+	for (t = __test_list; t; t = t->next) {
+		count++;
+		__run_test(t);
+		if (t->passed)
+			pass_count++;
+		else
+			ret = 1;
+	}
+	printf("[==========] %u / %u tests passed.\n", pass_count, count);
+	printf("[  %s  ]\n", (ret ? "FAILED" : "PASSED"));
+	return ret;
+}
+
+static void __attribute__((constructor)) __constructor_order_first(void)
+{
+	if (!__constructor_order)
+		__constructor_order = _CONSTRUCTOR_ORDER_FORWARD;
+}
+
+#endif  /* __KSELFTEST_HARNESS_H */
diff --git a/tools/testing/selftests/kselftest_install.sh b/tools/testing/selftests/kselftest_install.sh
new file mode 100755
index 000000000..ec3044638
--- /dev/null
+++ b/tools/testing/selftests/kselftest_install.sh
@@ -0,0 +1,35 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Kselftest Install
+# Install kselftest tests
+# Author: Shuah Khan <shuahkh@osg.samsung.com>
+# Copyright (C) 2015 Samsung Electronics Co., Ltd.
+
+install_loc=`pwd`
+
+main()
+{
+	if [ $(basename $install_loc) !=  "selftests" ]; then
+		echo "$0: Please run it in selftests directory ..."
+		exit 1;
+	fi
+	if [ "$#" -eq 0 ]; then
+		echo "$0: Installing in default location - $install_loc ..."
+	elif [ ! -d "$1" ]; then
+		echo "$0: $1 doesn't exist!!"
+		exit 1;
+	else
+		install_loc=$1
+		echo "$0: Installing in specified location - $install_loc ..."
+	fi
+
+	install_dir=$install_loc/kselftest
+
+# Create install directory
+	mkdir -p $install_dir
+# Build tests
+	INSTALL_PATH=$install_dir make install
+}
+
+main "$@"
diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore
new file mode 100644
index 000000000..5c34752e1
--- /dev/null
+++ b/tools/testing/selftests/kvm/.gitignore
@@ -0,0 +1,6 @@
+cr4_cpuid_sync_test
+platform_info_test
+set_sregs_test
+sync_regs_test
+vmx_tsc_adjust_test
+state_test
diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
new file mode 100644
index 000000000..cc83e2fd3
--- /dev/null
+++ b/tools/testing/selftests/kvm/Makefile
@@ -0,0 +1,43 @@
+all:
+
+top_srcdir = ../../../../
+KSFT_KHDR_INSTALL := 1
+UNAME_M := $(shell uname -m)
+
+LIBKVM = lib/assert.c lib/elf.c lib/io.c lib/kvm_util.c lib/sparsebit.c
+LIBKVM_x86_64 = lib/x86.c lib/vmx.c
+
+TEST_GEN_PROGS_x86_64 = platform_info_test
+TEST_GEN_PROGS_x86_64 += set_sregs_test
+TEST_GEN_PROGS_x86_64 += sync_regs_test
+TEST_GEN_PROGS_x86_64 += vmx_tsc_adjust_test
+TEST_GEN_PROGS_x86_64 += cr4_cpuid_sync_test
+TEST_GEN_PROGS_x86_64 += state_test
+TEST_GEN_PROGS_x86_64 += dirty_log_test
+
+TEST_GEN_PROGS += $(TEST_GEN_PROGS_$(UNAME_M))
+LIBKVM += $(LIBKVM_$(UNAME_M))
+
+INSTALL_HDR_PATH = $(top_srcdir)/usr
+LINUX_HDR_PATH = $(INSTALL_HDR_PATH)/include/
+LINUX_TOOL_INCLUDE = $(top_srcdir)tools/include
+CFLAGS += -O2 -g -std=gnu99 -I$(LINUX_TOOL_INCLUDE) -I$(LINUX_HDR_PATH) -Iinclude -I$(<D) -I..
+LDFLAGS += -pthread
+
+# After inclusion, $(OUTPUT) is defined and
+# $(TEST_GEN_PROGS) starts with $(OUTPUT)/
+include ../lib.mk
+
+STATIC_LIBS := $(OUTPUT)/libkvm.a
+LIBKVM_OBJ := $(patsubst %.c, $(OUTPUT)/%.o, $(LIBKVM))
+EXTRA_CLEAN += $(LIBKVM_OBJ) $(STATIC_LIBS)
+
+x := $(shell mkdir -p $(sort $(dir $(LIBKVM_OBJ))))
+$(LIBKVM_OBJ): $(OUTPUT)/%.o: %.c
+	$(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@
+
+$(OUTPUT)/libkvm.a: $(LIBKVM_OBJ)
+	$(AR) crs $@ $^
+
+all: $(STATIC_LIBS)
+$(TEST_GEN_PROGS): $(STATIC_LIBS)
diff --git a/tools/testing/selftests/kvm/config b/tools/testing/selftests/kvm/config
new file mode 100644
index 000000000..63ed533f7
--- /dev/null
+++ b/tools/testing/selftests/kvm/config
@@ -0,0 +1,3 @@
+CONFIG_KVM=y
+CONFIG_KVM_INTEL=y
+CONFIG_KVM_AMD=y
diff --git a/tools/testing/selftests/kvm/cr4_cpuid_sync_test.c b/tools/testing/selftests/kvm/cr4_cpuid_sync_test.c
new file mode 100644
index 000000000..11ec358bf
--- /dev/null
+++ b/tools/testing/selftests/kvm/cr4_cpuid_sync_test.c
@@ -0,0 +1,113 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * CR4 and CPUID sync test
+ *
+ * Copyright 2018, Red Hat, Inc. and/or its affiliates.
+ *
+ * Author:
+ *   Wei Huang <wei@redhat.com>
+ */
+
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+
+#include "kvm_util.h"
+#include "x86.h"
+
+#define X86_FEATURE_XSAVE	(1<<26)
+#define X86_FEATURE_OSXSAVE	(1<<27)
+#define VCPU_ID			1
+
+static inline bool cr4_cpuid_is_sync(void)
+{
+	int func, subfunc;
+	uint32_t eax, ebx, ecx, edx;
+	uint64_t cr4;
+
+	func = 0x1;
+	subfunc = 0x0;
+	__asm__ __volatile__("cpuid"
+			     : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx)
+			     : "a"(func), "c"(subfunc));
+
+	cr4 = get_cr4();
+
+	return (!!(ecx & X86_FEATURE_OSXSAVE)) == (!!(cr4 & X86_CR4_OSXSAVE));
+}
+
+static void guest_code(void)
+{
+	uint64_t cr4;
+
+	/* turn on CR4.OSXSAVE */
+	cr4 = get_cr4();
+	cr4 |= X86_CR4_OSXSAVE;
+	set_cr4(cr4);
+
+	/* verify CR4.OSXSAVE == CPUID.OSXSAVE */
+	GUEST_ASSERT(cr4_cpuid_is_sync());
+
+	/* notify hypervisor to change CR4 */
+	GUEST_SYNC(0);
+
+	/* check again */
+	GUEST_ASSERT(cr4_cpuid_is_sync());
+
+	GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+	struct kvm_run *run;
+	struct kvm_vm *vm;
+	struct kvm_sregs sregs;
+	struct kvm_cpuid_entry2 *entry;
+	int rc;
+
+	entry = kvm_get_supported_cpuid_entry(1);
+	if (!(entry->ecx & X86_FEATURE_XSAVE)) {
+		printf("XSAVE feature not supported, skipping test\n");
+		return 0;
+	}
+
+	/* Tell stdout not to buffer its content */
+	setbuf(stdout, NULL);
+
+	/* Create VM */
+	vm = vm_create_default(VCPU_ID, 0, guest_code);
+	vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
+	run = vcpu_state(vm, VCPU_ID);
+
+	while (1) {
+		rc = _vcpu_run(vm, VCPU_ID);
+
+		if (run->exit_reason == KVM_EXIT_IO) {
+			switch (run->io.port) {
+			case GUEST_PORT_SYNC:
+				/* emulate hypervisor clearing CR4.OSXSAVE */
+				vcpu_sregs_get(vm, VCPU_ID, &sregs);
+				sregs.cr4 &= ~X86_CR4_OSXSAVE;
+				vcpu_sregs_set(vm, VCPU_ID, &sregs);
+				break;
+			case GUEST_PORT_ABORT:
+				TEST_ASSERT(false, "Guest CR4 bit (OSXSAVE) unsynchronized with CPUID bit.");
+				break;
+			case GUEST_PORT_DONE:
+				goto done;
+			default:
+				TEST_ASSERT(false, "Unknown port 0x%x.",
+					    run->io.port);
+			}
+		}
+	}
+
+	kvm_vm_free(vm);
+
+done:
+	return 0;
+}
diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c
new file mode 100644
index 000000000..a9c4b5e21
--- /dev/null
+++ b/tools/testing/selftests/kvm/dirty_log_test.c
@@ -0,0 +1,308 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * KVM dirty page logging test
+ *
+ * Copyright (C) 2018, Red Hat, Inc.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <time.h>
+#include <pthread.h>
+#include <linux/bitmap.h>
+#include <linux/bitops.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+
+#define  DEBUG                 printf
+
+#define  VCPU_ID                        1
+/* The memory slot index to track dirty pages */
+#define  TEST_MEM_SLOT_INDEX            1
+/*
+ * GPA offset of the testing memory slot. Must be bigger than the
+ * default vm mem slot, which is DEFAULT_GUEST_PHY_PAGES.
+ */
+#define  TEST_MEM_OFFSET                (1ULL << 30) /* 1G */
+/* Size of the testing memory slot */
+#define  TEST_MEM_PAGES                 (1ULL << 18) /* 1G for 4K pages */
+/* How many pages to dirty for each guest loop */
+#define  TEST_PAGES_PER_LOOP            1024
+/* How many host loops to run (one KVM_GET_DIRTY_LOG for each loop) */
+#define  TEST_HOST_LOOP_N               32UL
+/* Interval for each host loop (ms) */
+#define  TEST_HOST_LOOP_INTERVAL        10UL
+
+/*
+ * Guest variables.  We use these variables to share data between host
+ * and guest.  There are two copies of the variables, one in host memory
+ * (which is unused) and one in guest memory.  When the host wants to
+ * access these variables, it needs to call addr_gva2hva() to access the
+ * guest copy.
+ */
+uint64_t guest_random_array[TEST_PAGES_PER_LOOP];
+uint64_t guest_iteration;
+uint64_t guest_page_size;
+
+/*
+ * Writes to the first byte of a random page within the testing memory
+ * region continuously.
+ */
+void guest_code(void)
+{
+	int i = 0;
+	uint64_t volatile *array = guest_random_array;
+	uint64_t volatile *guest_addr;
+
+	while (true) {
+		for (i = 0; i < TEST_PAGES_PER_LOOP; i++) {
+			/*
+			 * Write to the first 8 bytes of a random page
+			 * on the testing memory region.
+			 */
+			guest_addr = (uint64_t *)
+			    (TEST_MEM_OFFSET +
+			     (array[i] % TEST_MEM_PAGES) * guest_page_size);
+			*guest_addr = guest_iteration;
+		}
+		/* Tell the host that we need more random numbers */
+		GUEST_SYNC(1);
+	}
+}
+
+/*
+ * Host variables.  These variables should only be used by the host
+ * rather than the guest.
+ */
+bool host_quit;
+
+/* Points to the test VM memory region on which we track dirty logs */
+void *host_test_mem;
+
+/* For statistics only */
+uint64_t host_dirty_count;
+uint64_t host_clear_count;
+uint64_t host_track_next_count;
+
+/*
+ * We use this bitmap to track some pages that should have its dirty
+ * bit set in the _next_ iteration.  For example, if we detected the
+ * page value changed to current iteration but at the same time the
+ * page bit is cleared in the latest bitmap, then the system must
+ * report that write in the next get dirty log call.
+ */
+unsigned long *host_bmap_track;
+
+void generate_random_array(uint64_t *guest_array, uint64_t size)
+{
+	uint64_t i;
+
+	for (i = 0; i < size; i++) {
+		guest_array[i] = random();
+	}
+}
+
+void *vcpu_worker(void *data)
+{
+	int ret;
+	uint64_t loops, *guest_array, pages_count = 0;
+	struct kvm_vm *vm = data;
+	struct kvm_run *run;
+	struct guest_args args;
+
+	run = vcpu_state(vm, VCPU_ID);
+
+	/* Retrieve the guest random array pointer and cache it */
+	guest_array = addr_gva2hva(vm, (vm_vaddr_t)guest_random_array);
+
+	DEBUG("VCPU starts\n");
+
+	generate_random_array(guest_array, TEST_PAGES_PER_LOOP);
+
+	while (!READ_ONCE(host_quit)) {
+		/* Let the guest to dirty these random pages */
+		ret = _vcpu_run(vm, VCPU_ID);
+		guest_args_read(vm, VCPU_ID, &args);
+		if (run->exit_reason == KVM_EXIT_IO &&
+		    args.port == GUEST_PORT_SYNC) {
+			pages_count += TEST_PAGES_PER_LOOP;
+			generate_random_array(guest_array, TEST_PAGES_PER_LOOP);
+		} else {
+			TEST_ASSERT(false,
+				    "Invalid guest sync status: "
+				    "exit_reason=%s\n",
+				    exit_reason_str(run->exit_reason));
+		}
+	}
+
+	DEBUG("VCPU exits, dirtied %"PRIu64" pages\n", pages_count);
+
+	return NULL;
+}
+
+void vm_dirty_log_verify(unsigned long *bmap, uint64_t iteration)
+{
+	uint64_t page;
+	uint64_t volatile *value_ptr;
+
+	for (page = 0; page < TEST_MEM_PAGES; page++) {
+		value_ptr = host_test_mem + page * getpagesize();
+
+		/* If this is a special page that we were tracking... */
+		if (test_and_clear_bit(page, host_bmap_track)) {
+			host_track_next_count++;
+			TEST_ASSERT(test_bit(page, bmap),
+				    "Page %"PRIu64" should have its dirty bit "
+				    "set in this iteration but it is missing",
+				    page);
+		}
+
+		if (test_bit(page, bmap)) {
+			host_dirty_count++;
+			/*
+			 * If the bit is set, the value written onto
+			 * the corresponding page should be either the
+			 * previous iteration number or the current one.
+			 */
+			TEST_ASSERT(*value_ptr == iteration ||
+				    *value_ptr == iteration - 1,
+				    "Set page %"PRIu64" value %"PRIu64
+				    " incorrect (iteration=%"PRIu64")",
+				    page, *value_ptr, iteration);
+		} else {
+			host_clear_count++;
+			/*
+			 * If cleared, the value written can be any
+			 * value smaller or equals to the iteration
+			 * number.  Note that the value can be exactly
+			 * (iteration-1) if that write can happen
+			 * like this:
+			 *
+			 * (1) increase loop count to "iteration-1"
+			 * (2) write to page P happens (with value
+			 *     "iteration-1")
+			 * (3) get dirty log for "iteration-1"; we'll
+			 *     see that page P bit is set (dirtied),
+			 *     and not set the bit in host_bmap_track
+			 * (4) increase loop count to "iteration"
+			 *     (which is current iteration)
+			 * (5) get dirty log for current iteration,
+			 *     we'll see that page P is cleared, with
+			 *     value "iteration-1".
+			 */
+			TEST_ASSERT(*value_ptr <= iteration,
+				    "Clear page %"PRIu64" value %"PRIu64
+				    " incorrect (iteration=%"PRIu64")",
+				    page, *value_ptr, iteration);
+			if (*value_ptr == iteration) {
+				/*
+				 * This page is _just_ modified; it
+				 * should report its dirtyness in the
+				 * next run
+				 */
+				set_bit(page, host_bmap_track);
+			}
+		}
+	}
+}
+
+void help(char *name)
+{
+	puts("");
+	printf("usage: %s [-i iterations] [-I interval] [-h]\n", name);
+	puts("");
+	printf(" -i: specify iteration counts (default: %"PRIu64")\n",
+	       TEST_HOST_LOOP_N);
+	printf(" -I: specify interval in ms (default: %"PRIu64" ms)\n",
+	       TEST_HOST_LOOP_INTERVAL);
+	puts("");
+	exit(0);
+}
+
+int main(int argc, char *argv[])
+{
+	pthread_t vcpu_thread;
+	struct kvm_vm *vm;
+	uint64_t volatile *psize, *iteration;
+	unsigned long *bmap, iterations = TEST_HOST_LOOP_N,
+	    interval = TEST_HOST_LOOP_INTERVAL;
+	int opt;
+
+	while ((opt = getopt(argc, argv, "hi:I:")) != -1) {
+		switch (opt) {
+		case 'i':
+			iterations = strtol(optarg, NULL, 10);
+			break;
+		case 'I':
+			interval = strtol(optarg, NULL, 10);
+			break;
+		case 'h':
+		default:
+			help(argv[0]);
+			break;
+		}
+	}
+
+	TEST_ASSERT(iterations > 2, "Iteration must be bigger than zero\n");
+	TEST_ASSERT(interval > 0, "Interval must be bigger than zero");
+
+	DEBUG("Test iterations: %"PRIu64", interval: %"PRIu64" (ms)\n",
+	      iterations, interval);
+
+	srandom(time(0));
+
+	bmap = bitmap_alloc(TEST_MEM_PAGES);
+	host_bmap_track = bitmap_alloc(TEST_MEM_PAGES);
+
+	vm = vm_create_default(VCPU_ID, TEST_MEM_PAGES, guest_code);
+
+	/* Add an extra memory slot for testing dirty logging */
+	vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
+				    TEST_MEM_OFFSET,
+				    TEST_MEM_SLOT_INDEX,
+				    TEST_MEM_PAGES,
+				    KVM_MEM_LOG_DIRTY_PAGES);
+	/* Cache the HVA pointer of the region */
+	host_test_mem = addr_gpa2hva(vm, (vm_paddr_t)TEST_MEM_OFFSET);
+
+	/* Do 1:1 mapping for the dirty track memory slot */
+	virt_map(vm, TEST_MEM_OFFSET, TEST_MEM_OFFSET,
+		 TEST_MEM_PAGES * getpagesize(), 0);
+
+	vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
+
+	/* Tell the guest about the page size on the system */
+	psize = addr_gva2hva(vm, (vm_vaddr_t)&guest_page_size);
+	*psize = getpagesize();
+
+	/* Start the iterations */
+	iteration = addr_gva2hva(vm, (vm_vaddr_t)&guest_iteration);
+	*iteration = 1;
+
+	/* Start dirtying pages */
+	pthread_create(&vcpu_thread, NULL, vcpu_worker, vm);
+
+	while (*iteration < iterations) {
+		/* Give the vcpu thread some time to dirty some pages */
+		usleep(interval * 1000);
+		kvm_vm_get_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap);
+		vm_dirty_log_verify(bmap, *iteration);
+		(*iteration)++;
+	}
+
+	/* Tell the vcpu thread to quit */
+	host_quit = true;
+	pthread_join(vcpu_thread, NULL);
+
+	DEBUG("Total bits checked: dirty (%"PRIu64"), clear (%"PRIu64"), "
+	      "track_next (%"PRIu64")\n", host_dirty_count, host_clear_count,
+	      host_track_next_count);
+
+	free(bmap);
+	free(host_bmap_track);
+	kvm_vm_free(vm);
+
+	return 0;
+}
diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h
new file mode 100644
index 000000000..3acf9a917
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/kvm_util.h
@@ -0,0 +1,194 @@
+/*
+ * tools/testing/selftests/kvm/include/kvm_util.h
+ *
+ * Copyright (C) 2018, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ */
+#ifndef SELFTEST_KVM_UTIL_H
+#define SELFTEST_KVM_UTIL_H 1
+
+#include "test_util.h"
+
+#include "asm/kvm.h"
+#include "linux/kvm.h"
+#include <sys/ioctl.h>
+
+#include "sparsebit.h"
+
+/*
+ * Memslots can't cover the gfn starting at this gpa otherwise vCPUs can't be
+ * created. Only applies to VMs using EPT.
+ */
+#define KVM_DEFAULT_IDENTITY_MAP_ADDRESS 0xfffbc000ul
+
+
+/* Callers of kvm_util only have an incomplete/opaque description of the
+ * structure kvm_util is using to maintain the state of a VM.
+ */
+struct kvm_vm;
+
+typedef uint64_t vm_paddr_t; /* Virtual Machine (Guest) physical address */
+typedef uint64_t vm_vaddr_t; /* Virtual Machine (Guest) virtual address */
+
+/* Minimum allocated guest virtual and physical addresses */
+#define KVM_UTIL_MIN_VADDR 0x2000
+
+#define DEFAULT_GUEST_PHY_PAGES		512
+#define DEFAULT_GUEST_STACK_VADDR_MIN	0xab6000
+#define DEFAULT_STACK_PGS               5
+
+enum vm_guest_mode {
+	VM_MODE_FLAT48PG,
+};
+
+enum vm_mem_backing_src_type {
+	VM_MEM_SRC_ANONYMOUS,
+	VM_MEM_SRC_ANONYMOUS_THP,
+	VM_MEM_SRC_ANONYMOUS_HUGETLB,
+};
+
+int kvm_check_cap(long cap);
+int vm_enable_cap(struct kvm_vm *vm, struct kvm_enable_cap *cap);
+
+struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm);
+void kvm_vm_free(struct kvm_vm *vmp);
+void kvm_vm_restart(struct kvm_vm *vmp, int perm);
+void kvm_vm_release(struct kvm_vm *vmp);
+void kvm_vm_get_dirty_log(struct kvm_vm *vm, int slot, void *log);
+
+int kvm_memcmp_hva_gva(void *hva,
+	struct kvm_vm *vm, const vm_vaddr_t gva, size_t len);
+
+void kvm_vm_elf_load(struct kvm_vm *vm, const char *filename,
+	uint32_t data_memslot, uint32_t pgd_memslot);
+
+void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent);
+void vcpu_dump(FILE *stream, struct kvm_vm *vm,
+	uint32_t vcpuid, uint8_t indent);
+
+void vm_create_irqchip(struct kvm_vm *vm);
+
+void vm_userspace_mem_region_add(struct kvm_vm *vm,
+	enum vm_mem_backing_src_type src_type,
+	uint64_t guest_paddr, uint32_t slot, uint64_t npages,
+	uint32_t flags);
+
+void vcpu_ioctl(struct kvm_vm *vm,
+	uint32_t vcpuid, unsigned long ioctl, void *arg);
+void vm_ioctl(struct kvm_vm *vm, unsigned long ioctl, void *arg);
+void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags);
+void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid, int pgd_memslot, int gdt_memslot);
+vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min,
+	uint32_t data_memslot, uint32_t pgd_memslot);
+void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
+	      size_t size, uint32_t pgd_memslot);
+void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa);
+void *addr_gva2hva(struct kvm_vm *vm, vm_vaddr_t gva);
+vm_paddr_t addr_hva2gpa(struct kvm_vm *vm, void *hva);
+vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva);
+
+struct kvm_run *vcpu_state(struct kvm_vm *vm, uint32_t vcpuid);
+void vcpu_run(struct kvm_vm *vm, uint32_t vcpuid);
+int _vcpu_run(struct kvm_vm *vm, uint32_t vcpuid);
+void vcpu_set_mp_state(struct kvm_vm *vm, uint32_t vcpuid,
+	struct kvm_mp_state *mp_state);
+void vcpu_regs_get(struct kvm_vm *vm,
+	uint32_t vcpuid, struct kvm_regs *regs);
+void vcpu_regs_set(struct kvm_vm *vm,
+	uint32_t vcpuid, struct kvm_regs *regs);
+void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...);
+void vcpu_sregs_get(struct kvm_vm *vm,
+	uint32_t vcpuid, struct kvm_sregs *sregs);
+void vcpu_sregs_set(struct kvm_vm *vm,
+	uint32_t vcpuid, struct kvm_sregs *sregs);
+int _vcpu_sregs_set(struct kvm_vm *vm,
+	uint32_t vcpuid, struct kvm_sregs *sregs);
+void vcpu_events_get(struct kvm_vm *vm, uint32_t vcpuid,
+			  struct kvm_vcpu_events *events);
+void vcpu_events_set(struct kvm_vm *vm, uint32_t vcpuid,
+			  struct kvm_vcpu_events *events);
+uint64_t vcpu_get_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index);
+void vcpu_set_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index,
+	uint64_t msr_value);
+
+const char *exit_reason_str(unsigned int exit_reason);
+
+void virt_pgd_alloc(struct kvm_vm *vm, uint32_t pgd_memslot);
+void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
+	uint32_t pgd_memslot);
+vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm,
+	vm_paddr_t paddr_min, uint32_t memslot);
+
+struct kvm_cpuid2 *kvm_get_supported_cpuid(void);
+void vcpu_set_cpuid(
+	struct kvm_vm *vm, uint32_t vcpuid, struct kvm_cpuid2 *cpuid);
+
+struct kvm_cpuid_entry2 *
+kvm_get_supported_cpuid_index(uint32_t function, uint32_t index);
+
+static inline struct kvm_cpuid_entry2 *
+kvm_get_supported_cpuid_entry(uint32_t function)
+{
+	return kvm_get_supported_cpuid_index(function, 0);
+}
+
+struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_size,
+				 void *guest_code);
+void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code);
+
+typedef void (*vmx_guest_code_t)(vm_vaddr_t vmxon_vaddr,
+				 vm_paddr_t vmxon_paddr,
+				 vm_vaddr_t vmcs_vaddr,
+				 vm_paddr_t vmcs_paddr);
+
+struct kvm_userspace_memory_region *
+kvm_userspace_memory_region_find(struct kvm_vm *vm, uint64_t start,
+				 uint64_t end);
+
+struct kvm_dirty_log *
+allocate_kvm_dirty_log(struct kvm_userspace_memory_region *region);
+
+int vm_create_device(struct kvm_vm *vm, struct kvm_create_device *cd);
+
+#define GUEST_PORT_SYNC         0x1000
+#define GUEST_PORT_ABORT        0x1001
+#define GUEST_PORT_DONE         0x1002
+
+static inline void __exit_to_l0(uint16_t port, uint64_t arg0, uint64_t arg1)
+{
+	__asm__ __volatile__("in %[port], %%al"
+			     :
+			     : [port]"d"(port), "D"(arg0), "S"(arg1)
+			     : "rax");
+}
+
+/*
+ * Allows to pass three arguments to the host: port is 16bit wide,
+ * arg0 & arg1 are 64bit wide
+ */
+#define GUEST_SYNC_ARGS(_port, _arg0, _arg1) \
+	__exit_to_l0(_port, (uint64_t) (_arg0), (uint64_t) (_arg1))
+
+#define GUEST_ASSERT(_condition) do {				\
+		if (!(_condition))				\
+			GUEST_SYNC_ARGS(GUEST_PORT_ABORT,	\
+					"Failed guest assert: "	\
+					#_condition, __LINE__);	\
+	} while (0)
+
+#define GUEST_SYNC(stage)  GUEST_SYNC_ARGS(GUEST_PORT_SYNC, "hello", stage)
+
+#define GUEST_DONE()  GUEST_SYNC_ARGS(GUEST_PORT_DONE, 0, 0)
+
+struct guest_args {
+	uint64_t arg0;
+	uint64_t arg1;
+	uint16_t port;
+} __attribute__ ((packed));
+
+void guest_args_read(struct kvm_vm *vm, uint32_t vcpu_id,
+		     struct guest_args *args);
+
+#endif /* SELFTEST_KVM_UTIL_H */
diff --git a/tools/testing/selftests/kvm/include/sparsebit.h b/tools/testing/selftests/kvm/include/sparsebit.h
new file mode 100644
index 000000000..54cfeb656
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/sparsebit.h
@@ -0,0 +1,75 @@
+/*
+ * tools/testing/selftests/kvm/include/sparsebit.h
+ *
+ * Copyright (C) 2018, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ *
+ * Header file that describes API to the sparsebit library.
+ * This library provides a memory efficient means of storing
+ * the settings of bits indexed via a uint64_t.  Memory usage
+ * is reasonable, significantly less than (2^64 / 8) bytes, as
+ * long as bits that are mostly set or mostly cleared are close
+ * to each other.  This library is efficient in memory usage
+ * even in the case where most bits are set.
+ */
+
+#ifndef _TEST_SPARSEBIT_H_
+#define _TEST_SPARSEBIT_H_
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct sparsebit;
+typedef uint64_t sparsebit_idx_t;
+typedef uint64_t sparsebit_num_t;
+
+struct sparsebit *sparsebit_alloc(void);
+void sparsebit_free(struct sparsebit **sbitp);
+void sparsebit_copy(struct sparsebit *dstp, struct sparsebit *src);
+
+bool sparsebit_is_set(struct sparsebit *sbit, sparsebit_idx_t idx);
+bool sparsebit_is_set_num(struct sparsebit *sbit,
+			  sparsebit_idx_t idx, sparsebit_num_t num);
+bool sparsebit_is_clear(struct sparsebit *sbit, sparsebit_idx_t idx);
+bool sparsebit_is_clear_num(struct sparsebit *sbit,
+			    sparsebit_idx_t idx, sparsebit_num_t num);
+sparsebit_num_t sparsebit_num_set(struct sparsebit *sbit);
+bool sparsebit_any_set(struct sparsebit *sbit);
+bool sparsebit_any_clear(struct sparsebit *sbit);
+bool sparsebit_all_set(struct sparsebit *sbit);
+bool sparsebit_all_clear(struct sparsebit *sbit);
+sparsebit_idx_t sparsebit_first_set(struct sparsebit *sbit);
+sparsebit_idx_t sparsebit_first_clear(struct sparsebit *sbit);
+sparsebit_idx_t sparsebit_next_set(struct sparsebit *sbit, sparsebit_idx_t prev);
+sparsebit_idx_t sparsebit_next_clear(struct sparsebit *sbit, sparsebit_idx_t prev);
+sparsebit_idx_t sparsebit_next_set_num(struct sparsebit *sbit,
+				       sparsebit_idx_t start, sparsebit_num_t num);
+sparsebit_idx_t sparsebit_next_clear_num(struct sparsebit *sbit,
+					 sparsebit_idx_t start, sparsebit_num_t num);
+
+void sparsebit_set(struct sparsebit *sbitp, sparsebit_idx_t idx);
+void sparsebit_set_num(struct sparsebit *sbitp, sparsebit_idx_t start,
+		       sparsebit_num_t num);
+void sparsebit_set_all(struct sparsebit *sbitp);
+
+void sparsebit_clear(struct sparsebit *sbitp, sparsebit_idx_t idx);
+void sparsebit_clear_num(struct sparsebit *sbitp,
+			 sparsebit_idx_t start, sparsebit_num_t num);
+void sparsebit_clear_all(struct sparsebit *sbitp);
+
+void sparsebit_dump(FILE *stream, struct sparsebit *sbit,
+		    unsigned int indent);
+void sparsebit_validate_internal(struct sparsebit *sbit);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _TEST_SPARSEBIT_H_ */
diff --git a/tools/testing/selftests/kvm/include/test_util.h b/tools/testing/selftests/kvm/include/test_util.h
new file mode 100644
index 000000000..73c393343
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/test_util.h
@@ -0,0 +1,44 @@
+/*
+ * tools/testing/selftests/kvm/include/test_util.h
+ *
+ * Copyright (C) 2018, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ */
+
+#ifndef TEST_UTIL_H
+#define TEST_UTIL_H 1
+
+#include <stdlib.h>
+#include <stdarg.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <string.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include "kselftest.h"
+
+ssize_t test_write(int fd, const void *buf, size_t count);
+ssize_t test_read(int fd, void *buf, size_t count);
+int test_seq_read(const char *path, char **bufp, size_t *sizep);
+
+void test_assert(bool exp, const char *exp_str,
+		 const char *file, unsigned int line, const char *fmt, ...);
+
+#define TEST_ASSERT(e, fmt, ...) \
+	test_assert((e), #e, __FILE__, __LINE__, fmt, ##__VA_ARGS__)
+
+#define ASSERT_EQ(a, b) do { \
+	typeof(a) __a = (a); \
+	typeof(b) __b = (b); \
+	TEST_ASSERT(__a == __b, \
+		    "ASSERT_EQ(%s, %s) failed.\n" \
+		    "\t%s is %#lx\n" \
+		    "\t%s is %#lx", \
+		    #a, #b, #a, (unsigned long) __a, #b, (unsigned long) __b); \
+} while (0)
+
+#endif /* TEST_UTIL_H */
diff --git a/tools/testing/selftests/kvm/include/vmx.h b/tools/testing/selftests/kvm/include/vmx.h
new file mode 100644
index 000000000..b9ffe1024
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/vmx.h
@@ -0,0 +1,552 @@
+/*
+ * tools/testing/selftests/kvm/include/vmx.h
+ *
+ * Copyright (C) 2018, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ */
+
+#ifndef SELFTEST_KVM_VMX_H
+#define SELFTEST_KVM_VMX_H
+
+#include <stdint.h>
+#include "x86.h"
+
+#define CPUID_VMX_BIT				5
+
+#define CPUID_VMX				(1 << 5)
+
+/*
+ * Definitions of Primary Processor-Based VM-Execution Controls.
+ */
+#define CPU_BASED_VIRTUAL_INTR_PENDING		0x00000004
+#define CPU_BASED_USE_TSC_OFFSETING		0x00000008
+#define CPU_BASED_HLT_EXITING			0x00000080
+#define CPU_BASED_INVLPG_EXITING		0x00000200
+#define CPU_BASED_MWAIT_EXITING			0x00000400
+#define CPU_BASED_RDPMC_EXITING			0x00000800
+#define CPU_BASED_RDTSC_EXITING			0x00001000
+#define CPU_BASED_CR3_LOAD_EXITING		0x00008000
+#define CPU_BASED_CR3_STORE_EXITING		0x00010000
+#define CPU_BASED_CR8_LOAD_EXITING		0x00080000
+#define CPU_BASED_CR8_STORE_EXITING		0x00100000
+#define CPU_BASED_TPR_SHADOW			0x00200000
+#define CPU_BASED_VIRTUAL_NMI_PENDING		0x00400000
+#define CPU_BASED_MOV_DR_EXITING		0x00800000
+#define CPU_BASED_UNCOND_IO_EXITING		0x01000000
+#define CPU_BASED_USE_IO_BITMAPS		0x02000000
+#define CPU_BASED_MONITOR_TRAP			0x08000000
+#define CPU_BASED_USE_MSR_BITMAPS		0x10000000
+#define CPU_BASED_MONITOR_EXITING		0x20000000
+#define CPU_BASED_PAUSE_EXITING			0x40000000
+#define CPU_BASED_ACTIVATE_SECONDARY_CONTROLS	0x80000000
+
+#define CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR	0x0401e172
+
+/*
+ * Definitions of Secondary Processor-Based VM-Execution Controls.
+ */
+#define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001
+#define SECONDARY_EXEC_ENABLE_EPT		0x00000002
+#define SECONDARY_EXEC_DESC			0x00000004
+#define SECONDARY_EXEC_RDTSCP			0x00000008
+#define SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE	0x00000010
+#define SECONDARY_EXEC_ENABLE_VPID		0x00000020
+#define SECONDARY_EXEC_WBINVD_EXITING		0x00000040
+#define SECONDARY_EXEC_UNRESTRICTED_GUEST	0x00000080
+#define SECONDARY_EXEC_APIC_REGISTER_VIRT	0x00000100
+#define SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY	0x00000200
+#define SECONDARY_EXEC_PAUSE_LOOP_EXITING	0x00000400
+#define SECONDARY_EXEC_RDRAND_EXITING		0x00000800
+#define SECONDARY_EXEC_ENABLE_INVPCID		0x00001000
+#define SECONDARY_EXEC_ENABLE_VMFUNC		0x00002000
+#define SECONDARY_EXEC_SHADOW_VMCS		0x00004000
+#define SECONDARY_EXEC_RDSEED_EXITING		0x00010000
+#define SECONDARY_EXEC_ENABLE_PML		0x00020000
+#define SECONDARY_EPT_VE			0x00040000
+#define SECONDARY_ENABLE_XSAV_RESTORE		0x00100000
+#define SECONDARY_EXEC_TSC_SCALING		0x02000000
+
+#define PIN_BASED_EXT_INTR_MASK			0x00000001
+#define PIN_BASED_NMI_EXITING			0x00000008
+#define PIN_BASED_VIRTUAL_NMIS			0x00000020
+#define PIN_BASED_VMX_PREEMPTION_TIMER		0x00000040
+#define PIN_BASED_POSTED_INTR			0x00000080
+
+#define PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR	0x00000016
+
+#define VM_EXIT_SAVE_DEBUG_CONTROLS		0x00000004
+#define VM_EXIT_HOST_ADDR_SPACE_SIZE		0x00000200
+#define VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL	0x00001000
+#define VM_EXIT_ACK_INTR_ON_EXIT		0x00008000
+#define VM_EXIT_SAVE_IA32_PAT			0x00040000
+#define VM_EXIT_LOAD_IA32_PAT			0x00080000
+#define VM_EXIT_SAVE_IA32_EFER			0x00100000
+#define VM_EXIT_LOAD_IA32_EFER			0x00200000
+#define VM_EXIT_SAVE_VMX_PREEMPTION_TIMER	0x00400000
+
+#define VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR	0x00036dff
+
+#define VM_ENTRY_LOAD_DEBUG_CONTROLS		0x00000004
+#define VM_ENTRY_IA32E_MODE			0x00000200
+#define VM_ENTRY_SMM				0x00000400
+#define VM_ENTRY_DEACT_DUAL_MONITOR		0x00000800
+#define VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL	0x00002000
+#define VM_ENTRY_LOAD_IA32_PAT			0x00004000
+#define VM_ENTRY_LOAD_IA32_EFER			0x00008000
+
+#define VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR	0x000011ff
+
+#define VMX_MISC_PREEMPTION_TIMER_RATE_MASK	0x0000001f
+#define VMX_MISC_SAVE_EFER_LMA			0x00000020
+
+#define EXIT_REASON_FAILED_VMENTRY	0x80000000
+#define EXIT_REASON_EXCEPTION_NMI	0
+#define EXIT_REASON_EXTERNAL_INTERRUPT	1
+#define EXIT_REASON_TRIPLE_FAULT	2
+#define EXIT_REASON_PENDING_INTERRUPT	7
+#define EXIT_REASON_NMI_WINDOW		8
+#define EXIT_REASON_TASK_SWITCH		9
+#define EXIT_REASON_CPUID		10
+#define EXIT_REASON_HLT			12
+#define EXIT_REASON_INVD		13
+#define EXIT_REASON_INVLPG		14
+#define EXIT_REASON_RDPMC		15
+#define EXIT_REASON_RDTSC		16
+#define EXIT_REASON_VMCALL		18
+#define EXIT_REASON_VMCLEAR		19
+#define EXIT_REASON_VMLAUNCH		20
+#define EXIT_REASON_VMPTRLD		21
+#define EXIT_REASON_VMPTRST		22
+#define EXIT_REASON_VMREAD		23
+#define EXIT_REASON_VMRESUME		24
+#define EXIT_REASON_VMWRITE		25
+#define EXIT_REASON_VMOFF		26
+#define EXIT_REASON_VMON		27
+#define EXIT_REASON_CR_ACCESS		28
+#define EXIT_REASON_DR_ACCESS		29
+#define EXIT_REASON_IO_INSTRUCTION	30
+#define EXIT_REASON_MSR_READ		31
+#define EXIT_REASON_MSR_WRITE		32
+#define EXIT_REASON_INVALID_STATE	33
+#define EXIT_REASON_MWAIT_INSTRUCTION	36
+#define EXIT_REASON_MONITOR_INSTRUCTION 39
+#define EXIT_REASON_PAUSE_INSTRUCTION	40
+#define EXIT_REASON_MCE_DURING_VMENTRY	41
+#define EXIT_REASON_TPR_BELOW_THRESHOLD 43
+#define EXIT_REASON_APIC_ACCESS		44
+#define EXIT_REASON_EOI_INDUCED		45
+#define EXIT_REASON_EPT_VIOLATION	48
+#define EXIT_REASON_EPT_MISCONFIG	49
+#define EXIT_REASON_INVEPT		50
+#define EXIT_REASON_RDTSCP		51
+#define EXIT_REASON_PREEMPTION_TIMER	52
+#define EXIT_REASON_INVVPID		53
+#define EXIT_REASON_WBINVD		54
+#define EXIT_REASON_XSETBV		55
+#define EXIT_REASON_APIC_WRITE		56
+#define EXIT_REASON_INVPCID		58
+#define EXIT_REASON_PML_FULL		62
+#define EXIT_REASON_XSAVES		63
+#define EXIT_REASON_XRSTORS		64
+#define LAST_EXIT_REASON		64
+
+enum vmcs_field {
+	VIRTUAL_PROCESSOR_ID		= 0x00000000,
+	POSTED_INTR_NV			= 0x00000002,
+	GUEST_ES_SELECTOR		= 0x00000800,
+	GUEST_CS_SELECTOR		= 0x00000802,
+	GUEST_SS_SELECTOR		= 0x00000804,
+	GUEST_DS_SELECTOR		= 0x00000806,
+	GUEST_FS_SELECTOR		= 0x00000808,
+	GUEST_GS_SELECTOR		= 0x0000080a,
+	GUEST_LDTR_SELECTOR		= 0x0000080c,
+	GUEST_TR_SELECTOR		= 0x0000080e,
+	GUEST_INTR_STATUS		= 0x00000810,
+	GUEST_PML_INDEX			= 0x00000812,
+	HOST_ES_SELECTOR		= 0x00000c00,
+	HOST_CS_SELECTOR		= 0x00000c02,
+	HOST_SS_SELECTOR		= 0x00000c04,
+	HOST_DS_SELECTOR		= 0x00000c06,
+	HOST_FS_SELECTOR		= 0x00000c08,
+	HOST_GS_SELECTOR		= 0x00000c0a,
+	HOST_TR_SELECTOR		= 0x00000c0c,
+	IO_BITMAP_A			= 0x00002000,
+	IO_BITMAP_A_HIGH		= 0x00002001,
+	IO_BITMAP_B			= 0x00002002,
+	IO_BITMAP_B_HIGH		= 0x00002003,
+	MSR_BITMAP			= 0x00002004,
+	MSR_BITMAP_HIGH			= 0x00002005,
+	VM_EXIT_MSR_STORE_ADDR		= 0x00002006,
+	VM_EXIT_MSR_STORE_ADDR_HIGH	= 0x00002007,
+	VM_EXIT_MSR_LOAD_ADDR		= 0x00002008,
+	VM_EXIT_MSR_LOAD_ADDR_HIGH	= 0x00002009,
+	VM_ENTRY_MSR_LOAD_ADDR		= 0x0000200a,
+	VM_ENTRY_MSR_LOAD_ADDR_HIGH	= 0x0000200b,
+	PML_ADDRESS			= 0x0000200e,
+	PML_ADDRESS_HIGH		= 0x0000200f,
+	TSC_OFFSET			= 0x00002010,
+	TSC_OFFSET_HIGH			= 0x00002011,
+	VIRTUAL_APIC_PAGE_ADDR		= 0x00002012,
+	VIRTUAL_APIC_PAGE_ADDR_HIGH	= 0x00002013,
+	APIC_ACCESS_ADDR		= 0x00002014,
+	APIC_ACCESS_ADDR_HIGH		= 0x00002015,
+	POSTED_INTR_DESC_ADDR		= 0x00002016,
+	POSTED_INTR_DESC_ADDR_HIGH	= 0x00002017,
+	EPT_POINTER			= 0x0000201a,
+	EPT_POINTER_HIGH		= 0x0000201b,
+	EOI_EXIT_BITMAP0		= 0x0000201c,
+	EOI_EXIT_BITMAP0_HIGH		= 0x0000201d,
+	EOI_EXIT_BITMAP1		= 0x0000201e,
+	EOI_EXIT_BITMAP1_HIGH		= 0x0000201f,
+	EOI_EXIT_BITMAP2		= 0x00002020,
+	EOI_EXIT_BITMAP2_HIGH		= 0x00002021,
+	EOI_EXIT_BITMAP3		= 0x00002022,
+	EOI_EXIT_BITMAP3_HIGH		= 0x00002023,
+	VMREAD_BITMAP			= 0x00002026,
+	VMREAD_BITMAP_HIGH		= 0x00002027,
+	VMWRITE_BITMAP			= 0x00002028,
+	VMWRITE_BITMAP_HIGH		= 0x00002029,
+	XSS_EXIT_BITMAP			= 0x0000202C,
+	XSS_EXIT_BITMAP_HIGH		= 0x0000202D,
+	TSC_MULTIPLIER			= 0x00002032,
+	TSC_MULTIPLIER_HIGH		= 0x00002033,
+	GUEST_PHYSICAL_ADDRESS		= 0x00002400,
+	GUEST_PHYSICAL_ADDRESS_HIGH	= 0x00002401,
+	VMCS_LINK_POINTER		= 0x00002800,
+	VMCS_LINK_POINTER_HIGH		= 0x00002801,
+	GUEST_IA32_DEBUGCTL		= 0x00002802,
+	GUEST_IA32_DEBUGCTL_HIGH	= 0x00002803,
+	GUEST_IA32_PAT			= 0x00002804,
+	GUEST_IA32_PAT_HIGH		= 0x00002805,
+	GUEST_IA32_EFER			= 0x00002806,
+	GUEST_IA32_EFER_HIGH		= 0x00002807,
+	GUEST_IA32_PERF_GLOBAL_CTRL	= 0x00002808,
+	GUEST_IA32_PERF_GLOBAL_CTRL_HIGH= 0x00002809,
+	GUEST_PDPTR0			= 0x0000280a,
+	GUEST_PDPTR0_HIGH		= 0x0000280b,
+	GUEST_PDPTR1			= 0x0000280c,
+	GUEST_PDPTR1_HIGH		= 0x0000280d,
+	GUEST_PDPTR2			= 0x0000280e,
+	GUEST_PDPTR2_HIGH		= 0x0000280f,
+	GUEST_PDPTR3			= 0x00002810,
+	GUEST_PDPTR3_HIGH		= 0x00002811,
+	GUEST_BNDCFGS			= 0x00002812,
+	GUEST_BNDCFGS_HIGH		= 0x00002813,
+	HOST_IA32_PAT			= 0x00002c00,
+	HOST_IA32_PAT_HIGH		= 0x00002c01,
+	HOST_IA32_EFER			= 0x00002c02,
+	HOST_IA32_EFER_HIGH		= 0x00002c03,
+	HOST_IA32_PERF_GLOBAL_CTRL	= 0x00002c04,
+	HOST_IA32_PERF_GLOBAL_CTRL_HIGH	= 0x00002c05,
+	PIN_BASED_VM_EXEC_CONTROL	= 0x00004000,
+	CPU_BASED_VM_EXEC_CONTROL	= 0x00004002,
+	EXCEPTION_BITMAP		= 0x00004004,
+	PAGE_FAULT_ERROR_CODE_MASK	= 0x00004006,
+	PAGE_FAULT_ERROR_CODE_MATCH	= 0x00004008,
+	CR3_TARGET_COUNT		= 0x0000400a,
+	VM_EXIT_CONTROLS		= 0x0000400c,
+	VM_EXIT_MSR_STORE_COUNT		= 0x0000400e,
+	VM_EXIT_MSR_LOAD_COUNT		= 0x00004010,
+	VM_ENTRY_CONTROLS		= 0x00004012,
+	VM_ENTRY_MSR_LOAD_COUNT		= 0x00004014,
+	VM_ENTRY_INTR_INFO_FIELD	= 0x00004016,
+	VM_ENTRY_EXCEPTION_ERROR_CODE	= 0x00004018,
+	VM_ENTRY_INSTRUCTION_LEN	= 0x0000401a,
+	TPR_THRESHOLD			= 0x0000401c,
+	SECONDARY_VM_EXEC_CONTROL	= 0x0000401e,
+	PLE_GAP				= 0x00004020,
+	PLE_WINDOW			= 0x00004022,
+	VM_INSTRUCTION_ERROR		= 0x00004400,
+	VM_EXIT_REASON			= 0x00004402,
+	VM_EXIT_INTR_INFO		= 0x00004404,
+	VM_EXIT_INTR_ERROR_CODE		= 0x00004406,
+	IDT_VECTORING_INFO_FIELD	= 0x00004408,
+	IDT_VECTORING_ERROR_CODE	= 0x0000440a,
+	VM_EXIT_INSTRUCTION_LEN		= 0x0000440c,
+	VMX_INSTRUCTION_INFO		= 0x0000440e,
+	GUEST_ES_LIMIT			= 0x00004800,
+	GUEST_CS_LIMIT			= 0x00004802,
+	GUEST_SS_LIMIT			= 0x00004804,
+	GUEST_DS_LIMIT			= 0x00004806,
+	GUEST_FS_LIMIT			= 0x00004808,
+	GUEST_GS_LIMIT			= 0x0000480a,
+	GUEST_LDTR_LIMIT		= 0x0000480c,
+	GUEST_TR_LIMIT			= 0x0000480e,
+	GUEST_GDTR_LIMIT		= 0x00004810,
+	GUEST_IDTR_LIMIT		= 0x00004812,
+	GUEST_ES_AR_BYTES		= 0x00004814,
+	GUEST_CS_AR_BYTES		= 0x00004816,
+	GUEST_SS_AR_BYTES		= 0x00004818,
+	GUEST_DS_AR_BYTES		= 0x0000481a,
+	GUEST_FS_AR_BYTES		= 0x0000481c,
+	GUEST_GS_AR_BYTES		= 0x0000481e,
+	GUEST_LDTR_AR_BYTES		= 0x00004820,
+	GUEST_TR_AR_BYTES		= 0x00004822,
+	GUEST_INTERRUPTIBILITY_INFO	= 0x00004824,
+	GUEST_ACTIVITY_STATE		= 0X00004826,
+	GUEST_SYSENTER_CS		= 0x0000482A,
+	VMX_PREEMPTION_TIMER_VALUE	= 0x0000482E,
+	HOST_IA32_SYSENTER_CS		= 0x00004c00,
+	CR0_GUEST_HOST_MASK		= 0x00006000,
+	CR4_GUEST_HOST_MASK		= 0x00006002,
+	CR0_READ_SHADOW			= 0x00006004,
+	CR4_READ_SHADOW			= 0x00006006,
+	CR3_TARGET_VALUE0		= 0x00006008,
+	CR3_TARGET_VALUE1		= 0x0000600a,
+	CR3_TARGET_VALUE2		= 0x0000600c,
+	CR3_TARGET_VALUE3		= 0x0000600e,
+	EXIT_QUALIFICATION		= 0x00006400,
+	GUEST_LINEAR_ADDRESS		= 0x0000640a,
+	GUEST_CR0			= 0x00006800,
+	GUEST_CR3			= 0x00006802,
+	GUEST_CR4			= 0x00006804,
+	GUEST_ES_BASE			= 0x00006806,
+	GUEST_CS_BASE			= 0x00006808,
+	GUEST_SS_BASE			= 0x0000680a,
+	GUEST_DS_BASE			= 0x0000680c,
+	GUEST_FS_BASE			= 0x0000680e,
+	GUEST_GS_BASE			= 0x00006810,
+	GUEST_LDTR_BASE			= 0x00006812,
+	GUEST_TR_BASE			= 0x00006814,
+	GUEST_GDTR_BASE			= 0x00006816,
+	GUEST_IDTR_BASE			= 0x00006818,
+	GUEST_DR7			= 0x0000681a,
+	GUEST_RSP			= 0x0000681c,
+	GUEST_RIP			= 0x0000681e,
+	GUEST_RFLAGS			= 0x00006820,
+	GUEST_PENDING_DBG_EXCEPTIONS	= 0x00006822,
+	GUEST_SYSENTER_ESP		= 0x00006824,
+	GUEST_SYSENTER_EIP		= 0x00006826,
+	HOST_CR0			= 0x00006c00,
+	HOST_CR3			= 0x00006c02,
+	HOST_CR4			= 0x00006c04,
+	HOST_FS_BASE			= 0x00006c06,
+	HOST_GS_BASE			= 0x00006c08,
+	HOST_TR_BASE			= 0x00006c0a,
+	HOST_GDTR_BASE			= 0x00006c0c,
+	HOST_IDTR_BASE			= 0x00006c0e,
+	HOST_IA32_SYSENTER_ESP		= 0x00006c10,
+	HOST_IA32_SYSENTER_EIP		= 0x00006c12,
+	HOST_RSP			= 0x00006c14,
+	HOST_RIP			= 0x00006c16,
+};
+
+struct vmx_msr_entry {
+	uint32_t index;
+	uint32_t reserved;
+	uint64_t value;
+} __attribute__ ((aligned(16)));
+
+static inline int vmxon(uint64_t phys)
+{
+	uint8_t ret;
+
+	__asm__ __volatile__ ("vmxon %[pa]; setna %[ret]"
+		: [ret]"=rm"(ret)
+		: [pa]"m"(phys)
+		: "cc", "memory");
+
+	return ret;
+}
+
+static inline void vmxoff(void)
+{
+	__asm__ __volatile__("vmxoff");
+}
+
+static inline int vmclear(uint64_t vmcs_pa)
+{
+	uint8_t ret;
+
+	__asm__ __volatile__ ("vmclear %[pa]; setna %[ret]"
+		: [ret]"=rm"(ret)
+		: [pa]"m"(vmcs_pa)
+		: "cc", "memory");
+
+	return ret;
+}
+
+static inline int vmptrld(uint64_t vmcs_pa)
+{
+	uint8_t ret;
+
+	__asm__ __volatile__ ("vmptrld %[pa]; setna %[ret]"
+		: [ret]"=rm"(ret)
+		: [pa]"m"(vmcs_pa)
+		: "cc", "memory");
+
+	return ret;
+}
+
+static inline int vmptrst(uint64_t *value)
+{
+	uint64_t tmp;
+	uint8_t ret;
+
+	__asm__ __volatile__("vmptrst %[value]; setna %[ret]"
+		: [value]"=m"(tmp), [ret]"=rm"(ret)
+		: : "cc", "memory");
+
+	*value = tmp;
+	return ret;
+}
+
+/*
+ * A wrapper around vmptrst that ignores errors and returns zero if the
+ * vmptrst instruction fails.
+ */
+static inline uint64_t vmptrstz(void)
+{
+	uint64_t value = 0;
+	vmptrst(&value);
+	return value;
+}
+
+/*
+ * No guest state (e.g. GPRs) is established by this vmlaunch.
+ */
+static inline int vmlaunch(void)
+{
+	int ret;
+
+	__asm__ __volatile__("push %%rbp;"
+			     "push %%rcx;"
+			     "push %%rdx;"
+			     "push %%rsi;"
+			     "push %%rdi;"
+			     "push $0;"
+			     "vmwrite %%rsp, %[host_rsp];"
+			     "lea 1f(%%rip), %%rax;"
+			     "vmwrite %%rax, %[host_rip];"
+			     "vmlaunch;"
+			     "incq (%%rsp);"
+			     "1: pop %%rax;"
+			     "pop %%rdi;"
+			     "pop %%rsi;"
+			     "pop %%rdx;"
+			     "pop %%rcx;"
+			     "pop %%rbp;"
+			     : [ret]"=&a"(ret)
+			     : [host_rsp]"r"((uint64_t)HOST_RSP),
+			       [host_rip]"r"((uint64_t)HOST_RIP)
+			     : "memory", "cc", "rbx", "r8", "r9", "r10",
+			       "r11", "r12", "r13", "r14", "r15");
+	return ret;
+}
+
+/*
+ * No guest state (e.g. GPRs) is established by this vmresume.
+ */
+static inline int vmresume(void)
+{
+	int ret;
+
+	__asm__ __volatile__("push %%rbp;"
+			     "push %%rcx;"
+			     "push %%rdx;"
+			     "push %%rsi;"
+			     "push %%rdi;"
+			     "push $0;"
+			     "vmwrite %%rsp, %[host_rsp];"
+			     "lea 1f(%%rip), %%rax;"
+			     "vmwrite %%rax, %[host_rip];"
+			     "vmresume;"
+			     "incq (%%rsp);"
+			     "1: pop %%rax;"
+			     "pop %%rdi;"
+			     "pop %%rsi;"
+			     "pop %%rdx;"
+			     "pop %%rcx;"
+			     "pop %%rbp;"
+			     : [ret]"=&a"(ret)
+			     : [host_rsp]"r"((uint64_t)HOST_RSP),
+			       [host_rip]"r"((uint64_t)HOST_RIP)
+			     : "memory", "cc", "rbx", "r8", "r9", "r10",
+			       "r11", "r12", "r13", "r14", "r15");
+	return ret;
+}
+
+static inline void vmcall(void)
+{
+	/* Currently, L1 destroys our GPRs during vmexits.  */
+	__asm__ __volatile__("push %%rbp; vmcall; pop %%rbp" : : :
+			     "rax", "rbx", "rcx", "rdx",
+			     "rsi", "rdi", "r8", "r9", "r10", "r11", "r12",
+			     "r13", "r14", "r15");
+}
+
+static inline int vmread(uint64_t encoding, uint64_t *value)
+{
+	uint64_t tmp;
+	uint8_t ret;
+
+	__asm__ __volatile__("vmread %[encoding], %[value]; setna %[ret]"
+		: [value]"=rm"(tmp), [ret]"=rm"(ret)
+		: [encoding]"r"(encoding)
+		: "cc", "memory");
+
+	*value = tmp;
+	return ret;
+}
+
+/*
+ * A wrapper around vmread that ignores errors and returns zero if the
+ * vmread instruction fails.
+ */
+static inline uint64_t vmreadz(uint64_t encoding)
+{
+	uint64_t value = 0;
+	vmread(encoding, &value);
+	return value;
+}
+
+static inline int vmwrite(uint64_t encoding, uint64_t value)
+{
+	uint8_t ret;
+
+	__asm__ __volatile__ ("vmwrite %[value], %[encoding]; setna %[ret]"
+		: [ret]"=rm"(ret)
+		: [value]"rm"(value), [encoding]"r"(encoding)
+		: "cc", "memory");
+
+	return ret;
+}
+
+static inline uint32_t vmcs_revision(void)
+{
+	return rdmsr(MSR_IA32_VMX_BASIC);
+}
+
+struct vmx_pages {
+	void *vmxon_hva;
+	uint64_t vmxon_gpa;
+	void *vmxon;
+
+	void *vmcs_hva;
+	uint64_t vmcs_gpa;
+	void *vmcs;
+
+	void *msr_hva;
+	uint64_t msr_gpa;
+	void *msr;
+
+	void *shadow_vmcs_hva;
+	uint64_t shadow_vmcs_gpa;
+	void *shadow_vmcs;
+
+	void *vmread_hva;
+	uint64_t vmread_gpa;
+	void *vmread;
+
+	void *vmwrite_hva;
+	uint64_t vmwrite_gpa;
+	void *vmwrite;
+};
+
+struct vmx_pages *vcpu_alloc_vmx(struct kvm_vm *vm, vm_vaddr_t *p_vmx_gva);
+bool prepare_for_vmx_operation(struct vmx_pages *vmx);
+void prepare_vmcs(struct vmx_pages *vmx, void *guest_rip, void *guest_rsp);
+
+#endif /* !SELFTEST_KVM_VMX_H */
diff --git a/tools/testing/selftests/kvm/include/x86.h b/tools/testing/selftests/kvm/include/x86.h
new file mode 100644
index 000000000..a7667a613
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/x86.h
@@ -0,0 +1,1047 @@
+/*
+ * tools/testing/selftests/kvm/include/x86.h
+ *
+ * Copyright (C) 2018, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ */
+
+#ifndef SELFTEST_KVM_X86_H
+#define SELFTEST_KVM_X86_H
+
+#include <assert.h>
+#include <stdint.h>
+
+#define X86_EFLAGS_FIXED	 (1u << 1)
+
+#define X86_CR4_VME		(1ul << 0)
+#define X86_CR4_PVI		(1ul << 1)
+#define X86_CR4_TSD		(1ul << 2)
+#define X86_CR4_DE		(1ul << 3)
+#define X86_CR4_PSE		(1ul << 4)
+#define X86_CR4_PAE		(1ul << 5)
+#define X86_CR4_MCE		(1ul << 6)
+#define X86_CR4_PGE		(1ul << 7)
+#define X86_CR4_PCE		(1ul << 8)
+#define X86_CR4_OSFXSR		(1ul << 9)
+#define X86_CR4_OSXMMEXCPT	(1ul << 10)
+#define X86_CR4_UMIP		(1ul << 11)
+#define X86_CR4_VMXE		(1ul << 13)
+#define X86_CR4_SMXE		(1ul << 14)
+#define X86_CR4_FSGSBASE	(1ul << 16)
+#define X86_CR4_PCIDE		(1ul << 17)
+#define X86_CR4_OSXSAVE		(1ul << 18)
+#define X86_CR4_SMEP		(1ul << 20)
+#define X86_CR4_SMAP		(1ul << 21)
+#define X86_CR4_PKE		(1ul << 22)
+
+/* The enum values match the intruction encoding of each register */
+enum x86_register {
+	RAX = 0,
+	RCX,
+	RDX,
+	RBX,
+	RSP,
+	RBP,
+	RSI,
+	RDI,
+	R8,
+	R9,
+	R10,
+	R11,
+	R12,
+	R13,
+	R14,
+	R15,
+};
+
+struct desc64 {
+	uint16_t limit0;
+	uint16_t base0;
+	unsigned base1:8, type:4, s:1, dpl:2, p:1;
+	unsigned limit1:4, avl:1, l:1, db:1, g:1, base2:8;
+	uint32_t base3;
+	uint32_t zero1;
+} __attribute__((packed));
+
+struct desc_ptr {
+	uint16_t size;
+	uint64_t address;
+} __attribute__((packed));
+
+static inline uint64_t get_desc64_base(const struct desc64 *desc)
+{
+	return ((uint64_t)desc->base3 << 32) |
+		(desc->base0 | ((desc->base1) << 16) | ((desc->base2) << 24));
+}
+
+static inline uint64_t rdtsc(void)
+{
+	uint32_t eax, edx;
+
+	/*
+	 * The lfence is to wait (on Intel CPUs) until all previous
+	 * instructions have been executed.
+	 */
+	__asm__ __volatile__("lfence; rdtsc" : "=a"(eax), "=d"(edx));
+	return ((uint64_t)edx) << 32 | eax;
+}
+
+static inline uint64_t rdtscp(uint32_t *aux)
+{
+	uint32_t eax, edx;
+
+	__asm__ __volatile__("rdtscp" : "=a"(eax), "=d"(edx), "=c"(*aux));
+	return ((uint64_t)edx) << 32 | eax;
+}
+
+static inline uint64_t rdmsr(uint32_t msr)
+{
+	uint32_t a, d;
+
+	__asm__ __volatile__("rdmsr" : "=a"(a), "=d"(d) : "c"(msr) : "memory");
+
+	return a | ((uint64_t) d << 32);
+}
+
+static inline void wrmsr(uint32_t msr, uint64_t value)
+{
+	uint32_t a = value;
+	uint32_t d = value >> 32;
+
+	__asm__ __volatile__("wrmsr" :: "a"(a), "d"(d), "c"(msr) : "memory");
+}
+
+
+static inline uint16_t inw(uint16_t port)
+{
+	uint16_t tmp;
+
+	__asm__ __volatile__("in %%dx, %%ax"
+		: /* output */ "=a" (tmp)
+		: /* input */ "d" (port));
+
+	return tmp;
+}
+
+static inline uint16_t get_es(void)
+{
+	uint16_t es;
+
+	__asm__ __volatile__("mov %%es, %[es]"
+			     : /* output */ [es]"=rm"(es));
+	return es;
+}
+
+static inline uint16_t get_cs(void)
+{
+	uint16_t cs;
+
+	__asm__ __volatile__("mov %%cs, %[cs]"
+			     : /* output */ [cs]"=rm"(cs));
+	return cs;
+}
+
+static inline uint16_t get_ss(void)
+{
+	uint16_t ss;
+
+	__asm__ __volatile__("mov %%ss, %[ss]"
+			     : /* output */ [ss]"=rm"(ss));
+	return ss;
+}
+
+static inline uint16_t get_ds(void)
+{
+	uint16_t ds;
+
+	__asm__ __volatile__("mov %%ds, %[ds]"
+			     : /* output */ [ds]"=rm"(ds));
+	return ds;
+}
+
+static inline uint16_t get_fs(void)
+{
+	uint16_t fs;
+
+	__asm__ __volatile__("mov %%fs, %[fs]"
+			     : /* output */ [fs]"=rm"(fs));
+	return fs;
+}
+
+static inline uint16_t get_gs(void)
+{
+	uint16_t gs;
+
+	__asm__ __volatile__("mov %%gs, %[gs]"
+			     : /* output */ [gs]"=rm"(gs));
+	return gs;
+}
+
+static inline uint16_t get_tr(void)
+{
+	uint16_t tr;
+
+	__asm__ __volatile__("str %[tr]"
+			     : /* output */ [tr]"=rm"(tr));
+	return tr;
+}
+
+static inline uint64_t get_cr0(void)
+{
+	uint64_t cr0;
+
+	__asm__ __volatile__("mov %%cr0, %[cr0]"
+			     : /* output */ [cr0]"=r"(cr0));
+	return cr0;
+}
+
+static inline uint64_t get_cr3(void)
+{
+	uint64_t cr3;
+
+	__asm__ __volatile__("mov %%cr3, %[cr3]"
+			     : /* output */ [cr3]"=r"(cr3));
+	return cr3;
+}
+
+static inline uint64_t get_cr4(void)
+{
+	uint64_t cr4;
+
+	__asm__ __volatile__("mov %%cr4, %[cr4]"
+			     : /* output */ [cr4]"=r"(cr4));
+	return cr4;
+}
+
+static inline void set_cr4(uint64_t val)
+{
+	__asm__ __volatile__("mov %0, %%cr4" : : "r" (val) : "memory");
+}
+
+static inline uint64_t get_gdt_base(void)
+{
+	struct desc_ptr gdt;
+	__asm__ __volatile__("sgdt %[gdt]"
+			     : /* output */ [gdt]"=m"(gdt));
+	return gdt.address;
+}
+
+static inline uint64_t get_idt_base(void)
+{
+	struct desc_ptr idt;
+	__asm__ __volatile__("sidt %[idt]"
+			     : /* output */ [idt]"=m"(idt));
+	return idt.address;
+}
+
+#define SET_XMM(__var, __xmm) \
+	asm volatile("movq %0, %%"#__xmm : : "r"(__var) : #__xmm)
+
+static inline void set_xmm(int n, unsigned long val)
+{
+	switch (n) {
+	case 0:
+		SET_XMM(val, xmm0);
+		break;
+	case 1:
+		SET_XMM(val, xmm1);
+		break;
+	case 2:
+		SET_XMM(val, xmm2);
+		break;
+	case 3:
+		SET_XMM(val, xmm3);
+		break;
+	case 4:
+		SET_XMM(val, xmm4);
+		break;
+	case 5:
+		SET_XMM(val, xmm5);
+		break;
+	case 6:
+		SET_XMM(val, xmm6);
+		break;
+	case 7:
+		SET_XMM(val, xmm7);
+		break;
+	}
+}
+
+typedef unsigned long v1di __attribute__ ((vector_size (8)));
+static inline unsigned long get_xmm(int n)
+{
+	assert(n >= 0 && n <= 7);
+
+	register v1di xmm0 __asm__("%xmm0");
+	register v1di xmm1 __asm__("%xmm1");
+	register v1di xmm2 __asm__("%xmm2");
+	register v1di xmm3 __asm__("%xmm3");
+	register v1di xmm4 __asm__("%xmm4");
+	register v1di xmm5 __asm__("%xmm5");
+	register v1di xmm6 __asm__("%xmm6");
+	register v1di xmm7 __asm__("%xmm7");
+	switch (n) {
+	case 0:
+		return (unsigned long)xmm0;
+	case 1:
+		return (unsigned long)xmm1;
+	case 2:
+		return (unsigned long)xmm2;
+	case 3:
+		return (unsigned long)xmm3;
+	case 4:
+		return (unsigned long)xmm4;
+	case 5:
+		return (unsigned long)xmm5;
+	case 6:
+		return (unsigned long)xmm6;
+	case 7:
+		return (unsigned long)xmm7;
+	}
+	return 0;
+}
+
+struct kvm_x86_state;
+struct kvm_x86_state *vcpu_save_state(struct kvm_vm *vm, uint32_t vcpuid);
+void vcpu_load_state(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_x86_state *state);
+
+/*
+ * Basic CPU control in CR0
+ */
+#define X86_CR0_PE          (1UL<<0) /* Protection Enable */
+#define X86_CR0_MP          (1UL<<1) /* Monitor Coprocessor */
+#define X86_CR0_EM          (1UL<<2) /* Emulation */
+#define X86_CR0_TS          (1UL<<3) /* Task Switched */
+#define X86_CR0_ET          (1UL<<4) /* Extension Type */
+#define X86_CR0_NE          (1UL<<5) /* Numeric Error */
+#define X86_CR0_WP          (1UL<<16) /* Write Protect */
+#define X86_CR0_AM          (1UL<<18) /* Alignment Mask */
+#define X86_CR0_NW          (1UL<<29) /* Not Write-through */
+#define X86_CR0_CD          (1UL<<30) /* Cache Disable */
+#define X86_CR0_PG          (1UL<<31) /* Paging */
+
+/*
+ * CPU model specific register (MSR) numbers.
+ */
+
+/* x86-64 specific MSRs */
+#define MSR_EFER		0xc0000080 /* extended feature register */
+#define MSR_STAR		0xc0000081 /* legacy mode SYSCALL target */
+#define MSR_LSTAR		0xc0000082 /* long mode SYSCALL target */
+#define MSR_CSTAR		0xc0000083 /* compat mode SYSCALL target */
+#define MSR_SYSCALL_MASK	0xc0000084 /* EFLAGS mask for syscall */
+#define MSR_FS_BASE		0xc0000100 /* 64bit FS base */
+#define MSR_GS_BASE		0xc0000101 /* 64bit GS base */
+#define MSR_KERNEL_GS_BASE	0xc0000102 /* SwapGS GS shadow */
+#define MSR_TSC_AUX		0xc0000103 /* Auxiliary TSC */
+
+/* EFER bits: */
+#define EFER_SCE		(1<<0)  /* SYSCALL/SYSRET */
+#define EFER_LME		(1<<8)  /* Long mode enable */
+#define EFER_LMA		(1<<10) /* Long mode active (read-only) */
+#define EFER_NX			(1<<11) /* No execute enable */
+#define EFER_SVME		(1<<12) /* Enable virtualization */
+#define EFER_LMSLE		(1<<13) /* Long Mode Segment Limit Enable */
+#define EFER_FFXSR		(1<<14) /* Enable Fast FXSAVE/FXRSTOR */
+
+/* Intel MSRs. Some also available on other CPUs */
+
+#define MSR_PPIN_CTL			0x0000004e
+#define MSR_PPIN			0x0000004f
+
+#define MSR_IA32_PERFCTR0		0x000000c1
+#define MSR_IA32_PERFCTR1		0x000000c2
+#define MSR_FSB_FREQ			0x000000cd
+#define MSR_PLATFORM_INFO		0x000000ce
+#define MSR_PLATFORM_INFO_CPUID_FAULT_BIT	31
+#define MSR_PLATFORM_INFO_CPUID_FAULT		BIT_ULL(MSR_PLATFORM_INFO_CPUID_FAULT_BIT)
+
+#define MSR_PKG_CST_CONFIG_CONTROL	0x000000e2
+#define NHM_C3_AUTO_DEMOTE		(1UL << 25)
+#define NHM_C1_AUTO_DEMOTE		(1UL << 26)
+#define ATM_LNC_C6_AUTO_DEMOTE		(1UL << 25)
+#define SNB_C1_AUTO_UNDEMOTE		(1UL << 27)
+#define SNB_C3_AUTO_UNDEMOTE		(1UL << 28)
+
+#define MSR_MTRRcap			0x000000fe
+#define MSR_IA32_BBL_CR_CTL		0x00000119
+#define MSR_IA32_BBL_CR_CTL3		0x0000011e
+
+#define MSR_IA32_SYSENTER_CS		0x00000174
+#define MSR_IA32_SYSENTER_ESP		0x00000175
+#define MSR_IA32_SYSENTER_EIP		0x00000176
+
+#define MSR_IA32_MCG_CAP		0x00000179
+#define MSR_IA32_MCG_STATUS		0x0000017a
+#define MSR_IA32_MCG_CTL		0x0000017b
+#define MSR_IA32_MCG_EXT_CTL		0x000004d0
+
+#define MSR_OFFCORE_RSP_0		0x000001a6
+#define MSR_OFFCORE_RSP_1		0x000001a7
+#define MSR_TURBO_RATIO_LIMIT		0x000001ad
+#define MSR_TURBO_RATIO_LIMIT1		0x000001ae
+#define MSR_TURBO_RATIO_LIMIT2		0x000001af
+
+#define MSR_LBR_SELECT			0x000001c8
+#define MSR_LBR_TOS			0x000001c9
+#define MSR_LBR_NHM_FROM		0x00000680
+#define MSR_LBR_NHM_TO			0x000006c0
+#define MSR_LBR_CORE_FROM		0x00000040
+#define MSR_LBR_CORE_TO			0x00000060
+
+#define MSR_LBR_INFO_0			0x00000dc0 /* ... 0xddf for _31 */
+#define LBR_INFO_MISPRED		BIT_ULL(63)
+#define LBR_INFO_IN_TX			BIT_ULL(62)
+#define LBR_INFO_ABORT			BIT_ULL(61)
+#define LBR_INFO_CYCLES			0xffff
+
+#define MSR_IA32_PEBS_ENABLE		0x000003f1
+#define MSR_IA32_DS_AREA		0x00000600
+#define MSR_IA32_PERF_CAPABILITIES	0x00000345
+#define MSR_PEBS_LD_LAT_THRESHOLD	0x000003f6
+
+#define MSR_IA32_RTIT_CTL		0x00000570
+#define MSR_IA32_RTIT_STATUS		0x00000571
+#define MSR_IA32_RTIT_ADDR0_A		0x00000580
+#define MSR_IA32_RTIT_ADDR0_B		0x00000581
+#define MSR_IA32_RTIT_ADDR1_A		0x00000582
+#define MSR_IA32_RTIT_ADDR1_B		0x00000583
+#define MSR_IA32_RTIT_ADDR2_A		0x00000584
+#define MSR_IA32_RTIT_ADDR2_B		0x00000585
+#define MSR_IA32_RTIT_ADDR3_A		0x00000586
+#define MSR_IA32_RTIT_ADDR3_B		0x00000587
+#define MSR_IA32_RTIT_CR3_MATCH		0x00000572
+#define MSR_IA32_RTIT_OUTPUT_BASE	0x00000560
+#define MSR_IA32_RTIT_OUTPUT_MASK	0x00000561
+
+#define MSR_MTRRfix64K_00000		0x00000250
+#define MSR_MTRRfix16K_80000		0x00000258
+#define MSR_MTRRfix16K_A0000		0x00000259
+#define MSR_MTRRfix4K_C0000		0x00000268
+#define MSR_MTRRfix4K_C8000		0x00000269
+#define MSR_MTRRfix4K_D0000		0x0000026a
+#define MSR_MTRRfix4K_D8000		0x0000026b
+#define MSR_MTRRfix4K_E0000		0x0000026c
+#define MSR_MTRRfix4K_E8000		0x0000026d
+#define MSR_MTRRfix4K_F0000		0x0000026e
+#define MSR_MTRRfix4K_F8000		0x0000026f
+#define MSR_MTRRdefType			0x000002ff
+
+#define MSR_IA32_CR_PAT			0x00000277
+
+#define MSR_IA32_DEBUGCTLMSR		0x000001d9
+#define MSR_IA32_LASTBRANCHFROMIP	0x000001db
+#define MSR_IA32_LASTBRANCHTOIP		0x000001dc
+#define MSR_IA32_LASTINTFROMIP		0x000001dd
+#define MSR_IA32_LASTINTTOIP		0x000001de
+
+/* DEBUGCTLMSR bits (others vary by model): */
+#define DEBUGCTLMSR_LBR			(1UL <<  0) /* last branch recording */
+#define DEBUGCTLMSR_BTF_SHIFT		1
+#define DEBUGCTLMSR_BTF			(1UL <<  1) /* single-step on branches */
+#define DEBUGCTLMSR_TR			(1UL <<  6)
+#define DEBUGCTLMSR_BTS			(1UL <<  7)
+#define DEBUGCTLMSR_BTINT		(1UL <<  8)
+#define DEBUGCTLMSR_BTS_OFF_OS		(1UL <<  9)
+#define DEBUGCTLMSR_BTS_OFF_USR		(1UL << 10)
+#define DEBUGCTLMSR_FREEZE_LBRS_ON_PMI	(1UL << 11)
+#define DEBUGCTLMSR_FREEZE_IN_SMM_BIT	14
+#define DEBUGCTLMSR_FREEZE_IN_SMM	(1UL << DEBUGCTLMSR_FREEZE_IN_SMM_BIT)
+
+#define MSR_PEBS_FRONTEND		0x000003f7
+
+#define MSR_IA32_POWER_CTL		0x000001fc
+
+#define MSR_IA32_MC0_CTL		0x00000400
+#define MSR_IA32_MC0_STATUS		0x00000401
+#define MSR_IA32_MC0_ADDR		0x00000402
+#define MSR_IA32_MC0_MISC		0x00000403
+
+/* C-state Residency Counters */
+#define MSR_PKG_C3_RESIDENCY		0x000003f8
+#define MSR_PKG_C6_RESIDENCY		0x000003f9
+#define MSR_ATOM_PKG_C6_RESIDENCY	0x000003fa
+#define MSR_PKG_C7_RESIDENCY		0x000003fa
+#define MSR_CORE_C3_RESIDENCY		0x000003fc
+#define MSR_CORE_C6_RESIDENCY		0x000003fd
+#define MSR_CORE_C7_RESIDENCY		0x000003fe
+#define MSR_KNL_CORE_C6_RESIDENCY	0x000003ff
+#define MSR_PKG_C2_RESIDENCY		0x0000060d
+#define MSR_PKG_C8_RESIDENCY		0x00000630
+#define MSR_PKG_C9_RESIDENCY		0x00000631
+#define MSR_PKG_C10_RESIDENCY		0x00000632
+
+/* Interrupt Response Limit */
+#define MSR_PKGC3_IRTL			0x0000060a
+#define MSR_PKGC6_IRTL			0x0000060b
+#define MSR_PKGC7_IRTL			0x0000060c
+#define MSR_PKGC8_IRTL			0x00000633
+#define MSR_PKGC9_IRTL			0x00000634
+#define MSR_PKGC10_IRTL			0x00000635
+
+/* Run Time Average Power Limiting (RAPL) Interface */
+
+#define MSR_RAPL_POWER_UNIT		0x00000606
+
+#define MSR_PKG_POWER_LIMIT		0x00000610
+#define MSR_PKG_ENERGY_STATUS		0x00000611
+#define MSR_PKG_PERF_STATUS		0x00000613
+#define MSR_PKG_POWER_INFO		0x00000614
+
+#define MSR_DRAM_POWER_LIMIT		0x00000618
+#define MSR_DRAM_ENERGY_STATUS		0x00000619
+#define MSR_DRAM_PERF_STATUS		0x0000061b
+#define MSR_DRAM_POWER_INFO		0x0000061c
+
+#define MSR_PP0_POWER_LIMIT		0x00000638
+#define MSR_PP0_ENERGY_STATUS		0x00000639
+#define MSR_PP0_POLICY			0x0000063a
+#define MSR_PP0_PERF_STATUS		0x0000063b
+
+#define MSR_PP1_POWER_LIMIT		0x00000640
+#define MSR_PP1_ENERGY_STATUS		0x00000641
+#define MSR_PP1_POLICY			0x00000642
+
+/* Config TDP MSRs */
+#define MSR_CONFIG_TDP_NOMINAL		0x00000648
+#define MSR_CONFIG_TDP_LEVEL_1		0x00000649
+#define MSR_CONFIG_TDP_LEVEL_2		0x0000064A
+#define MSR_CONFIG_TDP_CONTROL		0x0000064B
+#define MSR_TURBO_ACTIVATION_RATIO	0x0000064C
+
+#define MSR_PLATFORM_ENERGY_STATUS	0x0000064D
+
+#define MSR_PKG_WEIGHTED_CORE_C0_RES	0x00000658
+#define MSR_PKG_ANY_CORE_C0_RES		0x00000659
+#define MSR_PKG_ANY_GFXE_C0_RES		0x0000065A
+#define MSR_PKG_BOTH_CORE_GFXE_C0_RES	0x0000065B
+
+#define MSR_CORE_C1_RES			0x00000660
+#define MSR_MODULE_C6_RES_MS		0x00000664
+
+#define MSR_CC6_DEMOTION_POLICY_CONFIG	0x00000668
+#define MSR_MC6_DEMOTION_POLICY_CONFIG	0x00000669
+
+#define MSR_ATOM_CORE_RATIOS		0x0000066a
+#define MSR_ATOM_CORE_VIDS		0x0000066b
+#define MSR_ATOM_CORE_TURBO_RATIOS	0x0000066c
+#define MSR_ATOM_CORE_TURBO_VIDS	0x0000066d
+
+
+#define MSR_CORE_PERF_LIMIT_REASONS	0x00000690
+#define MSR_GFX_PERF_LIMIT_REASONS	0x000006B0
+#define MSR_RING_PERF_LIMIT_REASONS	0x000006B1
+
+/* Hardware P state interface */
+#define MSR_PPERF			0x0000064e
+#define MSR_PERF_LIMIT_REASONS		0x0000064f
+#define MSR_PM_ENABLE			0x00000770
+#define MSR_HWP_CAPABILITIES		0x00000771
+#define MSR_HWP_REQUEST_PKG		0x00000772
+#define MSR_HWP_INTERRUPT		0x00000773
+#define MSR_HWP_REQUEST			0x00000774
+#define MSR_HWP_STATUS			0x00000777
+
+/* CPUID.6.EAX */
+#define HWP_BASE_BIT			(1<<7)
+#define HWP_NOTIFICATIONS_BIT		(1<<8)
+#define HWP_ACTIVITY_WINDOW_BIT		(1<<9)
+#define HWP_ENERGY_PERF_PREFERENCE_BIT	(1<<10)
+#define HWP_PACKAGE_LEVEL_REQUEST_BIT	(1<<11)
+
+/* IA32_HWP_CAPABILITIES */
+#define HWP_HIGHEST_PERF(x)		(((x) >> 0) & 0xff)
+#define HWP_GUARANTEED_PERF(x)		(((x) >> 8) & 0xff)
+#define HWP_MOSTEFFICIENT_PERF(x)	(((x) >> 16) & 0xff)
+#define HWP_LOWEST_PERF(x)		(((x) >> 24) & 0xff)
+
+/* IA32_HWP_REQUEST */
+#define HWP_MIN_PERF(x)			(x & 0xff)
+#define HWP_MAX_PERF(x)			((x & 0xff) << 8)
+#define HWP_DESIRED_PERF(x)		((x & 0xff) << 16)
+#define HWP_ENERGY_PERF_PREFERENCE(x)	(((unsigned long long) x & 0xff) << 24)
+#define HWP_EPP_PERFORMANCE		0x00
+#define HWP_EPP_BALANCE_PERFORMANCE	0x80
+#define HWP_EPP_BALANCE_POWERSAVE	0xC0
+#define HWP_EPP_POWERSAVE		0xFF
+#define HWP_ACTIVITY_WINDOW(x)		((unsigned long long)(x & 0xff3) << 32)
+#define HWP_PACKAGE_CONTROL(x)		((unsigned long long)(x & 0x1) << 42)
+
+/* IA32_HWP_STATUS */
+#define HWP_GUARANTEED_CHANGE(x)	(x & 0x1)
+#define HWP_EXCURSION_TO_MINIMUM(x)	(x & 0x4)
+
+/* IA32_HWP_INTERRUPT */
+#define HWP_CHANGE_TO_GUARANTEED_INT(x)	(x & 0x1)
+#define HWP_EXCURSION_TO_MINIMUM_INT(x)	(x & 0x2)
+
+#define MSR_AMD64_MC0_MASK		0xc0010044
+
+#define MSR_IA32_MCx_CTL(x)		(MSR_IA32_MC0_CTL + 4*(x))
+#define MSR_IA32_MCx_STATUS(x)		(MSR_IA32_MC0_STATUS + 4*(x))
+#define MSR_IA32_MCx_ADDR(x)		(MSR_IA32_MC0_ADDR + 4*(x))
+#define MSR_IA32_MCx_MISC(x)		(MSR_IA32_MC0_MISC + 4*(x))
+
+#define MSR_AMD64_MCx_MASK(x)		(MSR_AMD64_MC0_MASK + (x))
+
+/* These are consecutive and not in the normal 4er MCE bank block */
+#define MSR_IA32_MC0_CTL2		0x00000280
+#define MSR_IA32_MCx_CTL2(x)		(MSR_IA32_MC0_CTL2 + (x))
+
+#define MSR_P6_PERFCTR0			0x000000c1
+#define MSR_P6_PERFCTR1			0x000000c2
+#define MSR_P6_EVNTSEL0			0x00000186
+#define MSR_P6_EVNTSEL1			0x00000187
+
+#define MSR_KNC_PERFCTR0               0x00000020
+#define MSR_KNC_PERFCTR1               0x00000021
+#define MSR_KNC_EVNTSEL0               0x00000028
+#define MSR_KNC_EVNTSEL1               0x00000029
+
+/* Alternative perfctr range with full access. */
+#define MSR_IA32_PMC0			0x000004c1
+
+/* AMD64 MSRs. Not complete. See the architecture manual for a more
+   complete list. */
+
+#define MSR_AMD64_PATCH_LEVEL		0x0000008b
+#define MSR_AMD64_TSC_RATIO		0xc0000104
+#define MSR_AMD64_NB_CFG		0xc001001f
+#define MSR_AMD64_PATCH_LOADER		0xc0010020
+#define MSR_AMD64_OSVW_ID_LENGTH	0xc0010140
+#define MSR_AMD64_OSVW_STATUS		0xc0010141
+#define MSR_AMD64_LS_CFG		0xc0011020
+#define MSR_AMD64_DC_CFG		0xc0011022
+#define MSR_AMD64_BU_CFG2		0xc001102a
+#define MSR_AMD64_IBSFETCHCTL		0xc0011030
+#define MSR_AMD64_IBSFETCHLINAD		0xc0011031
+#define MSR_AMD64_IBSFETCHPHYSAD	0xc0011032
+#define MSR_AMD64_IBSFETCH_REG_COUNT	3
+#define MSR_AMD64_IBSFETCH_REG_MASK	((1UL<<MSR_AMD64_IBSFETCH_REG_COUNT)-1)
+#define MSR_AMD64_IBSOPCTL		0xc0011033
+#define MSR_AMD64_IBSOPRIP		0xc0011034
+#define MSR_AMD64_IBSOPDATA		0xc0011035
+#define MSR_AMD64_IBSOPDATA2		0xc0011036
+#define MSR_AMD64_IBSOPDATA3		0xc0011037
+#define MSR_AMD64_IBSDCLINAD		0xc0011038
+#define MSR_AMD64_IBSDCPHYSAD		0xc0011039
+#define MSR_AMD64_IBSOP_REG_COUNT	7
+#define MSR_AMD64_IBSOP_REG_MASK	((1UL<<MSR_AMD64_IBSOP_REG_COUNT)-1)
+#define MSR_AMD64_IBSCTL		0xc001103a
+#define MSR_AMD64_IBSBRTARGET		0xc001103b
+#define MSR_AMD64_IBSOPDATA4		0xc001103d
+#define MSR_AMD64_IBS_REG_COUNT_MAX	8 /* includes MSR_AMD64_IBSBRTARGET */
+#define MSR_AMD64_SEV			0xc0010131
+#define MSR_AMD64_SEV_ENABLED_BIT	0
+#define MSR_AMD64_SEV_ENABLED		BIT_ULL(MSR_AMD64_SEV_ENABLED_BIT)
+
+/* Fam 17h MSRs */
+#define MSR_F17H_IRPERF			0xc00000e9
+
+/* Fam 16h MSRs */
+#define MSR_F16H_L2I_PERF_CTL		0xc0010230
+#define MSR_F16H_L2I_PERF_CTR		0xc0010231
+#define MSR_F16H_DR1_ADDR_MASK		0xc0011019
+#define MSR_F16H_DR2_ADDR_MASK		0xc001101a
+#define MSR_F16H_DR3_ADDR_MASK		0xc001101b
+#define MSR_F16H_DR0_ADDR_MASK		0xc0011027
+
+/* Fam 15h MSRs */
+#define MSR_F15H_PERF_CTL		0xc0010200
+#define MSR_F15H_PERF_CTR		0xc0010201
+#define MSR_F15H_NB_PERF_CTL		0xc0010240
+#define MSR_F15H_NB_PERF_CTR		0xc0010241
+#define MSR_F15H_PTSC			0xc0010280
+#define MSR_F15H_IC_CFG			0xc0011021
+
+/* Fam 10h MSRs */
+#define MSR_FAM10H_MMIO_CONF_BASE	0xc0010058
+#define FAM10H_MMIO_CONF_ENABLE		(1<<0)
+#define FAM10H_MMIO_CONF_BUSRANGE_MASK	0xf
+#define FAM10H_MMIO_CONF_BUSRANGE_SHIFT 2
+#define FAM10H_MMIO_CONF_BASE_MASK	0xfffffffULL
+#define FAM10H_MMIO_CONF_BASE_SHIFT	20
+#define MSR_FAM10H_NODE_ID		0xc001100c
+#define MSR_F10H_DECFG			0xc0011029
+#define MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT	1
+#define MSR_F10H_DECFG_LFENCE_SERIALIZE		BIT_ULL(MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT)
+
+/* K8 MSRs */
+#define MSR_K8_TOP_MEM1			0xc001001a
+#define MSR_K8_TOP_MEM2			0xc001001d
+#define MSR_K8_SYSCFG			0xc0010010
+#define MSR_K8_SYSCFG_MEM_ENCRYPT_BIT	23
+#define MSR_K8_SYSCFG_MEM_ENCRYPT	BIT_ULL(MSR_K8_SYSCFG_MEM_ENCRYPT_BIT)
+#define MSR_K8_INT_PENDING_MSG		0xc0010055
+/* C1E active bits in int pending message */
+#define K8_INTP_C1E_ACTIVE_MASK		0x18000000
+#define MSR_K8_TSEG_ADDR		0xc0010112
+#define MSR_K8_TSEG_MASK		0xc0010113
+#define K8_MTRRFIXRANGE_DRAM_ENABLE	0x00040000 /* MtrrFixDramEn bit    */
+#define K8_MTRRFIXRANGE_DRAM_MODIFY	0x00080000 /* MtrrFixDramModEn bit */
+#define K8_MTRR_RDMEM_WRMEM_MASK	0x18181818 /* Mask: RdMem|WrMem    */
+
+/* K7 MSRs */
+#define MSR_K7_EVNTSEL0			0xc0010000
+#define MSR_K7_PERFCTR0			0xc0010004
+#define MSR_K7_EVNTSEL1			0xc0010001
+#define MSR_K7_PERFCTR1			0xc0010005
+#define MSR_K7_EVNTSEL2			0xc0010002
+#define MSR_K7_PERFCTR2			0xc0010006
+#define MSR_K7_EVNTSEL3			0xc0010003
+#define MSR_K7_PERFCTR3			0xc0010007
+#define MSR_K7_CLK_CTL			0xc001001b
+#define MSR_K7_HWCR			0xc0010015
+#define MSR_K7_HWCR_SMMLOCK_BIT		0
+#define MSR_K7_HWCR_SMMLOCK		BIT_ULL(MSR_K7_HWCR_SMMLOCK_BIT)
+#define MSR_K7_FID_VID_CTL		0xc0010041
+#define MSR_K7_FID_VID_STATUS		0xc0010042
+
+/* K6 MSRs */
+#define MSR_K6_WHCR			0xc0000082
+#define MSR_K6_UWCCR			0xc0000085
+#define MSR_K6_EPMR			0xc0000086
+#define MSR_K6_PSOR			0xc0000087
+#define MSR_K6_PFIR			0xc0000088
+
+/* Centaur-Hauls/IDT defined MSRs. */
+#define MSR_IDT_FCR1			0x00000107
+#define MSR_IDT_FCR2			0x00000108
+#define MSR_IDT_FCR3			0x00000109
+#define MSR_IDT_FCR4			0x0000010a
+
+#define MSR_IDT_MCR0			0x00000110
+#define MSR_IDT_MCR1			0x00000111
+#define MSR_IDT_MCR2			0x00000112
+#define MSR_IDT_MCR3			0x00000113
+#define MSR_IDT_MCR4			0x00000114
+#define MSR_IDT_MCR5			0x00000115
+#define MSR_IDT_MCR6			0x00000116
+#define MSR_IDT_MCR7			0x00000117
+#define MSR_IDT_MCR_CTRL		0x00000120
+
+/* VIA Cyrix defined MSRs*/
+#define MSR_VIA_FCR			0x00001107
+#define MSR_VIA_LONGHAUL		0x0000110a
+#define MSR_VIA_RNG			0x0000110b
+#define MSR_VIA_BCR2			0x00001147
+
+/* Transmeta defined MSRs */
+#define MSR_TMTA_LONGRUN_CTRL		0x80868010
+#define MSR_TMTA_LONGRUN_FLAGS		0x80868011
+#define MSR_TMTA_LRTI_READOUT		0x80868018
+#define MSR_TMTA_LRTI_VOLT_MHZ		0x8086801a
+
+/* Intel defined MSRs. */
+#define MSR_IA32_P5_MC_ADDR		0x00000000
+#define MSR_IA32_P5_MC_TYPE		0x00000001
+#define MSR_IA32_TSC			0x00000010
+#define MSR_IA32_PLATFORM_ID		0x00000017
+#define MSR_IA32_EBL_CR_POWERON		0x0000002a
+#define MSR_EBC_FREQUENCY_ID		0x0000002c
+#define MSR_SMI_COUNT			0x00000034
+#define MSR_IA32_FEATURE_CONTROL        0x0000003a
+#define MSR_IA32_TSC_ADJUST             0x0000003b
+#define MSR_IA32_BNDCFGS		0x00000d90
+
+#define MSR_IA32_BNDCFGS_RSVD		0x00000ffc
+
+#define MSR_IA32_XSS			0x00000da0
+
+#define FEATURE_CONTROL_LOCKED				(1<<0)
+#define FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX	(1<<1)
+#define FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX	(1<<2)
+#define FEATURE_CONTROL_LMCE				(1<<20)
+
+#define MSR_IA32_APICBASE		0x0000001b
+#define MSR_IA32_APICBASE_BSP		(1<<8)
+#define MSR_IA32_APICBASE_ENABLE	(1<<11)
+#define MSR_IA32_APICBASE_BASE		(0xfffff<<12)
+
+#define MSR_IA32_TSCDEADLINE		0x000006e0
+
+#define MSR_IA32_UCODE_WRITE		0x00000079
+#define MSR_IA32_UCODE_REV		0x0000008b
+
+#define MSR_IA32_SMM_MONITOR_CTL	0x0000009b
+#define MSR_IA32_SMBASE			0x0000009e
+
+#define MSR_IA32_PERF_STATUS		0x00000198
+#define MSR_IA32_PERF_CTL		0x00000199
+#define INTEL_PERF_CTL_MASK		0xffff
+#define MSR_AMD_PSTATE_DEF_BASE		0xc0010064
+#define MSR_AMD_PERF_STATUS		0xc0010063
+#define MSR_AMD_PERF_CTL		0xc0010062
+
+#define MSR_IA32_MPERF			0x000000e7
+#define MSR_IA32_APERF			0x000000e8
+
+#define MSR_IA32_THERM_CONTROL		0x0000019a
+#define MSR_IA32_THERM_INTERRUPT	0x0000019b
+
+#define THERM_INT_HIGH_ENABLE		(1 << 0)
+#define THERM_INT_LOW_ENABLE		(1 << 1)
+#define THERM_INT_PLN_ENABLE		(1 << 24)
+
+#define MSR_IA32_THERM_STATUS		0x0000019c
+
+#define THERM_STATUS_PROCHOT		(1 << 0)
+#define THERM_STATUS_POWER_LIMIT	(1 << 10)
+
+#define MSR_THERM2_CTL			0x0000019d
+
+#define MSR_THERM2_CTL_TM_SELECT	(1ULL << 16)
+
+#define MSR_IA32_MISC_ENABLE		0x000001a0
+
+#define MSR_IA32_TEMPERATURE_TARGET	0x000001a2
+
+#define MSR_MISC_FEATURE_CONTROL	0x000001a4
+#define MSR_MISC_PWR_MGMT		0x000001aa
+
+#define MSR_IA32_ENERGY_PERF_BIAS	0x000001b0
+#define ENERGY_PERF_BIAS_PERFORMANCE		0
+#define ENERGY_PERF_BIAS_BALANCE_PERFORMANCE	4
+#define ENERGY_PERF_BIAS_NORMAL			6
+#define ENERGY_PERF_BIAS_BALANCE_POWERSAVE	8
+#define ENERGY_PERF_BIAS_POWERSAVE		15
+
+#define MSR_IA32_PACKAGE_THERM_STATUS		0x000001b1
+
+#define PACKAGE_THERM_STATUS_PROCHOT		(1 << 0)
+#define PACKAGE_THERM_STATUS_POWER_LIMIT	(1 << 10)
+
+#define MSR_IA32_PACKAGE_THERM_INTERRUPT	0x000001b2
+
+#define PACKAGE_THERM_INT_HIGH_ENABLE		(1 << 0)
+#define PACKAGE_THERM_INT_LOW_ENABLE		(1 << 1)
+#define PACKAGE_THERM_INT_PLN_ENABLE		(1 << 24)
+
+/* Thermal Thresholds Support */
+#define THERM_INT_THRESHOLD0_ENABLE    (1 << 15)
+#define THERM_SHIFT_THRESHOLD0        8
+#define THERM_MASK_THRESHOLD0          (0x7f << THERM_SHIFT_THRESHOLD0)
+#define THERM_INT_THRESHOLD1_ENABLE    (1 << 23)
+#define THERM_SHIFT_THRESHOLD1        16
+#define THERM_MASK_THRESHOLD1          (0x7f << THERM_SHIFT_THRESHOLD1)
+#define THERM_STATUS_THRESHOLD0        (1 << 6)
+#define THERM_LOG_THRESHOLD0           (1 << 7)
+#define THERM_STATUS_THRESHOLD1        (1 << 8)
+#define THERM_LOG_THRESHOLD1           (1 << 9)
+
+/* MISC_ENABLE bits: architectural */
+#define MSR_IA32_MISC_ENABLE_FAST_STRING_BIT		0
+#define MSR_IA32_MISC_ENABLE_FAST_STRING		(1ULL << MSR_IA32_MISC_ENABLE_FAST_STRING_BIT)
+#define MSR_IA32_MISC_ENABLE_TCC_BIT			1
+#define MSR_IA32_MISC_ENABLE_TCC			(1ULL << MSR_IA32_MISC_ENABLE_TCC_BIT)
+#define MSR_IA32_MISC_ENABLE_EMON_BIT			7
+#define MSR_IA32_MISC_ENABLE_EMON			(1ULL << MSR_IA32_MISC_ENABLE_EMON_BIT)
+#define MSR_IA32_MISC_ENABLE_BTS_UNAVAIL_BIT		11
+#define MSR_IA32_MISC_ENABLE_BTS_UNAVAIL		(1ULL << MSR_IA32_MISC_ENABLE_BTS_UNAVAIL_BIT)
+#define MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL_BIT		12
+#define MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL		(1ULL << MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL_BIT)
+#define MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP_BIT	16
+#define MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP		(1ULL << MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP_BIT)
+#define MSR_IA32_MISC_ENABLE_MWAIT_BIT			18
+#define MSR_IA32_MISC_ENABLE_MWAIT			(1ULL << MSR_IA32_MISC_ENABLE_MWAIT_BIT)
+#define MSR_IA32_MISC_ENABLE_LIMIT_CPUID_BIT		22
+#define MSR_IA32_MISC_ENABLE_LIMIT_CPUID		(1ULL << MSR_IA32_MISC_ENABLE_LIMIT_CPUID_BIT)
+#define MSR_IA32_MISC_ENABLE_XTPR_DISABLE_BIT		23
+#define MSR_IA32_MISC_ENABLE_XTPR_DISABLE		(1ULL << MSR_IA32_MISC_ENABLE_XTPR_DISABLE_BIT)
+#define MSR_IA32_MISC_ENABLE_XD_DISABLE_BIT		34
+#define MSR_IA32_MISC_ENABLE_XD_DISABLE			(1ULL << MSR_IA32_MISC_ENABLE_XD_DISABLE_BIT)
+
+/* MISC_ENABLE bits: model-specific, meaning may vary from core to core */
+#define MSR_IA32_MISC_ENABLE_X87_COMPAT_BIT		2
+#define MSR_IA32_MISC_ENABLE_X87_COMPAT			(1ULL << MSR_IA32_MISC_ENABLE_X87_COMPAT_BIT)
+#define MSR_IA32_MISC_ENABLE_TM1_BIT			3
+#define MSR_IA32_MISC_ENABLE_TM1			(1ULL << MSR_IA32_MISC_ENABLE_TM1_BIT)
+#define MSR_IA32_MISC_ENABLE_SPLIT_LOCK_DISABLE_BIT	4
+#define MSR_IA32_MISC_ENABLE_SPLIT_LOCK_DISABLE		(1ULL << MSR_IA32_MISC_ENABLE_SPLIT_LOCK_DISABLE_BIT)
+#define MSR_IA32_MISC_ENABLE_L3CACHE_DISABLE_BIT	6
+#define MSR_IA32_MISC_ENABLE_L3CACHE_DISABLE		(1ULL << MSR_IA32_MISC_ENABLE_L3CACHE_DISABLE_BIT)
+#define MSR_IA32_MISC_ENABLE_SUPPRESS_LOCK_BIT		8
+#define MSR_IA32_MISC_ENABLE_SUPPRESS_LOCK		(1ULL << MSR_IA32_MISC_ENABLE_SUPPRESS_LOCK_BIT)
+#define MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE_BIT	9
+#define MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE		(1ULL << MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE_BIT)
+#define MSR_IA32_MISC_ENABLE_FERR_BIT			10
+#define MSR_IA32_MISC_ENABLE_FERR			(1ULL << MSR_IA32_MISC_ENABLE_FERR_BIT)
+#define MSR_IA32_MISC_ENABLE_FERR_MULTIPLEX_BIT		10
+#define MSR_IA32_MISC_ENABLE_FERR_MULTIPLEX		(1ULL << MSR_IA32_MISC_ENABLE_FERR_MULTIPLEX_BIT)
+#define MSR_IA32_MISC_ENABLE_TM2_BIT			13
+#define MSR_IA32_MISC_ENABLE_TM2			(1ULL << MSR_IA32_MISC_ENABLE_TM2_BIT)
+#define MSR_IA32_MISC_ENABLE_ADJ_PREF_DISABLE_BIT	19
+#define MSR_IA32_MISC_ENABLE_ADJ_PREF_DISABLE		(1ULL << MSR_IA32_MISC_ENABLE_ADJ_PREF_DISABLE_BIT)
+#define MSR_IA32_MISC_ENABLE_SPEEDSTEP_LOCK_BIT		20
+#define MSR_IA32_MISC_ENABLE_SPEEDSTEP_LOCK		(1ULL << MSR_IA32_MISC_ENABLE_SPEEDSTEP_LOCK_BIT)
+#define MSR_IA32_MISC_ENABLE_L1D_CONTEXT_BIT		24
+#define MSR_IA32_MISC_ENABLE_L1D_CONTEXT		(1ULL << MSR_IA32_MISC_ENABLE_L1D_CONTEXT_BIT)
+#define MSR_IA32_MISC_ENABLE_DCU_PREF_DISABLE_BIT	37
+#define MSR_IA32_MISC_ENABLE_DCU_PREF_DISABLE		(1ULL << MSR_IA32_MISC_ENABLE_DCU_PREF_DISABLE_BIT)
+#define MSR_IA32_MISC_ENABLE_TURBO_DISABLE_BIT		38
+#define MSR_IA32_MISC_ENABLE_TURBO_DISABLE		(1ULL << MSR_IA32_MISC_ENABLE_TURBO_DISABLE_BIT)
+#define MSR_IA32_MISC_ENABLE_IP_PREF_DISABLE_BIT	39
+#define MSR_IA32_MISC_ENABLE_IP_PREF_DISABLE		(1ULL << MSR_IA32_MISC_ENABLE_IP_PREF_DISABLE_BIT)
+
+/* MISC_FEATURES_ENABLES non-architectural features */
+#define MSR_MISC_FEATURES_ENABLES	0x00000140
+
+#define MSR_MISC_FEATURES_ENABLES_CPUID_FAULT_BIT	0
+#define MSR_MISC_FEATURES_ENABLES_CPUID_FAULT		BIT_ULL(MSR_MISC_FEATURES_ENABLES_CPUID_FAULT_BIT)
+#define MSR_MISC_FEATURES_ENABLES_RING3MWAIT_BIT	1
+
+#define MSR_IA32_TSC_DEADLINE		0x000006E0
+
+/* P4/Xeon+ specific */
+#define MSR_IA32_MCG_EAX		0x00000180
+#define MSR_IA32_MCG_EBX		0x00000181
+#define MSR_IA32_MCG_ECX		0x00000182
+#define MSR_IA32_MCG_EDX		0x00000183
+#define MSR_IA32_MCG_ESI		0x00000184
+#define MSR_IA32_MCG_EDI		0x00000185
+#define MSR_IA32_MCG_EBP		0x00000186
+#define MSR_IA32_MCG_ESP		0x00000187
+#define MSR_IA32_MCG_EFLAGS		0x00000188
+#define MSR_IA32_MCG_EIP		0x00000189
+#define MSR_IA32_MCG_RESERVED		0x0000018a
+
+/* Pentium IV performance counter MSRs */
+#define MSR_P4_BPU_PERFCTR0		0x00000300
+#define MSR_P4_BPU_PERFCTR1		0x00000301
+#define MSR_P4_BPU_PERFCTR2		0x00000302
+#define MSR_P4_BPU_PERFCTR3		0x00000303
+#define MSR_P4_MS_PERFCTR0		0x00000304
+#define MSR_P4_MS_PERFCTR1		0x00000305
+#define MSR_P4_MS_PERFCTR2		0x00000306
+#define MSR_P4_MS_PERFCTR3		0x00000307
+#define MSR_P4_FLAME_PERFCTR0		0x00000308
+#define MSR_P4_FLAME_PERFCTR1		0x00000309
+#define MSR_P4_FLAME_PERFCTR2		0x0000030a
+#define MSR_P4_FLAME_PERFCTR3		0x0000030b
+#define MSR_P4_IQ_PERFCTR0		0x0000030c
+#define MSR_P4_IQ_PERFCTR1		0x0000030d
+#define MSR_P4_IQ_PERFCTR2		0x0000030e
+#define MSR_P4_IQ_PERFCTR3		0x0000030f
+#define MSR_P4_IQ_PERFCTR4		0x00000310
+#define MSR_P4_IQ_PERFCTR5		0x00000311
+#define MSR_P4_BPU_CCCR0		0x00000360
+#define MSR_P4_BPU_CCCR1		0x00000361
+#define MSR_P4_BPU_CCCR2		0x00000362
+#define MSR_P4_BPU_CCCR3		0x00000363
+#define MSR_P4_MS_CCCR0			0x00000364
+#define MSR_P4_MS_CCCR1			0x00000365
+#define MSR_P4_MS_CCCR2			0x00000366
+#define MSR_P4_MS_CCCR3			0x00000367
+#define MSR_P4_FLAME_CCCR0		0x00000368
+#define MSR_P4_FLAME_CCCR1		0x00000369
+#define MSR_P4_FLAME_CCCR2		0x0000036a
+#define MSR_P4_FLAME_CCCR3		0x0000036b
+#define MSR_P4_IQ_CCCR0			0x0000036c
+#define MSR_P4_IQ_CCCR1			0x0000036d
+#define MSR_P4_IQ_CCCR2			0x0000036e
+#define MSR_P4_IQ_CCCR3			0x0000036f
+#define MSR_P4_IQ_CCCR4			0x00000370
+#define MSR_P4_IQ_CCCR5			0x00000371
+#define MSR_P4_ALF_ESCR0		0x000003ca
+#define MSR_P4_ALF_ESCR1		0x000003cb
+#define MSR_P4_BPU_ESCR0		0x000003b2
+#define MSR_P4_BPU_ESCR1		0x000003b3
+#define MSR_P4_BSU_ESCR0		0x000003a0
+#define MSR_P4_BSU_ESCR1		0x000003a1
+#define MSR_P4_CRU_ESCR0		0x000003b8
+#define MSR_P4_CRU_ESCR1		0x000003b9
+#define MSR_P4_CRU_ESCR2		0x000003cc
+#define MSR_P4_CRU_ESCR3		0x000003cd
+#define MSR_P4_CRU_ESCR4		0x000003e0
+#define MSR_P4_CRU_ESCR5		0x000003e1
+#define MSR_P4_DAC_ESCR0		0x000003a8
+#define MSR_P4_DAC_ESCR1		0x000003a9
+#define MSR_P4_FIRM_ESCR0		0x000003a4
+#define MSR_P4_FIRM_ESCR1		0x000003a5
+#define MSR_P4_FLAME_ESCR0		0x000003a6
+#define MSR_P4_FLAME_ESCR1		0x000003a7
+#define MSR_P4_FSB_ESCR0		0x000003a2
+#define MSR_P4_FSB_ESCR1		0x000003a3
+#define MSR_P4_IQ_ESCR0			0x000003ba
+#define MSR_P4_IQ_ESCR1			0x000003bb
+#define MSR_P4_IS_ESCR0			0x000003b4
+#define MSR_P4_IS_ESCR1			0x000003b5
+#define MSR_P4_ITLB_ESCR0		0x000003b6
+#define MSR_P4_ITLB_ESCR1		0x000003b7
+#define MSR_P4_IX_ESCR0			0x000003c8
+#define MSR_P4_IX_ESCR1			0x000003c9
+#define MSR_P4_MOB_ESCR0		0x000003aa
+#define MSR_P4_MOB_ESCR1		0x000003ab
+#define MSR_P4_MS_ESCR0			0x000003c0
+#define MSR_P4_MS_ESCR1			0x000003c1
+#define MSR_P4_PMH_ESCR0		0x000003ac
+#define MSR_P4_PMH_ESCR1		0x000003ad
+#define MSR_P4_RAT_ESCR0		0x000003bc
+#define MSR_P4_RAT_ESCR1		0x000003bd
+#define MSR_P4_SAAT_ESCR0		0x000003ae
+#define MSR_P4_SAAT_ESCR1		0x000003af
+#define MSR_P4_SSU_ESCR0		0x000003be
+#define MSR_P4_SSU_ESCR1		0x000003bf /* guess: not in manual */
+
+#define MSR_P4_TBPU_ESCR0		0x000003c2
+#define MSR_P4_TBPU_ESCR1		0x000003c3
+#define MSR_P4_TC_ESCR0			0x000003c4
+#define MSR_P4_TC_ESCR1			0x000003c5
+#define MSR_P4_U2L_ESCR0		0x000003b0
+#define MSR_P4_U2L_ESCR1		0x000003b1
+
+#define MSR_P4_PEBS_MATRIX_VERT		0x000003f2
+
+/* Intel Core-based CPU performance counters */
+#define MSR_CORE_PERF_FIXED_CTR0	0x00000309
+#define MSR_CORE_PERF_FIXED_CTR1	0x0000030a
+#define MSR_CORE_PERF_FIXED_CTR2	0x0000030b
+#define MSR_CORE_PERF_FIXED_CTR_CTRL	0x0000038d
+#define MSR_CORE_PERF_GLOBAL_STATUS	0x0000038e
+#define MSR_CORE_PERF_GLOBAL_CTRL	0x0000038f
+#define MSR_CORE_PERF_GLOBAL_OVF_CTRL	0x00000390
+
+/* Geode defined MSRs */
+#define MSR_GEODE_BUSCONT_CONF0		0x00001900
+
+/* Intel VT MSRs */
+#define MSR_IA32_VMX_BASIC              0x00000480
+#define MSR_IA32_VMX_PINBASED_CTLS      0x00000481
+#define MSR_IA32_VMX_PROCBASED_CTLS     0x00000482
+#define MSR_IA32_VMX_EXIT_CTLS          0x00000483
+#define MSR_IA32_VMX_ENTRY_CTLS         0x00000484
+#define MSR_IA32_VMX_MISC               0x00000485
+#define MSR_IA32_VMX_CR0_FIXED0         0x00000486
+#define MSR_IA32_VMX_CR0_FIXED1         0x00000487
+#define MSR_IA32_VMX_CR4_FIXED0         0x00000488
+#define MSR_IA32_VMX_CR4_FIXED1         0x00000489
+#define MSR_IA32_VMX_VMCS_ENUM          0x0000048a
+#define MSR_IA32_VMX_PROCBASED_CTLS2    0x0000048b
+#define MSR_IA32_VMX_EPT_VPID_CAP       0x0000048c
+#define MSR_IA32_VMX_TRUE_PINBASED_CTLS  0x0000048d
+#define MSR_IA32_VMX_TRUE_PROCBASED_CTLS 0x0000048e
+#define MSR_IA32_VMX_TRUE_EXIT_CTLS      0x0000048f
+#define MSR_IA32_VMX_TRUE_ENTRY_CTLS     0x00000490
+#define MSR_IA32_VMX_VMFUNC             0x00000491
+
+/* VMX_BASIC bits and bitmasks */
+#define VMX_BASIC_VMCS_SIZE_SHIFT	32
+#define VMX_BASIC_TRUE_CTLS		(1ULL << 55)
+#define VMX_BASIC_64		0x0001000000000000LLU
+#define VMX_BASIC_MEM_TYPE_SHIFT	50
+#define VMX_BASIC_MEM_TYPE_MASK	0x003c000000000000LLU
+#define VMX_BASIC_MEM_TYPE_WB	6LLU
+#define VMX_BASIC_INOUT		0x0040000000000000LLU
+
+/* MSR_IA32_VMX_MISC bits */
+#define MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS (1ULL << 29)
+#define MSR_IA32_VMX_MISC_PREEMPTION_TIMER_SCALE   0x1F
+/* AMD-V MSRs */
+
+#define MSR_VM_CR                       0xc0010114
+#define MSR_VM_IGNNE                    0xc0010115
+#define MSR_VM_HSAVE_PA                 0xc0010117
+
+#endif /* !SELFTEST_KVM_X86_H */
diff --git a/tools/testing/selftests/kvm/lib/assert.c b/tools/testing/selftests/kvm/lib/assert.c
new file mode 100644
index 000000000..d30667706
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/assert.c
@@ -0,0 +1,92 @@
+/*
+ * tools/testing/selftests/kvm/lib/assert.c
+ *
+ * Copyright (C) 2018, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ */
+
+#define _GNU_SOURCE /* for getline(3) and strchrnul(3)*/
+
+#include "test_util.h"
+
+#include <execinfo.h>
+#include <sys/syscall.h>
+
+#include "../../kselftest.h"
+
+/* Dumps the current stack trace to stderr. */
+static void __attribute__((noinline)) test_dump_stack(void);
+static void test_dump_stack(void)
+{
+	/*
+	 * Build and run this command:
+	 *
+	 *	addr2line -s -e /proc/$PPID/exe -fpai {backtrace addresses} | \
+	 *		grep -v test_dump_stack | cat -n 1>&2
+	 *
+	 * Note that the spacing is different and there's no newline.
+	 */
+	size_t i;
+	size_t n = 20;
+	void *stack[n];
+	const char *addr2line = "addr2line -s -e /proc/$PPID/exe -fpai";
+	const char *pipeline = "|cat -n 1>&2";
+	char cmd[strlen(addr2line) + strlen(pipeline) +
+		 /* N bytes per addr * 2 digits per byte + 1 space per addr: */
+		 n * (((sizeof(void *)) * 2) + 1) +
+		 /* Null terminator: */
+		 1];
+	char *c;
+
+	n = backtrace(stack, n);
+	c = &cmd[0];
+	c += sprintf(c, "%s", addr2line);
+	/*
+	 * Skip the first 3 frames: backtrace, test_dump_stack, and
+	 * test_assert. We hope that backtrace isn't inlined and the other two
+	 * we've declared noinline.
+	 */
+	for (i = 2; i < n; i++)
+		c += sprintf(c, " %lx", ((unsigned long) stack[i]) - 1);
+	c += sprintf(c, "%s", pipeline);
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wunused-result"
+	system(cmd);
+#pragma GCC diagnostic pop
+}
+
+static pid_t _gettid(void)
+{
+	return syscall(SYS_gettid);
+}
+
+void __attribute__((noinline))
+test_assert(bool exp, const char *exp_str,
+	const char *file, unsigned int line, const char *fmt, ...)
+{
+	va_list ap;
+
+	if (!(exp)) {
+		va_start(ap, fmt);
+
+		fprintf(stderr, "==== Test Assertion Failure ====\n"
+			"  %s:%u: %s\n"
+			"  pid=%d tid=%d - %s\n",
+			file, line, exp_str, getpid(), _gettid(),
+			strerror(errno));
+		test_dump_stack();
+		if (fmt) {
+			fputs("  ", stderr);
+			vfprintf(stderr, fmt, ap);
+			fputs("\n", stderr);
+		}
+		va_end(ap);
+
+		if (errno == EACCES)
+			ksft_exit_skip("Access denied - Exiting.\n");
+		exit(254);
+	}
+
+	return;
+}
diff --git a/tools/testing/selftests/kvm/lib/elf.c b/tools/testing/selftests/kvm/lib/elf.c
new file mode 100644
index 000000000..5eb857584
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/elf.c
@@ -0,0 +1,197 @@
+/*
+ * tools/testing/selftests/kvm/lib/elf.c
+ *
+ * Copyright (C) 2018, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ */
+
+#include "test_util.h"
+
+#include <bits/endian.h>
+#include <linux/elf.h>
+
+#include "kvm_util.h"
+#include "kvm_util_internal.h"
+
+static void elfhdr_get(const char *filename, Elf64_Ehdr *hdrp)
+{
+	off_t offset_rv;
+
+	/* Open the ELF file. */
+	int fd;
+	fd = open(filename, O_RDONLY);
+	TEST_ASSERT(fd >= 0, "Failed to open ELF file,\n"
+		"  filename: %s\n"
+		"  rv: %i errno: %i", filename, fd, errno);
+
+	/* Read in and validate ELF Identification Record.
+	 * The ELF Identification record is the first 16 (EI_NIDENT) bytes
+	 * of the ELF header, which is at the beginning of the ELF file.
+	 * For now it is only safe to read the first EI_NIDENT bytes.  Once
+	 * read and validated, the value of e_ehsize can be used to determine
+	 * the real size of the ELF header.
+	 */
+	unsigned char ident[EI_NIDENT];
+	test_read(fd, ident, sizeof(ident));
+	TEST_ASSERT((ident[EI_MAG0] == ELFMAG0) && (ident[EI_MAG1] == ELFMAG1)
+		&& (ident[EI_MAG2] == ELFMAG2) && (ident[EI_MAG3] == ELFMAG3),
+		"ELF MAGIC Mismatch,\n"
+		"  filename: %s\n"
+		"  ident[EI_MAG0 - EI_MAG3]: %02x %02x %02x %02x\n"
+		"  Expected: %02x %02x %02x %02x",
+		filename,
+		ident[EI_MAG0], ident[EI_MAG1], ident[EI_MAG2], ident[EI_MAG3],
+		ELFMAG0, ELFMAG1, ELFMAG2, ELFMAG3);
+	TEST_ASSERT(ident[EI_CLASS] == ELFCLASS64,
+		"Current implementation only able to handle ELFCLASS64,\n"
+		"  filename: %s\n"
+		"  ident[EI_CLASS]: %02x\n"
+		"  expected: %02x",
+		filename,
+		ident[EI_CLASS], ELFCLASS64);
+	TEST_ASSERT(((BYTE_ORDER == LITTLE_ENDIAN)
+			&& (ident[EI_DATA] == ELFDATA2LSB))
+		|| ((BYTE_ORDER == BIG_ENDIAN)
+			&& (ident[EI_DATA] == ELFDATA2MSB)), "Current "
+		"implementation only able to handle\n"
+		"cases where the host and ELF file endianness\n"
+		"is the same:\n"
+		"  host BYTE_ORDER: %u\n"
+		"  host LITTLE_ENDIAN: %u\n"
+		"  host BIG_ENDIAN: %u\n"
+		"  ident[EI_DATA]: %u\n"
+		"  ELFDATA2LSB: %u\n"
+		"  ELFDATA2MSB: %u",
+		BYTE_ORDER, LITTLE_ENDIAN, BIG_ENDIAN,
+		ident[EI_DATA], ELFDATA2LSB, ELFDATA2MSB);
+	TEST_ASSERT(ident[EI_VERSION] == EV_CURRENT,
+		"Current implementation only able to handle current "
+		"ELF version,\n"
+		"  filename: %s\n"
+		"  ident[EI_VERSION]: %02x\n"
+		"  expected: %02x",
+		filename, ident[EI_VERSION], EV_CURRENT);
+
+	/* Read in the ELF header.
+	 * With the ELF Identification portion of the ELF header
+	 * validated, especially that the value at EI_VERSION is
+	 * as expected, it is now safe to read the entire ELF header.
+	 */
+	offset_rv = lseek(fd, 0, SEEK_SET);
+	TEST_ASSERT(offset_rv == 0, "Seek to ELF header failed,\n"
+		"  rv: %zi expected: %i", offset_rv, 0);
+	test_read(fd, hdrp, sizeof(*hdrp));
+	TEST_ASSERT(hdrp->e_phentsize == sizeof(Elf64_Phdr),
+		"Unexpected physical header size,\n"
+		"  hdrp->e_phentsize: %x\n"
+		"  expected: %zx",
+		hdrp->e_phentsize, sizeof(Elf64_Phdr));
+	TEST_ASSERT(hdrp->e_shentsize == sizeof(Elf64_Shdr),
+		"Unexpected section header size,\n"
+		"  hdrp->e_shentsize: %x\n"
+		"  expected: %zx",
+		hdrp->e_shentsize, sizeof(Elf64_Shdr));
+}
+
+/* VM ELF Load
+ *
+ * Input Args:
+ *   filename - Path to ELF file
+ *
+ * Output Args: None
+ *
+ * Input/Output Args:
+ *   vm - Pointer to opaque type that describes the VM.
+ *
+ * Return: None, TEST_ASSERT failures for all error conditions
+ *
+ * Loads the program image of the ELF file specified by filename,
+ * into the virtual address space of the VM pointed to by vm.  On entry
+ * the VM needs to not be using any of the virtual address space used
+ * by the image and it needs to have sufficient available physical pages, to
+ * back the virtual pages used to load the image.
+ */
+void kvm_vm_elf_load(struct kvm_vm *vm, const char *filename,
+	uint32_t data_memslot, uint32_t pgd_memslot)
+{
+	off_t offset, offset_rv;
+	Elf64_Ehdr hdr;
+
+	/* Open the ELF file. */
+	int fd;
+	fd = open(filename, O_RDONLY);
+	TEST_ASSERT(fd >= 0, "Failed to open ELF file,\n"
+		"  filename: %s\n"
+		"  rv: %i errno: %i", filename, fd, errno);
+
+	/* Read in the ELF header. */
+	elfhdr_get(filename, &hdr);
+
+	/* For each program header.
+	 * The following ELF header members specify the location
+	 * and size of the program headers:
+	 *
+	 *   e_phoff - File offset to start of program headers
+	 *   e_phentsize - Size of each program header
+	 *   e_phnum - Number of program header entries
+	 */
+	for (unsigned int n1 = 0; n1 < hdr.e_phnum; n1++) {
+		/* Seek to the beginning of the program header. */
+		offset = hdr.e_phoff + (n1 * hdr.e_phentsize);
+		offset_rv = lseek(fd, offset, SEEK_SET);
+		TEST_ASSERT(offset_rv == offset,
+			"Failed to seek to begining of program header %u,\n"
+			"  filename: %s\n"
+			"  rv: %jd errno: %i",
+			n1, filename, (intmax_t) offset_rv, errno);
+
+		/* Read in the program header. */
+		Elf64_Phdr phdr;
+		test_read(fd, &phdr, sizeof(phdr));
+
+		/* Skip if this header doesn't describe a loadable segment. */
+		if (phdr.p_type != PT_LOAD)
+			continue;
+
+		/* Allocate memory for this segment within the VM. */
+		TEST_ASSERT(phdr.p_memsz > 0, "Unexpected loadable segment "
+			"memsize of 0,\n"
+			"  phdr index: %u p_memsz: 0x%" PRIx64,
+			n1, (uint64_t) phdr.p_memsz);
+		vm_vaddr_t seg_vstart = phdr.p_vaddr;
+		seg_vstart &= ~(vm_vaddr_t)(vm->page_size - 1);
+		vm_vaddr_t seg_vend = phdr.p_vaddr + phdr.p_memsz - 1;
+		seg_vend |= vm->page_size - 1;
+		size_t seg_size = seg_vend - seg_vstart + 1;
+
+		vm_vaddr_t vaddr = vm_vaddr_alloc(vm, seg_size, seg_vstart,
+			data_memslot, pgd_memslot);
+		TEST_ASSERT(vaddr == seg_vstart, "Unable to allocate "
+			"virtual memory for segment at requested min addr,\n"
+			"  segment idx: %u\n"
+			"  seg_vstart: 0x%lx\n"
+			"  vaddr: 0x%lx",
+			n1, seg_vstart, vaddr);
+		memset(addr_gva2hva(vm, vaddr), 0, seg_size);
+		/* TODO(lhuemill): Set permissions of each memory segment
+		 * based on the least-significant 3 bits of phdr.p_flags.
+		 */
+
+		/* Load portion of initial state that is contained within
+		 * the ELF file.
+		 */
+		if (phdr.p_filesz) {
+			offset_rv = lseek(fd, phdr.p_offset, SEEK_SET);
+			TEST_ASSERT(offset_rv == phdr.p_offset,
+				"Seek to program segment offset failed,\n"
+				"  program header idx: %u errno: %i\n"
+				"  offset_rv: 0x%jx\n"
+				"  expected: 0x%jx\n",
+				n1, errno, (intmax_t) offset_rv,
+				(intmax_t) phdr.p_offset);
+			test_read(fd, addr_gva2hva(vm, phdr.p_vaddr),
+				phdr.p_filesz);
+		}
+	}
+}
diff --git a/tools/testing/selftests/kvm/lib/io.c b/tools/testing/selftests/kvm/lib/io.c
new file mode 100644
index 000000000..cff869ffe
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/io.c
@@ -0,0 +1,158 @@
+/*
+ * tools/testing/selftests/kvm/lib/io.c
+ *
+ * Copyright (C) 2018, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ */
+
+#include "test_util.h"
+
+/* Test Write
+ *
+ * A wrapper for write(2), that automatically handles the following
+ * special conditions:
+ *
+ *   + Interrupted system call (EINTR)
+ *   + Write of less than requested amount
+ *   + Non-block return (EAGAIN)
+ *
+ * For each of the above, an additional write is performed to automatically
+ * continue writing the requested data.
+ * There are also many cases where write(2) can return an unexpected
+ * error (e.g. EIO).  Such errors cause a TEST_ASSERT failure.
+ *
+ * Note, for function signature compatibility with write(2), this function
+ * returns the number of bytes written, but that value will always be equal
+ * to the number of requested bytes.  All other conditions in this and
+ * future enhancements to this function either automatically issue another
+ * write(2) or cause a TEST_ASSERT failure.
+ *
+ * Args:
+ *  fd    - Opened file descriptor to file to be written.
+ *  count - Number of bytes to write.
+ *
+ * Output:
+ *  buf   - Starting address of data to be written.
+ *
+ * Return:
+ *  On success, number of bytes written.
+ *  On failure, a TEST_ASSERT failure is caused.
+ */
+ssize_t test_write(int fd, const void *buf, size_t count)
+{
+	ssize_t rc;
+	ssize_t num_written = 0;
+	size_t num_left = count;
+	const char *ptr = buf;
+
+	/* Note: Count of zero is allowed (see "RETURN VALUE" portion of
+	 * write(2) manpage for details.
+	 */
+	TEST_ASSERT(count >= 0, "Unexpected count, count: %li", count);
+
+	do {
+		rc = write(fd, ptr, num_left);
+
+		switch (rc) {
+		case -1:
+			TEST_ASSERT(errno == EAGAIN || errno == EINTR,
+				    "Unexpected write failure,\n"
+				    "  rc: %zi errno: %i", rc, errno);
+			continue;
+
+		case 0:
+			TEST_ASSERT(false, "Unexpected EOF,\n"
+				    "  rc: %zi num_written: %zi num_left: %zu",
+				    rc, num_written, num_left);
+			break;
+
+		default:
+			TEST_ASSERT(rc >= 0, "Unexpected ret from write,\n"
+				"  rc: %zi errno: %i", rc, errno);
+			num_written += rc;
+			num_left -= rc;
+			ptr += rc;
+			break;
+		}
+	} while (num_written < count);
+
+	return num_written;
+}
+
+/* Test Read
+ *
+ * A wrapper for read(2), that automatically handles the following
+ * special conditions:
+ *
+ *   + Interrupted system call (EINTR)
+ *   + Read of less than requested amount
+ *   + Non-block return (EAGAIN)
+ *
+ * For each of the above, an additional read is performed to automatically
+ * continue reading the requested data.
+ * There are also many cases where read(2) can return an unexpected
+ * error (e.g. EIO).  Such errors cause a TEST_ASSERT failure.  Note,
+ * it is expected that the file opened by fd at the current file position
+ * contains at least the number of requested bytes to be read.  A TEST_ASSERT
+ * failure is produced if an End-Of-File condition occurs, before all the
+ * data is read.  It is the callers responsibility to assure that sufficient
+ * data exists.
+ *
+ * Note, for function signature compatibility with read(2), this function
+ * returns the number of bytes read, but that value will always be equal
+ * to the number of requested bytes.  All other conditions in this and
+ * future enhancements to this function either automatically issue another
+ * read(2) or cause a TEST_ASSERT failure.
+ *
+ * Args:
+ *  fd    - Opened file descriptor to file to be read.
+ *  count - Number of bytes to read.
+ *
+ * Output:
+ *  buf   - Starting address of where to write the bytes read.
+ *
+ * Return:
+ *  On success, number of bytes read.
+ *  On failure, a TEST_ASSERT failure is caused.
+ */
+ssize_t test_read(int fd, void *buf, size_t count)
+{
+	ssize_t rc;
+	ssize_t num_read = 0;
+	size_t num_left = count;
+	char *ptr = buf;
+
+	/* Note: Count of zero is allowed (see "If count is zero" portion of
+	 * read(2) manpage for details.
+	 */
+	TEST_ASSERT(count >= 0, "Unexpected count, count: %li", count);
+
+	do {
+		rc = read(fd, ptr, num_left);
+
+		switch (rc) {
+		case -1:
+			TEST_ASSERT(errno == EAGAIN || errno == EINTR,
+				    "Unexpected read failure,\n"
+				    "  rc: %zi errno: %i", rc, errno);
+			break;
+
+		case 0:
+			TEST_ASSERT(false, "Unexpected EOF,\n"
+				    "  rc: %zi num_read: %zi num_left: %zu",
+				    rc, num_read, num_left);
+			break;
+
+		default:
+			TEST_ASSERT(rc > 0, "Unexpected ret from read,\n"
+				    "  rc: %zi errno: %i", rc, errno);
+			num_read += rc;
+			num_left -= rc;
+			ptr += rc;
+			break;
+		}
+	} while (num_read < count);
+
+	return num_read;
+}
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
new file mode 100644
index 000000000..b138fd5e4
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -0,0 +1,1680 @@
+/*
+ * tools/testing/selftests/kvm/lib/kvm_util.c
+ *
+ * Copyright (C) 2018, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ */
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "kvm_util_internal.h"
+
+#include <assert.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <linux/kernel.h>
+
+#define KVM_DEV_PATH "/dev/kvm"
+
+#define KVM_UTIL_PGS_PER_HUGEPG 512
+#define KVM_UTIL_MIN_PADDR      0x2000
+
+/* Aligns x up to the next multiple of size. Size must be a power of 2. */
+static void *align(void *x, size_t size)
+{
+	size_t mask = size - 1;
+	TEST_ASSERT(size != 0 && !(size & (size - 1)),
+		    "size not a power of 2: %lu", size);
+	return (void *) (((size_t) x + mask) & ~mask);
+}
+
+/* Capability
+ *
+ * Input Args:
+ *   cap - Capability
+ *
+ * Output Args: None
+ *
+ * Return:
+ *   On success, the Value corresponding to the capability (KVM_CAP_*)
+ *   specified by the value of cap.  On failure a TEST_ASSERT failure
+ *   is produced.
+ *
+ * Looks up and returns the value corresponding to the capability
+ * (KVM_CAP_*) given by cap.
+ */
+int kvm_check_cap(long cap)
+{
+	int ret;
+	int kvm_fd;
+
+	kvm_fd = open(KVM_DEV_PATH, O_RDONLY);
+	if (kvm_fd < 0)
+		exit(KSFT_SKIP);
+
+	ret = ioctl(kvm_fd, KVM_CHECK_EXTENSION, cap);
+	TEST_ASSERT(ret >= 0, "KVM_CHECK_EXTENSION IOCTL failed,\n"
+		"  rc: %i errno: %i", ret, errno);
+
+	close(kvm_fd);
+
+	return ret;
+}
+
+/* VM Enable Capability
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   cap - Capability
+ *
+ * Output Args: None
+ *
+ * Return: On success, 0. On failure a TEST_ASSERT failure is produced.
+ *
+ * Enables a capability (KVM_CAP_*) on the VM.
+ */
+int vm_enable_cap(struct kvm_vm *vm, struct kvm_enable_cap *cap)
+{
+	int ret;
+
+	ret = ioctl(vm->fd, KVM_ENABLE_CAP, cap);
+	TEST_ASSERT(ret == 0, "KVM_ENABLE_CAP IOCTL failed,\n"
+		"  rc: %i errno: %i", ret, errno);
+
+	return ret;
+}
+
+static void vm_open(struct kvm_vm *vm, int perm)
+{
+	vm->kvm_fd = open(KVM_DEV_PATH, perm);
+	if (vm->kvm_fd < 0)
+		exit(KSFT_SKIP);
+
+	/* Create VM. */
+	vm->fd = ioctl(vm->kvm_fd, KVM_CREATE_VM, NULL);
+	TEST_ASSERT(vm->fd >= 0, "KVM_CREATE_VM ioctl failed, "
+		"rc: %i errno: %i", vm->fd, errno);
+}
+
+/* VM Create
+ *
+ * Input Args:
+ *   mode - VM Mode (e.g. VM_MODE_FLAT48PG)
+ *   phy_pages - Physical memory pages
+ *   perm - permission
+ *
+ * Output Args: None
+ *
+ * Return:
+ *   Pointer to opaque structure that describes the created VM.
+ *
+ * Creates a VM with the mode specified by mode (e.g. VM_MODE_FLAT48PG).
+ * When phy_pages is non-zero, a memory region of phy_pages physical pages
+ * is created and mapped starting at guest physical address 0.  The file
+ * descriptor to control the created VM is created with the permissions
+ * given by perm (e.g. O_RDWR).
+ */
+struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm)
+{
+	struct kvm_vm *vm;
+	int kvm_fd;
+
+	/* Allocate memory. */
+	vm = calloc(1, sizeof(*vm));
+	TEST_ASSERT(vm != NULL, "Insufficent Memory");
+
+	vm->mode = mode;
+	vm_open(vm, perm);
+
+	/* Setup mode specific traits. */
+	switch (vm->mode) {
+	case VM_MODE_FLAT48PG:
+		vm->page_size = 0x1000;
+		vm->page_shift = 12;
+
+		/* Limit to 48-bit canonical virtual addresses. */
+		vm->vpages_valid = sparsebit_alloc();
+		sparsebit_set_num(vm->vpages_valid,
+			0, (1ULL << (48 - 1)) >> vm->page_shift);
+		sparsebit_set_num(vm->vpages_valid,
+			(~((1ULL << (48 - 1)) - 1)) >> vm->page_shift,
+			(1ULL << (48 - 1)) >> vm->page_shift);
+
+		/* Limit physical addresses to 52-bits. */
+		vm->max_gfn = ((1ULL << 52) >> vm->page_shift) - 1;
+		break;
+
+	default:
+		TEST_ASSERT(false, "Unknown guest mode, mode: 0x%x", mode);
+	}
+
+	/* Allocate and setup memory for guest. */
+	vm->vpages_mapped = sparsebit_alloc();
+	if (phy_pages != 0)
+		vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
+					    0, 0, phy_pages, 0);
+
+	return vm;
+}
+
+/* VM Restart
+ *
+ * Input Args:
+ *   vm - VM that has been released before
+ *   perm - permission
+ *
+ * Output Args: None
+ *
+ * Reopens the file descriptors associated to the VM and reinstates the
+ * global state, such as the irqchip and the memory regions that are mapped
+ * into the guest.
+ */
+void kvm_vm_restart(struct kvm_vm *vmp, int perm)
+{
+	struct userspace_mem_region *region;
+
+	vm_open(vmp, perm);
+	if (vmp->has_irqchip)
+		vm_create_irqchip(vmp);
+
+	for (region = vmp->userspace_mem_region_head; region;
+		region = region->next) {
+		int ret = ioctl(vmp->fd, KVM_SET_USER_MEMORY_REGION, &region->region);
+		TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed,\n"
+			    "  rc: %i errno: %i\n"
+			    "  slot: %u flags: 0x%x\n"
+			    "  guest_phys_addr: 0x%lx size: 0x%lx",
+			    ret, errno, region->region.slot, region->region.flags,
+			    region->region.guest_phys_addr,
+			    region->region.memory_size);
+	}
+}
+
+void kvm_vm_get_dirty_log(struct kvm_vm *vm, int slot, void *log)
+{
+	struct kvm_dirty_log args = { .dirty_bitmap = log, .slot = slot };
+	int ret;
+
+	ret = ioctl(vm->fd, KVM_GET_DIRTY_LOG, &args);
+	TEST_ASSERT(ret == 0, "%s: KVM_GET_DIRTY_LOG failed: %s",
+		    strerror(-ret));
+}
+
+/* Userspace Memory Region Find
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   start - Starting VM physical address
+ *   end - Ending VM physical address, inclusive.
+ *
+ * Output Args: None
+ *
+ * Return:
+ *   Pointer to overlapping region, NULL if no such region.
+ *
+ * Searches for a region with any physical memory that overlaps with
+ * any portion of the guest physical addresses from start to end
+ * inclusive.  If multiple overlapping regions exist, a pointer to any
+ * of the regions is returned.  Null is returned only when no overlapping
+ * region exists.
+ */
+static struct userspace_mem_region *userspace_mem_region_find(
+	struct kvm_vm *vm, uint64_t start, uint64_t end)
+{
+	struct userspace_mem_region *region;
+
+	for (region = vm->userspace_mem_region_head; region;
+		region = region->next) {
+		uint64_t existing_start = region->region.guest_phys_addr;
+		uint64_t existing_end = region->region.guest_phys_addr
+			+ region->region.memory_size - 1;
+		if (start <= existing_end && end >= existing_start)
+			return region;
+	}
+
+	return NULL;
+}
+
+/* KVM Userspace Memory Region Find
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   start - Starting VM physical address
+ *   end - Ending VM physical address, inclusive.
+ *
+ * Output Args: None
+ *
+ * Return:
+ *   Pointer to overlapping region, NULL if no such region.
+ *
+ * Public interface to userspace_mem_region_find. Allows tests to look up
+ * the memslot datastructure for a given range of guest physical memory.
+ */
+struct kvm_userspace_memory_region *
+kvm_userspace_memory_region_find(struct kvm_vm *vm, uint64_t start,
+				 uint64_t end)
+{
+	struct userspace_mem_region *region;
+
+	region = userspace_mem_region_find(vm, start, end);
+	if (!region)
+		return NULL;
+
+	return &region->region;
+}
+
+/* VCPU Find
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   vcpuid - VCPU ID
+ *
+ * Output Args: None
+ *
+ * Return:
+ *   Pointer to VCPU structure
+ *
+ * Locates a vcpu structure that describes the VCPU specified by vcpuid and
+ * returns a pointer to it.  Returns NULL if the VM doesn't contain a VCPU
+ * for the specified vcpuid.
+ */
+struct vcpu *vcpu_find(struct kvm_vm *vm,
+	uint32_t vcpuid)
+{
+	struct vcpu *vcpup;
+
+	for (vcpup = vm->vcpu_head; vcpup; vcpup = vcpup->next) {
+		if (vcpup->id == vcpuid)
+			return vcpup;
+	}
+
+	return NULL;
+}
+
+/* VM VCPU Remove
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   vcpuid - VCPU ID
+ *
+ * Output Args: None
+ *
+ * Return: None, TEST_ASSERT failures for all error conditions
+ *
+ * Within the VM specified by vm, removes the VCPU given by vcpuid.
+ */
+static void vm_vcpu_rm(struct kvm_vm *vm, uint32_t vcpuid)
+{
+	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
+	int ret;
+
+	ret = munmap(vcpu->state, sizeof(*vcpu->state));
+	TEST_ASSERT(ret == 0, "munmap of VCPU fd failed, rc: %i "
+		"errno: %i", ret, errno);
+	close(vcpu->fd);
+	TEST_ASSERT(ret == 0, "Close of VCPU fd failed, rc: %i "
+		"errno: %i", ret, errno);
+
+	if (vcpu->next)
+		vcpu->next->prev = vcpu->prev;
+	if (vcpu->prev)
+		vcpu->prev->next = vcpu->next;
+	else
+		vm->vcpu_head = vcpu->next;
+	free(vcpu);
+}
+
+void kvm_vm_release(struct kvm_vm *vmp)
+{
+	int ret;
+
+	/* Free VCPUs. */
+	while (vmp->vcpu_head)
+		vm_vcpu_rm(vmp, vmp->vcpu_head->id);
+
+	/* Close file descriptor for the VM. */
+	ret = close(vmp->fd);
+	TEST_ASSERT(ret == 0, "Close of vm fd failed,\n"
+		"  vmp->fd: %i rc: %i errno: %i", vmp->fd, ret, errno);
+
+	close(vmp->kvm_fd);
+	TEST_ASSERT(ret == 0, "Close of /dev/kvm fd failed,\n"
+		"  vmp->kvm_fd: %i rc: %i errno: %i", vmp->kvm_fd, ret, errno);
+}
+
+/* Destroys and frees the VM pointed to by vmp.
+ */
+void kvm_vm_free(struct kvm_vm *vmp)
+{
+	int ret;
+
+	if (vmp == NULL)
+		return;
+
+	/* Free userspace_mem_regions. */
+	while (vmp->userspace_mem_region_head) {
+		struct userspace_mem_region *region
+			= vmp->userspace_mem_region_head;
+
+		region->region.memory_size = 0;
+		ret = ioctl(vmp->fd, KVM_SET_USER_MEMORY_REGION,
+			&region->region);
+		TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed, "
+			"rc: %i errno: %i", ret, errno);
+
+		vmp->userspace_mem_region_head = region->next;
+		sparsebit_free(&region->unused_phy_pages);
+		ret = munmap(region->mmap_start, region->mmap_size);
+		TEST_ASSERT(ret == 0, "munmap failed, rc: %i errno: %i",
+			    ret, errno);
+
+		free(region);
+	}
+
+	/* Free sparsebit arrays. */
+	sparsebit_free(&vmp->vpages_valid);
+	sparsebit_free(&vmp->vpages_mapped);
+
+	kvm_vm_release(vmp);
+
+	/* Free the structure describing the VM. */
+	free(vmp);
+}
+
+/* Memory Compare, host virtual to guest virtual
+ *
+ * Input Args:
+ *   hva - Starting host virtual address
+ *   vm - Virtual Machine
+ *   gva - Starting guest virtual address
+ *   len - number of bytes to compare
+ *
+ * Output Args: None
+ *
+ * Input/Output Args: None
+ *
+ * Return:
+ *   Returns 0 if the bytes starting at hva for a length of len
+ *   are equal the guest virtual bytes starting at gva.  Returns
+ *   a value < 0, if bytes at hva are less than those at gva.
+ *   Otherwise a value > 0 is returned.
+ *
+ * Compares the bytes starting at the host virtual address hva, for
+ * a length of len, to the guest bytes starting at the guest virtual
+ * address given by gva.
+ */
+int kvm_memcmp_hva_gva(void *hva,
+	struct kvm_vm *vm, vm_vaddr_t gva, size_t len)
+{
+	size_t amt;
+
+	/* Compare a batch of bytes until either a match is found
+	 * or all the bytes have been compared.
+	 */
+	for (uintptr_t offset = 0; offset < len; offset += amt) {
+		uintptr_t ptr1 = (uintptr_t)hva + offset;
+
+		/* Determine host address for guest virtual address
+		 * at offset.
+		 */
+		uintptr_t ptr2 = (uintptr_t)addr_gva2hva(vm, gva + offset);
+
+		/* Determine amount to compare on this pass.
+		 * Don't allow the comparsion to cross a page boundary.
+		 */
+		amt = len - offset;
+		if ((ptr1 >> vm->page_shift) != ((ptr1 + amt) >> vm->page_shift))
+			amt = vm->page_size - (ptr1 % vm->page_size);
+		if ((ptr2 >> vm->page_shift) != ((ptr2 + amt) >> vm->page_shift))
+			amt = vm->page_size - (ptr2 % vm->page_size);
+
+		assert((ptr1 >> vm->page_shift) == ((ptr1 + amt - 1) >> vm->page_shift));
+		assert((ptr2 >> vm->page_shift) == ((ptr2 + amt - 1) >> vm->page_shift));
+
+		/* Perform the comparison.  If there is a difference
+		 * return that result to the caller, otherwise need
+		 * to continue on looking for a mismatch.
+		 */
+		int ret = memcmp((void *)ptr1, (void *)ptr2, amt);
+		if (ret != 0)
+			return ret;
+	}
+
+	/* No mismatch found.  Let the caller know the two memory
+	 * areas are equal.
+	 */
+	return 0;
+}
+
+/* Allocate an instance of struct kvm_cpuid2
+ *
+ * Input Args: None
+ *
+ * Output Args: None
+ *
+ * Return: A pointer to the allocated struct. The caller is responsible
+ * for freeing this struct.
+ *
+ * Since kvm_cpuid2 uses a 0-length array to allow a the size of the
+ * array to be decided at allocation time, allocation is slightly
+ * complicated. This function uses a reasonable default length for
+ * the array and performs the appropriate allocation.
+ */
+static struct kvm_cpuid2 *allocate_kvm_cpuid2(void)
+{
+	struct kvm_cpuid2 *cpuid;
+	int nent = 100;
+	size_t size;
+
+	size = sizeof(*cpuid);
+	size += nent * sizeof(struct kvm_cpuid_entry2);
+	cpuid = malloc(size);
+	if (!cpuid) {
+		perror("malloc");
+		abort();
+	}
+
+	cpuid->nent = nent;
+
+	return cpuid;
+}
+
+/* KVM Supported CPUID Get
+ *
+ * Input Args: None
+ *
+ * Output Args:
+ *
+ * Return: The supported KVM CPUID
+ *
+ * Get the guest CPUID supported by KVM.
+ */
+struct kvm_cpuid2 *kvm_get_supported_cpuid(void)
+{
+	static struct kvm_cpuid2 *cpuid;
+	int ret;
+	int kvm_fd;
+
+	if (cpuid)
+		return cpuid;
+
+	cpuid = allocate_kvm_cpuid2();
+	kvm_fd = open(KVM_DEV_PATH, O_RDONLY);
+	if (kvm_fd < 0)
+		exit(KSFT_SKIP);
+
+	ret = ioctl(kvm_fd, KVM_GET_SUPPORTED_CPUID, cpuid);
+	TEST_ASSERT(ret == 0, "KVM_GET_SUPPORTED_CPUID failed %d %d\n",
+		    ret, errno);
+
+	close(kvm_fd);
+	return cpuid;
+}
+
+/* Locate a cpuid entry.
+ *
+ * Input Args:
+ *   cpuid: The cpuid.
+ *   function: The function of the cpuid entry to find.
+ *
+ * Output Args: None
+ *
+ * Return: A pointer to the cpuid entry. Never returns NULL.
+ */
+struct kvm_cpuid_entry2 *
+kvm_get_supported_cpuid_index(uint32_t function, uint32_t index)
+{
+	struct kvm_cpuid2 *cpuid;
+	struct kvm_cpuid_entry2 *entry = NULL;
+	int i;
+
+	cpuid = kvm_get_supported_cpuid();
+	for (i = 0; i < cpuid->nent; i++) {
+		if (cpuid->entries[i].function == function &&
+		    cpuid->entries[i].index == index) {
+			entry = &cpuid->entries[i];
+			break;
+		}
+	}
+
+	TEST_ASSERT(entry, "Guest CPUID entry not found: (EAX=%x, ECX=%x).",
+		    function, index);
+	return entry;
+}
+
+/* VM Userspace Memory Region Add
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   backing_src - Storage source for this region.
+ *                 NULL to use anonymous memory.
+ *   guest_paddr - Starting guest physical address
+ *   slot - KVM region slot
+ *   npages - Number of physical pages
+ *   flags - KVM memory region flags (e.g. KVM_MEM_LOG_DIRTY_PAGES)
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Allocates a memory area of the number of pages specified by npages
+ * and maps it to the VM specified by vm, at a starting physical address
+ * given by guest_paddr.  The region is created with a KVM region slot
+ * given by slot, which must be unique and < KVM_MEM_SLOTS_NUM.  The
+ * region is created with the flags given by flags.
+ */
+void vm_userspace_mem_region_add(struct kvm_vm *vm,
+	enum vm_mem_backing_src_type src_type,
+	uint64_t guest_paddr, uint32_t slot, uint64_t npages,
+	uint32_t flags)
+{
+	int ret;
+	unsigned long pmem_size = 0;
+	struct userspace_mem_region *region;
+	size_t huge_page_size = KVM_UTIL_PGS_PER_HUGEPG * vm->page_size;
+
+	TEST_ASSERT((guest_paddr % vm->page_size) == 0, "Guest physical "
+		"address not on a page boundary.\n"
+		"  guest_paddr: 0x%lx vm->page_size: 0x%x",
+		guest_paddr, vm->page_size);
+	TEST_ASSERT((((guest_paddr >> vm->page_shift) + npages) - 1)
+		<= vm->max_gfn, "Physical range beyond maximum "
+		"supported physical address,\n"
+		"  guest_paddr: 0x%lx npages: 0x%lx\n"
+		"  vm->max_gfn: 0x%lx vm->page_size: 0x%x",
+		guest_paddr, npages, vm->max_gfn, vm->page_size);
+
+	/* Confirm a mem region with an overlapping address doesn't
+	 * already exist.
+	 */
+	region = (struct userspace_mem_region *) userspace_mem_region_find(
+		vm, guest_paddr, (guest_paddr + npages * vm->page_size) - 1);
+	if (region != NULL)
+		TEST_ASSERT(false, "overlapping userspace_mem_region already "
+			"exists\n"
+			"  requested guest_paddr: 0x%lx npages: 0x%lx "
+			"page_size: 0x%x\n"
+			"  existing guest_paddr: 0x%lx size: 0x%lx",
+			guest_paddr, npages, vm->page_size,
+			(uint64_t) region->region.guest_phys_addr,
+			(uint64_t) region->region.memory_size);
+
+	/* Confirm no region with the requested slot already exists. */
+	for (region = vm->userspace_mem_region_head; region;
+		region = region->next) {
+		if (region->region.slot == slot)
+			break;
+	}
+	if (region != NULL)
+		TEST_ASSERT(false, "A mem region with the requested slot "
+			"already exists.\n"
+			"  requested slot: %u paddr: 0x%lx npages: 0x%lx\n"
+			"  existing slot: %u paddr: 0x%lx size: 0x%lx",
+			slot, guest_paddr, npages,
+			region->region.slot,
+			(uint64_t) region->region.guest_phys_addr,
+			(uint64_t) region->region.memory_size);
+
+	/* Allocate and initialize new mem region structure. */
+	region = calloc(1, sizeof(*region));
+	TEST_ASSERT(region != NULL, "Insufficient Memory");
+	region->mmap_size = npages * vm->page_size;
+
+	/* Enough memory to align up to a huge page. */
+	if (src_type == VM_MEM_SRC_ANONYMOUS_THP)
+		region->mmap_size += huge_page_size;
+	region->mmap_start = mmap(NULL, region->mmap_size,
+				  PROT_READ | PROT_WRITE,
+				  MAP_PRIVATE | MAP_ANONYMOUS
+				  | (src_type == VM_MEM_SRC_ANONYMOUS_HUGETLB ? MAP_HUGETLB : 0),
+				  -1, 0);
+	TEST_ASSERT(region->mmap_start != MAP_FAILED,
+		    "test_malloc failed, mmap_start: %p errno: %i",
+		    region->mmap_start, errno);
+
+	/* Align THP allocation up to start of a huge page. */
+	region->host_mem = align(region->mmap_start,
+				 src_type == VM_MEM_SRC_ANONYMOUS_THP ?  huge_page_size : 1);
+
+	/* As needed perform madvise */
+	if (src_type == VM_MEM_SRC_ANONYMOUS || src_type == VM_MEM_SRC_ANONYMOUS_THP) {
+		ret = madvise(region->host_mem, npages * vm->page_size,
+			     src_type == VM_MEM_SRC_ANONYMOUS ? MADV_NOHUGEPAGE : MADV_HUGEPAGE);
+		TEST_ASSERT(ret == 0, "madvise failed,\n"
+			    "  addr: %p\n"
+			    "  length: 0x%lx\n"
+			    "  src_type: %x",
+			    region->host_mem, npages * vm->page_size, src_type);
+	}
+
+	region->unused_phy_pages = sparsebit_alloc();
+	sparsebit_set_num(region->unused_phy_pages,
+		guest_paddr >> vm->page_shift, npages);
+	region->region.slot = slot;
+	region->region.flags = flags;
+	region->region.guest_phys_addr = guest_paddr;
+	region->region.memory_size = npages * vm->page_size;
+	region->region.userspace_addr = (uintptr_t) region->host_mem;
+	ret = ioctl(vm->fd, KVM_SET_USER_MEMORY_REGION, &region->region);
+	TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed,\n"
+		"  rc: %i errno: %i\n"
+		"  slot: %u flags: 0x%x\n"
+		"  guest_phys_addr: 0x%lx size: 0x%lx",
+		ret, errno, slot, flags,
+		guest_paddr, (uint64_t) region->region.memory_size);
+
+	/* Add to linked-list of memory regions. */
+	if (vm->userspace_mem_region_head)
+		vm->userspace_mem_region_head->prev = region;
+	region->next = vm->userspace_mem_region_head;
+	vm->userspace_mem_region_head = region;
+}
+
+/* Memslot to region
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   memslot - KVM memory slot ID
+ *
+ * Output Args: None
+ *
+ * Return:
+ *   Pointer to memory region structure that describe memory region
+ *   using kvm memory slot ID given by memslot.  TEST_ASSERT failure
+ *   on error (e.g. currently no memory region using memslot as a KVM
+ *   memory slot ID).
+ */
+static struct userspace_mem_region *memslot2region(struct kvm_vm *vm,
+	uint32_t memslot)
+{
+	struct userspace_mem_region *region;
+
+	for (region = vm->userspace_mem_region_head; region;
+		region = region->next) {
+		if (region->region.slot == memslot)
+			break;
+	}
+	if (region == NULL) {
+		fprintf(stderr, "No mem region with the requested slot found,\n"
+			"  requested slot: %u\n", memslot);
+		fputs("---- vm dump ----\n", stderr);
+		vm_dump(stderr, vm, 2);
+		TEST_ASSERT(false, "Mem region not found");
+	}
+
+	return region;
+}
+
+/* VM Memory Region Flags Set
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   flags - Starting guest physical address
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Sets the flags of the memory region specified by the value of slot,
+ * to the values given by flags.
+ */
+void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags)
+{
+	int ret;
+	struct userspace_mem_region *region;
+
+	/* Locate memory region. */
+	region = memslot2region(vm, slot);
+
+	region->region.flags = flags;
+
+	ret = ioctl(vm->fd, KVM_SET_USER_MEMORY_REGION, &region->region);
+
+	TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed,\n"
+		"  rc: %i errno: %i slot: %u flags: 0x%x",
+		ret, errno, slot, flags);
+}
+
+/* VCPU mmap Size
+ *
+ * Input Args: None
+ *
+ * Output Args: None
+ *
+ * Return:
+ *   Size of VCPU state
+ *
+ * Returns the size of the structure pointed to by the return value
+ * of vcpu_state().
+ */
+static int vcpu_mmap_sz(void)
+{
+	int dev_fd, ret;
+
+	dev_fd = open(KVM_DEV_PATH, O_RDONLY);
+	if (dev_fd < 0)
+		exit(KSFT_SKIP);
+
+	ret = ioctl(dev_fd, KVM_GET_VCPU_MMAP_SIZE, NULL);
+	TEST_ASSERT(ret >= sizeof(struct kvm_run),
+		"%s KVM_GET_VCPU_MMAP_SIZE ioctl failed, rc: %i errno: %i",
+		__func__, ret, errno);
+
+	close(dev_fd);
+
+	return ret;
+}
+
+/* VM VCPU Add
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   vcpuid - VCPU ID
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Creates and adds to the VM specified by vm and virtual CPU with
+ * the ID given by vcpuid.
+ */
+void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid, int pgd_memslot, int gdt_memslot)
+{
+	struct vcpu *vcpu;
+
+	/* Confirm a vcpu with the specified id doesn't already exist. */
+	vcpu = vcpu_find(vm, vcpuid);
+	if (vcpu != NULL)
+		TEST_ASSERT(false, "vcpu with the specified id "
+			"already exists,\n"
+			"  requested vcpuid: %u\n"
+			"  existing vcpuid: %u state: %p",
+			vcpuid, vcpu->id, vcpu->state);
+
+	/* Allocate and initialize new vcpu structure. */
+	vcpu = calloc(1, sizeof(*vcpu));
+	TEST_ASSERT(vcpu != NULL, "Insufficient Memory");
+	vcpu->id = vcpuid;
+	vcpu->fd = ioctl(vm->fd, KVM_CREATE_VCPU, vcpuid);
+	TEST_ASSERT(vcpu->fd >= 0, "KVM_CREATE_VCPU failed, rc: %i errno: %i",
+		vcpu->fd, errno);
+
+	TEST_ASSERT(vcpu_mmap_sz() >= sizeof(*vcpu->state), "vcpu mmap size "
+		"smaller than expected, vcpu_mmap_sz: %i expected_min: %zi",
+		vcpu_mmap_sz(), sizeof(*vcpu->state));
+	vcpu->state = (struct kvm_run *) mmap(NULL, sizeof(*vcpu->state),
+		PROT_READ | PROT_WRITE, MAP_SHARED, vcpu->fd, 0);
+	TEST_ASSERT(vcpu->state != MAP_FAILED, "mmap vcpu_state failed, "
+		"vcpu id: %u errno: %i", vcpuid, errno);
+
+	/* Add to linked-list of VCPUs. */
+	if (vm->vcpu_head)
+		vm->vcpu_head->prev = vcpu;
+	vcpu->next = vm->vcpu_head;
+	vm->vcpu_head = vcpu;
+
+	vcpu_setup(vm, vcpuid, pgd_memslot, gdt_memslot);
+}
+
+/* VM Virtual Address Unused Gap
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   sz - Size (bytes)
+ *   vaddr_min - Minimum Virtual Address
+ *
+ * Output Args: None
+ *
+ * Return:
+ *   Lowest virtual address at or below vaddr_min, with at least
+ *   sz unused bytes.  TEST_ASSERT failure if no area of at least
+ *   size sz is available.
+ *
+ * Within the VM specified by vm, locates the lowest starting virtual
+ * address >= vaddr_min, that has at least sz unallocated bytes.  A
+ * TEST_ASSERT failure occurs for invalid input or no area of at least
+ * sz unallocated bytes >= vaddr_min is available.
+ */
+static vm_vaddr_t vm_vaddr_unused_gap(struct kvm_vm *vm, size_t sz,
+	vm_vaddr_t vaddr_min)
+{
+	uint64_t pages = (sz + vm->page_size - 1) >> vm->page_shift;
+
+	/* Determine lowest permitted virtual page index. */
+	uint64_t pgidx_start = (vaddr_min + vm->page_size - 1) >> vm->page_shift;
+	if ((pgidx_start * vm->page_size) < vaddr_min)
+			goto no_va_found;
+
+	/* Loop over section with enough valid virtual page indexes. */
+	if (!sparsebit_is_set_num(vm->vpages_valid,
+		pgidx_start, pages))
+		pgidx_start = sparsebit_next_set_num(vm->vpages_valid,
+			pgidx_start, pages);
+	do {
+		/*
+		 * Are there enough unused virtual pages available at
+		 * the currently proposed starting virtual page index.
+		 * If not, adjust proposed starting index to next
+		 * possible.
+		 */
+		if (sparsebit_is_clear_num(vm->vpages_mapped,
+			pgidx_start, pages))
+			goto va_found;
+		pgidx_start = sparsebit_next_clear_num(vm->vpages_mapped,
+			pgidx_start, pages);
+		if (pgidx_start == 0)
+			goto no_va_found;
+
+		/*
+		 * If needed, adjust proposed starting virtual address,
+		 * to next range of valid virtual addresses.
+		 */
+		if (!sparsebit_is_set_num(vm->vpages_valid,
+			pgidx_start, pages)) {
+			pgidx_start = sparsebit_next_set_num(
+				vm->vpages_valid, pgidx_start, pages);
+			if (pgidx_start == 0)
+				goto no_va_found;
+		}
+	} while (pgidx_start != 0);
+
+no_va_found:
+	TEST_ASSERT(false, "No vaddr of specified pages available, "
+		"pages: 0x%lx", pages);
+
+	/* NOT REACHED */
+	return -1;
+
+va_found:
+	TEST_ASSERT(sparsebit_is_set_num(vm->vpages_valid,
+		pgidx_start, pages),
+		"Unexpected, invalid virtual page index range,\n"
+		"  pgidx_start: 0x%lx\n"
+		"  pages: 0x%lx",
+		pgidx_start, pages);
+	TEST_ASSERT(sparsebit_is_clear_num(vm->vpages_mapped,
+		pgidx_start, pages),
+		"Unexpected, pages already mapped,\n"
+		"  pgidx_start: 0x%lx\n"
+		"  pages: 0x%lx",
+		pgidx_start, pages);
+
+	return pgidx_start * vm->page_size;
+}
+
+/* VM Virtual Address Allocate
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   sz - Size in bytes
+ *   vaddr_min - Minimum starting virtual address
+ *   data_memslot - Memory region slot for data pages
+ *   pgd_memslot - Memory region slot for new virtual translation tables
+ *
+ * Output Args: None
+ *
+ * Return:
+ *   Starting guest virtual address
+ *
+ * Allocates at least sz bytes within the virtual address space of the vm
+ * given by vm.  The allocated bytes are mapped to a virtual address >=
+ * the address given by vaddr_min.  Note that each allocation uses a
+ * a unique set of pages, with the minimum real allocation being at least
+ * a page.
+ */
+vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min,
+	uint32_t data_memslot, uint32_t pgd_memslot)
+{
+	uint64_t pages = (sz >> vm->page_shift) + ((sz % vm->page_size) != 0);
+
+	virt_pgd_alloc(vm, pgd_memslot);
+
+	/* Find an unused range of virtual page addresses of at least
+	 * pages in length.
+	 */
+	vm_vaddr_t vaddr_start = vm_vaddr_unused_gap(vm, sz, vaddr_min);
+
+	/* Map the virtual pages. */
+	for (vm_vaddr_t vaddr = vaddr_start; pages > 0;
+		pages--, vaddr += vm->page_size) {
+		vm_paddr_t paddr;
+
+		paddr = vm_phy_page_alloc(vm, KVM_UTIL_MIN_PADDR, data_memslot);
+
+		virt_pg_map(vm, vaddr, paddr, pgd_memslot);
+
+		sparsebit_set(vm->vpages_mapped,
+			vaddr >> vm->page_shift);
+	}
+
+	return vaddr_start;
+}
+
+/*
+ * Map a range of VM virtual address to the VM's physical address
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   vaddr - Virtuall address to map
+ *   paddr - VM Physical Address
+ *   size - The size of the range to map
+ *   pgd_memslot - Memory region slot for new virtual translation tables
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Within the VM given by vm, creates a virtual translation for the
+ * page range starting at vaddr to the page range starting at paddr.
+ */
+void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
+	      size_t size, uint32_t pgd_memslot)
+{
+	size_t page_size = vm->page_size;
+	size_t npages = size / page_size;
+
+	TEST_ASSERT(vaddr + size > vaddr, "Vaddr overflow");
+	TEST_ASSERT(paddr + size > paddr, "Paddr overflow");
+
+	while (npages--) {
+		virt_pg_map(vm, vaddr, paddr, pgd_memslot);
+		vaddr += page_size;
+		paddr += page_size;
+	}
+}
+
+/* Address VM Physical to Host Virtual
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   gpa - VM physical address
+ *
+ * Output Args: None
+ *
+ * Return:
+ *   Equivalent host virtual address
+ *
+ * Locates the memory region containing the VM physical address given
+ * by gpa, within the VM given by vm.  When found, the host virtual
+ * address providing the memory to the vm physical address is returned.
+ * A TEST_ASSERT failure occurs if no region containing gpa exists.
+ */
+void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa)
+{
+	struct userspace_mem_region *region;
+	for (region = vm->userspace_mem_region_head; region;
+	     region = region->next) {
+		if ((gpa >= region->region.guest_phys_addr)
+			&& (gpa <= (region->region.guest_phys_addr
+				+ region->region.memory_size - 1)))
+			return (void *) ((uintptr_t) region->host_mem
+				+ (gpa - region->region.guest_phys_addr));
+	}
+
+	TEST_ASSERT(false, "No vm physical memory at 0x%lx", gpa);
+	return NULL;
+}
+
+/* Address Host Virtual to VM Physical
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   hva - Host virtual address
+ *
+ * Output Args: None
+ *
+ * Return:
+ *   Equivalent VM physical address
+ *
+ * Locates the memory region containing the host virtual address given
+ * by hva, within the VM given by vm.  When found, the equivalent
+ * VM physical address is returned. A TEST_ASSERT failure occurs if no
+ * region containing hva exists.
+ */
+vm_paddr_t addr_hva2gpa(struct kvm_vm *vm, void *hva)
+{
+	struct userspace_mem_region *region;
+	for (region = vm->userspace_mem_region_head; region;
+	     region = region->next) {
+		if ((hva >= region->host_mem)
+			&& (hva <= (region->host_mem
+				+ region->region.memory_size - 1)))
+			return (vm_paddr_t) ((uintptr_t)
+				region->region.guest_phys_addr
+				+ (hva - (uintptr_t) region->host_mem));
+	}
+
+	TEST_ASSERT(false, "No mapping to a guest physical address, "
+		"hva: %p", hva);
+	return -1;
+}
+
+/* VM Create IRQ Chip
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Creates an interrupt controller chip for the VM specified by vm.
+ */
+void vm_create_irqchip(struct kvm_vm *vm)
+{
+	int ret;
+
+	ret = ioctl(vm->fd, KVM_CREATE_IRQCHIP, 0);
+	TEST_ASSERT(ret == 0, "KVM_CREATE_IRQCHIP IOCTL failed, "
+		"rc: %i errno: %i", ret, errno);
+
+	vm->has_irqchip = true;
+}
+
+/* VM VCPU State
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   vcpuid - VCPU ID
+ *
+ * Output Args: None
+ *
+ * Return:
+ *   Pointer to structure that describes the state of the VCPU.
+ *
+ * Locates and returns a pointer to a structure that describes the
+ * state of the VCPU with the given vcpuid.
+ */
+struct kvm_run *vcpu_state(struct kvm_vm *vm, uint32_t vcpuid)
+{
+	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
+	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+
+	return vcpu->state;
+}
+
+/* VM VCPU Run
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   vcpuid - VCPU ID
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Switch to executing the code for the VCPU given by vcpuid, within the VM
+ * given by vm.
+ */
+void vcpu_run(struct kvm_vm *vm, uint32_t vcpuid)
+{
+	int ret = _vcpu_run(vm, vcpuid);
+	TEST_ASSERT(ret == 0, "KVM_RUN IOCTL failed, "
+		"rc: %i errno: %i", ret, errno);
+}
+
+int _vcpu_run(struct kvm_vm *vm, uint32_t vcpuid)
+{
+	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
+	int rc;
+
+	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+        do {
+		rc = ioctl(vcpu->fd, KVM_RUN, NULL);
+	} while (rc == -1 && errno == EINTR);
+	return rc;
+}
+
+/* VM VCPU Set MP State
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   vcpuid - VCPU ID
+ *   mp_state - mp_state to be set
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Sets the MP state of the VCPU given by vcpuid, to the state given
+ * by mp_state.
+ */
+void vcpu_set_mp_state(struct kvm_vm *vm, uint32_t vcpuid,
+	struct kvm_mp_state *mp_state)
+{
+	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
+	int ret;
+
+	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+
+	ret = ioctl(vcpu->fd, KVM_SET_MP_STATE, mp_state);
+	TEST_ASSERT(ret == 0, "KVM_SET_MP_STATE IOCTL failed, "
+		"rc: %i errno: %i", ret, errno);
+}
+
+/* VM VCPU Regs Get
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   vcpuid - VCPU ID
+ *
+ * Output Args:
+ *   regs - current state of VCPU regs
+ *
+ * Return: None
+ *
+ * Obtains the current register state for the VCPU specified by vcpuid
+ * and stores it at the location given by regs.
+ */
+void vcpu_regs_get(struct kvm_vm *vm,
+	uint32_t vcpuid, struct kvm_regs *regs)
+{
+	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
+	int ret;
+
+	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+
+	/* Get the regs. */
+	ret = ioctl(vcpu->fd, KVM_GET_REGS, regs);
+	TEST_ASSERT(ret == 0, "KVM_GET_REGS failed, rc: %i errno: %i",
+		ret, errno);
+}
+
+/* VM VCPU Regs Set
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   vcpuid - VCPU ID
+ *   regs - Values to set VCPU regs to
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Sets the regs of the VCPU specified by vcpuid to the values
+ * given by regs.
+ */
+void vcpu_regs_set(struct kvm_vm *vm,
+	uint32_t vcpuid, struct kvm_regs *regs)
+{
+	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
+	int ret;
+
+	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+
+	/* Set the regs. */
+	ret = ioctl(vcpu->fd, KVM_SET_REGS, regs);
+	TEST_ASSERT(ret == 0, "KVM_SET_REGS failed, rc: %i errno: %i",
+		ret, errno);
+}
+
+void vcpu_events_get(struct kvm_vm *vm, uint32_t vcpuid,
+			  struct kvm_vcpu_events *events)
+{
+	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
+	int ret;
+
+	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+
+	/* Get the regs. */
+	ret = ioctl(vcpu->fd, KVM_GET_VCPU_EVENTS, events);
+	TEST_ASSERT(ret == 0, "KVM_GET_VCPU_EVENTS, failed, rc: %i errno: %i",
+		ret, errno);
+}
+
+void vcpu_events_set(struct kvm_vm *vm, uint32_t vcpuid,
+			  struct kvm_vcpu_events *events)
+{
+	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
+	int ret;
+
+	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+
+	/* Set the regs. */
+	ret = ioctl(vcpu->fd, KVM_SET_VCPU_EVENTS, events);
+	TEST_ASSERT(ret == 0, "KVM_SET_VCPU_EVENTS, failed, rc: %i errno: %i",
+		ret, errno);
+}
+
+/* VCPU Get MSR
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   vcpuid - VCPU ID
+ *   msr_index - Index of MSR
+ *
+ * Output Args: None
+ *
+ * Return: On success, value of the MSR. On failure a TEST_ASSERT is produced.
+ *
+ * Get value of MSR for VCPU.
+ */
+uint64_t vcpu_get_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index)
+{
+	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
+	struct {
+		struct kvm_msrs header;
+		struct kvm_msr_entry entry;
+	} buffer = {};
+	int r;
+
+	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+	buffer.header.nmsrs = 1;
+	buffer.entry.index = msr_index;
+	r = ioctl(vcpu->fd, KVM_GET_MSRS, &buffer.header);
+	TEST_ASSERT(r == 1, "KVM_GET_MSRS IOCTL failed,\n"
+		"  rc: %i errno: %i", r, errno);
+
+	return buffer.entry.data;
+}
+
+/* VCPU Set MSR
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   vcpuid - VCPU ID
+ *   msr_index - Index of MSR
+ *   msr_value - New value of MSR
+ *
+ * Output Args: None
+ *
+ * Return: On success, nothing. On failure a TEST_ASSERT is produced.
+ *
+ * Set value of MSR for VCPU.
+ */
+void vcpu_set_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index,
+	uint64_t msr_value)
+{
+	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
+	struct {
+		struct kvm_msrs header;
+		struct kvm_msr_entry entry;
+	} buffer = {};
+	int r;
+
+	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+	memset(&buffer, 0, sizeof(buffer));
+	buffer.header.nmsrs = 1;
+	buffer.entry.index = msr_index;
+	buffer.entry.data = msr_value;
+	r = ioctl(vcpu->fd, KVM_SET_MSRS, &buffer.header);
+	TEST_ASSERT(r == 1, "KVM_SET_MSRS IOCTL failed,\n"
+		"  rc: %i errno: %i", r, errno);
+}
+
+/* VM VCPU Args Set
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   vcpuid - VCPU ID
+ *   num - number of arguments
+ *   ... - arguments, each of type uint64_t
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Sets the first num function input arguments to the values
+ * given as variable args.  Each of the variable args is expected to
+ * be of type uint64_t.
+ */
+void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...)
+{
+	va_list ap;
+	struct kvm_regs regs;
+
+	TEST_ASSERT(num >= 1 && num <= 6, "Unsupported number of args,\n"
+		    "  num: %u\n",
+		    num);
+
+	va_start(ap, num);
+	vcpu_regs_get(vm, vcpuid, &regs);
+
+	if (num >= 1)
+		regs.rdi = va_arg(ap, uint64_t);
+
+	if (num >= 2)
+		regs.rsi = va_arg(ap, uint64_t);
+
+	if (num >= 3)
+		regs.rdx = va_arg(ap, uint64_t);
+
+	if (num >= 4)
+		regs.rcx = va_arg(ap, uint64_t);
+
+	if (num >= 5)
+		regs.r8 = va_arg(ap, uint64_t);
+
+	if (num >= 6)
+		regs.r9 = va_arg(ap, uint64_t);
+
+	vcpu_regs_set(vm, vcpuid, &regs);
+	va_end(ap);
+}
+
+/* VM VCPU System Regs Get
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   vcpuid - VCPU ID
+ *
+ * Output Args:
+ *   sregs - current state of VCPU system regs
+ *
+ * Return: None
+ *
+ * Obtains the current system register state for the VCPU specified by
+ * vcpuid and stores it at the location given by sregs.
+ */
+void vcpu_sregs_get(struct kvm_vm *vm,
+	uint32_t vcpuid, struct kvm_sregs *sregs)
+{
+	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
+	int ret;
+
+	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+
+	/* Get the regs. */
+	/* Get the regs. */
+	ret = ioctl(vcpu->fd, KVM_GET_SREGS, sregs);
+	TEST_ASSERT(ret == 0, "KVM_GET_SREGS failed, rc: %i errno: %i",
+		ret, errno);
+}
+
+/* VM VCPU System Regs Set
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   vcpuid - VCPU ID
+ *   sregs - Values to set VCPU system regs to
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Sets the system regs of the VCPU specified by vcpuid to the values
+ * given by sregs.
+ */
+void vcpu_sregs_set(struct kvm_vm *vm,
+	uint32_t vcpuid, struct kvm_sregs *sregs)
+{
+	int ret = _vcpu_sregs_set(vm, vcpuid, sregs);
+	TEST_ASSERT(ret == 0, "KVM_RUN IOCTL failed, "
+		"rc: %i errno: %i", ret, errno);
+}
+
+int _vcpu_sregs_set(struct kvm_vm *vm,
+	uint32_t vcpuid, struct kvm_sregs *sregs)
+{
+	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
+	int ret;
+
+	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+
+	/* Get the regs. */
+	return ioctl(vcpu->fd, KVM_SET_SREGS, sregs);
+}
+
+/* VCPU Ioctl
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   vcpuid - VCPU ID
+ *   cmd - Ioctl number
+ *   arg - Argument to pass to the ioctl
+ *
+ * Return: None
+ *
+ * Issues an arbitrary ioctl on a VCPU fd.
+ */
+void vcpu_ioctl(struct kvm_vm *vm,
+	uint32_t vcpuid, unsigned long cmd, void *arg)
+{
+	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
+	int ret;
+
+	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+
+	ret = ioctl(vcpu->fd, cmd, arg);
+	TEST_ASSERT(ret == 0, "vcpu ioctl %lu failed, rc: %i errno: %i (%s)",
+		cmd, ret, errno, strerror(errno));
+}
+
+/* VM Ioctl
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   cmd - Ioctl number
+ *   arg - Argument to pass to the ioctl
+ *
+ * Return: None
+ *
+ * Issues an arbitrary ioctl on a VM fd.
+ */
+void vm_ioctl(struct kvm_vm *vm, unsigned long cmd, void *arg)
+{
+	int ret;
+
+	ret = ioctl(vm->fd, cmd, arg);
+	TEST_ASSERT(ret == 0, "vm ioctl %lu failed, rc: %i errno: %i (%s)",
+		cmd, ret, errno, strerror(errno));
+}
+
+/* VM Dump
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   indent - Left margin indent amount
+ *
+ * Output Args:
+ *   stream - Output FILE stream
+ *
+ * Return: None
+ *
+ * Dumps the current state of the VM given by vm, to the FILE stream
+ * given by stream.
+ */
+void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
+{
+	struct userspace_mem_region *region;
+	struct vcpu *vcpu;
+
+	fprintf(stream, "%*smode: 0x%x\n", indent, "", vm->mode);
+	fprintf(stream, "%*sfd: %i\n", indent, "", vm->fd);
+	fprintf(stream, "%*spage_size: 0x%x\n", indent, "", vm->page_size);
+	fprintf(stream, "%*sMem Regions:\n", indent, "");
+	for (region = vm->userspace_mem_region_head; region;
+		region = region->next) {
+		fprintf(stream, "%*sguest_phys: 0x%lx size: 0x%lx "
+			"host_virt: %p\n", indent + 2, "",
+			(uint64_t) region->region.guest_phys_addr,
+			(uint64_t) region->region.memory_size,
+			region->host_mem);
+		fprintf(stream, "%*sunused_phy_pages: ", indent + 2, "");
+		sparsebit_dump(stream, region->unused_phy_pages, 0);
+	}
+	fprintf(stream, "%*sMapped Virtual Pages:\n", indent, "");
+	sparsebit_dump(stream, vm->vpages_mapped, indent + 2);
+	fprintf(stream, "%*spgd_created: %u\n", indent, "",
+		vm->pgd_created);
+	if (vm->pgd_created) {
+		fprintf(stream, "%*sVirtual Translation Tables:\n",
+			indent + 2, "");
+		virt_dump(stream, vm, indent + 4);
+	}
+	fprintf(stream, "%*sVCPUs:\n", indent, "");
+	for (vcpu = vm->vcpu_head; vcpu; vcpu = vcpu->next)
+		vcpu_dump(stream, vm, vcpu->id, indent + 2);
+}
+
+/* VM VCPU Dump
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   vcpuid - VCPU ID
+ *   indent - Left margin indent amount
+ *
+ * Output Args:
+ *   stream - Output FILE stream
+ *
+ * Return: None
+ *
+ * Dumps the current state of the VCPU specified by vcpuid, within the VM
+ * given by vm, to the FILE stream given by stream.
+ */
+void vcpu_dump(FILE *stream, struct kvm_vm *vm,
+	uint32_t vcpuid, uint8_t indent)
+{
+		struct kvm_regs regs;
+		struct kvm_sregs sregs;
+
+		fprintf(stream, "%*scpuid: %u\n", indent, "", vcpuid);
+
+		fprintf(stream, "%*sregs:\n", indent + 2, "");
+		vcpu_regs_get(vm, vcpuid, &regs);
+		regs_dump(stream, &regs, indent + 4);
+
+		fprintf(stream, "%*ssregs:\n", indent + 2, "");
+		vcpu_sregs_get(vm, vcpuid, &sregs);
+		sregs_dump(stream, &sregs, indent + 4);
+}
+
+/* Known KVM exit reasons */
+static struct exit_reason {
+	unsigned int reason;
+	const char *name;
+} exit_reasons_known[] = {
+	{KVM_EXIT_UNKNOWN, "UNKNOWN"},
+	{KVM_EXIT_EXCEPTION, "EXCEPTION"},
+	{KVM_EXIT_IO, "IO"},
+	{KVM_EXIT_HYPERCALL, "HYPERCALL"},
+	{KVM_EXIT_DEBUG, "DEBUG"},
+	{KVM_EXIT_HLT, "HLT"},
+	{KVM_EXIT_MMIO, "MMIO"},
+	{KVM_EXIT_IRQ_WINDOW_OPEN, "IRQ_WINDOW_OPEN"},
+	{KVM_EXIT_SHUTDOWN, "SHUTDOWN"},
+	{KVM_EXIT_FAIL_ENTRY, "FAIL_ENTRY"},
+	{KVM_EXIT_INTR, "INTR"},
+	{KVM_EXIT_SET_TPR, "SET_TPR"},
+	{KVM_EXIT_TPR_ACCESS, "TPR_ACCESS"},
+	{KVM_EXIT_S390_SIEIC, "S390_SIEIC"},
+	{KVM_EXIT_S390_RESET, "S390_RESET"},
+	{KVM_EXIT_DCR, "DCR"},
+	{KVM_EXIT_NMI, "NMI"},
+	{KVM_EXIT_INTERNAL_ERROR, "INTERNAL_ERROR"},
+	{KVM_EXIT_OSI, "OSI"},
+	{KVM_EXIT_PAPR_HCALL, "PAPR_HCALL"},
+#ifdef KVM_EXIT_MEMORY_NOT_PRESENT
+	{KVM_EXIT_MEMORY_NOT_PRESENT, "MEMORY_NOT_PRESENT"},
+#endif
+};
+
+/* Exit Reason String
+ *
+ * Input Args:
+ *   exit_reason - Exit reason
+ *
+ * Output Args: None
+ *
+ * Return:
+ *   Constant string pointer describing the exit reason.
+ *
+ * Locates and returns a constant string that describes the KVM exit
+ * reason given by exit_reason.  If no such string is found, a constant
+ * string of "Unknown" is returned.
+ */
+const char *exit_reason_str(unsigned int exit_reason)
+{
+	unsigned int n1;
+
+	for (n1 = 0; n1 < ARRAY_SIZE(exit_reasons_known); n1++) {
+		if (exit_reason == exit_reasons_known[n1].reason)
+			return exit_reasons_known[n1].name;
+	}
+
+	return "Unknown";
+}
+
+/* Physical Page Allocate
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   paddr_min - Physical address minimum
+ *   memslot - Memory region to allocate page from
+ *
+ * Output Args: None
+ *
+ * Return:
+ *   Starting physical address
+ *
+ * Within the VM specified by vm, locates an available physical page
+ * at or above paddr_min.  If found, the page is marked as in use
+ * and its address is returned.  A TEST_ASSERT failure occurs if no
+ * page is available at or above paddr_min.
+ */
+vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm,
+	vm_paddr_t paddr_min, uint32_t memslot)
+{
+	struct userspace_mem_region *region;
+	sparsebit_idx_t pg;
+
+	TEST_ASSERT((paddr_min % vm->page_size) == 0, "Min physical address "
+		"not divisible by page size.\n"
+		"  paddr_min: 0x%lx page_size: 0x%x",
+		paddr_min, vm->page_size);
+
+	/* Locate memory region. */
+	region = memslot2region(vm, memslot);
+
+	/* Locate next available physical page at or above paddr_min. */
+	pg = paddr_min >> vm->page_shift;
+
+	if (!sparsebit_is_set(region->unused_phy_pages, pg)) {
+		pg = sparsebit_next_set(region->unused_phy_pages, pg);
+		if (pg == 0) {
+			fprintf(stderr, "No guest physical page available, "
+				"paddr_min: 0x%lx page_size: 0x%x memslot: %u",
+				paddr_min, vm->page_size, memslot);
+			fputs("---- vm dump ----\n", stderr);
+			vm_dump(stderr, vm, 2);
+			abort();
+		}
+	}
+
+	/* Specify page as in use and return its address. */
+	sparsebit_clear(region->unused_phy_pages, pg);
+
+	return pg * vm->page_size;
+}
+
+/* Address Guest Virtual to Host Virtual
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   gva - VM virtual address
+ *
+ * Output Args: None
+ *
+ * Return:
+ *   Equivalent host virtual address
+ */
+void *addr_gva2hva(struct kvm_vm *vm, vm_vaddr_t gva)
+{
+	return addr_gpa2hva(vm, addr_gva2gpa(vm, gva));
+}
+
+void guest_args_read(struct kvm_vm *vm, uint32_t vcpu_id,
+		     struct guest_args *args)
+{
+	struct kvm_run *run = vcpu_state(vm, vcpu_id);
+	struct kvm_regs regs;
+
+	memset(&regs, 0, sizeof(regs));
+	vcpu_regs_get(vm, vcpu_id, &regs);
+
+	args->port = run->io.port;
+	args->arg0 = regs.rdi;
+	args->arg1 = regs.rsi;
+}
diff --git a/tools/testing/selftests/kvm/lib/kvm_util_internal.h b/tools/testing/selftests/kvm/lib/kvm_util_internal.h
new file mode 100644
index 000000000..542ed606b
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/kvm_util_internal.h
@@ -0,0 +1,72 @@
+/*
+ * tools/testing/selftests/kvm/lib/kvm_util.c
+ *
+ * Copyright (C) 2018, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ */
+
+#ifndef KVM_UTIL_INTERNAL_H
+#define KVM_UTIL_INTERNAL_H 1
+
+#include "sparsebit.h"
+
+#ifndef BITS_PER_BYTE
+#define BITS_PER_BYTE           8
+#endif
+
+#ifndef BITS_PER_LONG
+#define BITS_PER_LONG (BITS_PER_BYTE * sizeof(long))
+#endif
+
+#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))
+#define BITS_TO_LONGS(nr)       DIV_ROUND_UP(nr, BITS_PER_LONG)
+
+/* Concrete definition of struct kvm_vm. */
+struct userspace_mem_region {
+	struct userspace_mem_region *next, *prev;
+	struct kvm_userspace_memory_region region;
+	struct sparsebit *unused_phy_pages;
+	int fd;
+	off_t offset;
+	void *host_mem;
+	void *mmap_start;
+	size_t mmap_size;
+};
+
+struct vcpu {
+	struct vcpu *next, *prev;
+	uint32_t id;
+	int fd;
+	struct kvm_run *state;
+};
+
+struct kvm_vm {
+	int mode;
+	int kvm_fd;
+	int fd;
+	unsigned int page_size;
+	unsigned int page_shift;
+	uint64_t max_gfn;
+	struct vcpu *vcpu_head;
+	struct userspace_mem_region *userspace_mem_region_head;
+	struct sparsebit *vpages_valid;
+	struct sparsebit *vpages_mapped;
+
+	bool has_irqchip;
+	bool pgd_created;
+	vm_paddr_t pgd;
+	vm_vaddr_t gdt;
+	vm_vaddr_t tss;
+};
+
+struct vcpu *vcpu_find(struct kvm_vm *vm,
+	uint32_t vcpuid);
+void vcpu_setup(struct kvm_vm *vm, int vcpuid, int pgd_memslot, int gdt_memslot);
+void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent);
+void regs_dump(FILE *stream, struct kvm_regs *regs,
+	uint8_t indent);
+void sregs_dump(FILE *stream, struct kvm_sregs *sregs,
+	uint8_t indent);
+
+#endif
diff --git a/tools/testing/selftests/kvm/lib/sparsebit.c b/tools/testing/selftests/kvm/lib/sparsebit.c
new file mode 100644
index 000000000..b132bc95d
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/sparsebit.c
@@ -0,0 +1,2087 @@
+/*
+ * Sparse bit array
+ *
+ * Copyright (C) 2018, Google LLC.
+ * Copyright (C) 2018, Red Hat, Inc. (code style cleanup and fuzzing driver)
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ * This library provides functions to support a memory efficient bit array,
+ * with an index size of 2^64.  A sparsebit array is allocated through
+ * the use sparsebit_alloc() and free'd via sparsebit_free(),
+ * such as in the following:
+ *
+ *   struct sparsebit *s;
+ *   s = sparsebit_alloc();
+ *   sparsebit_free(&s);
+ *
+ * The struct sparsebit type resolves down to a struct sparsebit.
+ * Note that, sparsebit_free() takes a pointer to the sparsebit
+ * structure.  This is so that sparsebit_free() is able to poison
+ * the pointer (e.g. set it to NULL) to the struct sparsebit before
+ * returning to the caller.
+ *
+ * Between the return of sparsebit_alloc() and the call of
+ * sparsebit_free(), there are multiple query and modifying operations
+ * that can be performed on the allocated sparsebit array.  All of
+ * these operations take as a parameter the value returned from
+ * sparsebit_alloc() and most also take a bit index.  Frequently
+ * used routines include:
+ *
+ *  ---- Query Operations
+ *  sparsebit_is_set(s, idx)
+ *  sparsebit_is_clear(s, idx)
+ *  sparsebit_any_set(s)
+ *  sparsebit_first_set(s)
+ *  sparsebit_next_set(s, prev_idx)
+ *
+ *  ---- Modifying Operations
+ *  sparsebit_set(s, idx)
+ *  sparsebit_clear(s, idx)
+ *  sparsebit_set_num(s, idx, num);
+ *  sparsebit_clear_num(s, idx, num);
+ *
+ * A common operation, is to itterate over all the bits set in a test
+ * sparsebit array.  This can be done via code with the following structure:
+ *
+ *   sparsebit_idx_t idx;
+ *   if (sparsebit_any_set(s)) {
+ *     idx = sparsebit_first_set(s);
+ *     do {
+ *       ...
+ *       idx = sparsebit_next_set(s, idx);
+ *     } while (idx != 0);
+ *   }
+ *
+ * The index of the first bit set needs to be obtained via
+ * sparsebit_first_set(), because sparsebit_next_set(), needs
+ * the index of the previously set.  The sparsebit_idx_t type is
+ * unsigned, so there is no previous index before 0 that is available.
+ * Also, the call to sparsebit_first_set() is not made unless there
+ * is at least 1 bit in the array set.  This is because sparsebit_first_set()
+ * aborts if sparsebit_first_set() is called with no bits set.
+ * It is the callers responsibility to assure that the
+ * sparsebit array has at least a single bit set before calling
+ * sparsebit_first_set().
+ *
+ * ==== Implementation Overview ====
+ * For the most part the internal implementation of sparsebit is
+ * opaque to the caller.  One important implementation detail that the
+ * caller may need to be aware of is the spatial complexity of the
+ * implementation.  This implementation of a sparsebit array is not
+ * only sparse, in that it uses memory proportional to the number of bits
+ * set.  It is also efficient in memory usage when most of the bits are
+ * set.
+ *
+ * At a high-level the state of the bit settings are maintained through
+ * the use of a binary-search tree, where each node contains at least
+ * the following members:
+ *
+ *   typedef uint64_t sparsebit_idx_t;
+ *   typedef uint64_t sparsebit_num_t;
+ *
+ *   sparsebit_idx_t idx;
+ *   uint32_t mask;
+ *   sparsebit_num_t num_after;
+ *
+ * The idx member contains the bit index of the first bit described by this
+ * node, while the mask member stores the setting of the first 32-bits.
+ * The setting of the bit at idx + n, where 0 <= n < 32, is located in the
+ * mask member at 1 << n.
+ *
+ * Nodes are sorted by idx and the bits described by two nodes will never
+ * overlap. The idx member is always aligned to the mask size, i.e. a
+ * multiple of 32.
+ *
+ * Beyond a typical implementation, the nodes in this implementation also
+ * contains a member named num_after.  The num_after member holds the
+ * number of bits immediately after the mask bits that are contiguously set.
+ * The use of the num_after member allows this implementation to efficiently
+ * represent cases where most bits are set.  For example, the case of all
+ * but the last two bits set, is represented by the following two nodes:
+ *
+ *   node 0 - idx: 0x0 mask: 0xffffffff num_after: 0xffffffffffffffc0
+ *   node 1 - idx: 0xffffffffffffffe0 mask: 0x3fffffff num_after: 0
+ *
+ * ==== Invariants ====
+ * This implementation usses the following invariants:
+ *
+ *   + Node are only used to represent bits that are set.
+ *     Nodes with a mask of 0 and num_after of 0 are not allowed.
+ *
+ *   + Sum of bits set in all the nodes is equal to the value of
+ *     the struct sparsebit_pvt num_set member.
+ *
+ *   + The setting of at least one bit is always described in a nodes
+ *     mask (mask >= 1).
+ *
+ *   + A node with all mask bits set only occurs when the last bit
+ *     described by the previous node is not equal to this nodes
+ *     starting index - 1.  All such occurences of this condition are
+ *     avoided by moving the setting of the nodes mask bits into
+ *     the previous nodes num_after setting.
+ *
+ *   + Node starting index is evenly divisible by the number of bits
+ *     within a nodes mask member.
+ *
+ *   + Nodes never represent a range of bits that wrap around the
+ *     highest supported index.
+ *
+ *      (idx + MASK_BITS + num_after - 1) <= ((sparsebit_idx_t) 0) - 1)
+ *
+ *     As a consequence of the above, the num_after member of a node
+ *     will always be <=:
+ *
+ *       maximum_index - nodes_starting_index - number_of_mask_bits
+ *
+ *   + Nodes within the binary search tree are sorted based on each
+ *     nodes starting index.
+ *
+ *   + The range of bits described by any two nodes do not overlap.  The
+ *     range of bits described by a single node is:
+ *
+ *       start: node->idx
+ *       end (inclusive): node->idx + MASK_BITS + node->num_after - 1;
+ *
+ * Note, at times these invariants are temporarily violated for a
+ * specific portion of the code.  For example, when setting a mask
+ * bit, there is a small delay between when the mask bit is set and the
+ * value in the struct sparsebit_pvt num_set member is updated.  Other
+ * temporary violations occur when node_split() is called with a specified
+ * index and assures that a node where its mask represents the bit
+ * at the specified index exists.  At times to do this node_split()
+ * must split an existing node into two nodes or create a node that
+ * has no bits set.  Such temporary violations must be corrected before
+ * returning to the caller.  These corrections are typically performed
+ * by the local function node_reduce().
+ */
+
+#include "test_util.h"
+#include "sparsebit.h"
+#include <limits.h>
+#include <assert.h>
+
+#define DUMP_LINE_MAX 100 /* Does not include indent amount */
+
+typedef uint32_t mask_t;
+#define MASK_BITS (sizeof(mask_t) * CHAR_BIT)
+
+struct node {
+	struct node *parent;
+	struct node *left;
+	struct node *right;
+	sparsebit_idx_t idx; /* index of least-significant bit in mask */
+	sparsebit_num_t num_after; /* num contiguously set after mask */
+	mask_t mask;
+};
+
+struct sparsebit {
+	/*
+	 * Points to root node of the binary search
+	 * tree.  Equal to NULL when no bits are set in
+	 * the entire sparsebit array.
+	 */
+	struct node *root;
+
+	/*
+	 * A redundant count of the total number of bits set.  Used for
+	 * diagnostic purposes and to change the time complexity of
+	 * sparsebit_num_set() from O(n) to O(1).
+	 * Note: Due to overflow, a value of 0 means none or all set.
+	 */
+	sparsebit_num_t num_set;
+};
+
+/* Returns the number of set bits described by the settings
+ * of the node pointed to by nodep.
+ */
+static sparsebit_num_t node_num_set(struct node *nodep)
+{
+	return nodep->num_after + __builtin_popcount(nodep->mask);
+}
+
+/* Returns a pointer to the node that describes the
+ * lowest bit index.
+ */
+static struct node *node_first(struct sparsebit *s)
+{
+	struct node *nodep;
+
+	for (nodep = s->root; nodep && nodep->left; nodep = nodep->left)
+		;
+
+	return nodep;
+}
+
+/* Returns a pointer to the node that describes the
+ * lowest bit index > the index of the node pointed to by np.
+ * Returns NULL if no node with a higher index exists.
+ */
+static struct node *node_next(struct sparsebit *s, struct node *np)
+{
+	struct node *nodep = np;
+
+	/*
+	 * If current node has a right child, next node is the left-most
+	 * of the right child.
+	 */
+	if (nodep->right) {
+		for (nodep = nodep->right; nodep->left; nodep = nodep->left)
+			;
+		return nodep;
+	}
+
+	/*
+	 * No right child.  Go up until node is left child of a parent.
+	 * That parent is then the next node.
+	 */
+	while (nodep->parent && nodep == nodep->parent->right)
+		nodep = nodep->parent;
+
+	return nodep->parent;
+}
+
+/* Searches for and returns a pointer to the node that describes the
+ * highest index < the index of the node pointed to by np.
+ * Returns NULL if no node with a lower index exists.
+ */
+static struct node *node_prev(struct sparsebit *s, struct node *np)
+{
+	struct node *nodep = np;
+
+	/*
+	 * If current node has a left child, next node is the right-most
+	 * of the left child.
+	 */
+	if (nodep->left) {
+		for (nodep = nodep->left; nodep->right; nodep = nodep->right)
+			;
+		return (struct node *) nodep;
+	}
+
+	/*
+	 * No left child.  Go up until node is right child of a parent.
+	 * That parent is then the next node.
+	 */
+	while (nodep->parent && nodep == nodep->parent->left)
+		nodep = nodep->parent;
+
+	return (struct node *) nodep->parent;
+}
+
+
+/* Allocates space to hold a copy of the node sub-tree pointed to by
+ * subtree and duplicates the bit settings to the newly allocated nodes.
+ * Returns the newly allocated copy of subtree.
+ */
+static struct node *node_copy_subtree(struct node *subtree)
+{
+	struct node *root;
+
+	/* Duplicate the node at the root of the subtree */
+	root = calloc(1, sizeof(*root));
+	if (!root) {
+		perror("calloc");
+		abort();
+	}
+
+	root->idx = subtree->idx;
+	root->mask = subtree->mask;
+	root->num_after = subtree->num_after;
+
+	/* As needed, recursively duplicate the left and right subtrees */
+	if (subtree->left) {
+		root->left = node_copy_subtree(subtree->left);
+		root->left->parent = root;
+	}
+
+	if (subtree->right) {
+		root->right = node_copy_subtree(subtree->right);
+		root->right->parent = root;
+	}
+
+	return root;
+}
+
+/* Searches for and returns a pointer to the node that describes the setting
+ * of the bit given by idx.  A node describes the setting of a bit if its
+ * index is within the bits described by the mask bits or the number of
+ * contiguous bits set after the mask.  Returns NULL if there is no such node.
+ */
+static struct node *node_find(struct sparsebit *s, sparsebit_idx_t idx)
+{
+	struct node *nodep;
+
+	/* Find the node that describes the setting of the bit at idx */
+	for (nodep = s->root; nodep;
+	     nodep = nodep->idx > idx ? nodep->left : nodep->right) {
+		if (idx >= nodep->idx &&
+		    idx <= nodep->idx + MASK_BITS + nodep->num_after - 1)
+			break;
+	}
+
+	return nodep;
+}
+
+/* Entry Requirements:
+ *   + A node that describes the setting of idx is not already present.
+ *
+ * Adds a new node to describe the setting of the bit at the index given
+ * by idx.  Returns a pointer to the newly added node.
+ *
+ * TODO(lhuemill): Degenerate cases causes the tree to get unbalanced.
+ */
+static struct node *node_add(struct sparsebit *s, sparsebit_idx_t idx)
+{
+	struct node *nodep, *parentp, *prev;
+
+	/* Allocate and initialize the new node. */
+	nodep = calloc(1, sizeof(*nodep));
+	if (!nodep) {
+		perror("calloc");
+		abort();
+	}
+
+	nodep->idx = idx & -MASK_BITS;
+
+	/* If no nodes, set it up as the root node. */
+	if (!s->root) {
+		s->root = nodep;
+		return nodep;
+	}
+
+	/*
+	 * Find the parent where the new node should be attached
+	 * and add the node there.
+	 */
+	parentp = s->root;
+	while (true) {
+		if (idx < parentp->idx) {
+			if (!parentp->left) {
+				parentp->left = nodep;
+				nodep->parent = parentp;
+				break;
+			}
+			parentp = parentp->left;
+		} else {
+			assert(idx > parentp->idx + MASK_BITS + parentp->num_after - 1);
+			if (!parentp->right) {
+				parentp->right = nodep;
+				nodep->parent = parentp;
+				break;
+			}
+			parentp = parentp->right;
+		}
+	}
+
+	/*
+	 * Does num_after bits of previous node overlap with the mask
+	 * of the new node?  If so set the bits in the new nodes mask
+	 * and reduce the previous nodes num_after.
+	 */
+	prev = node_prev(s, nodep);
+	while (prev && prev->idx + MASK_BITS + prev->num_after - 1 >= nodep->idx) {
+		unsigned int n1 = (prev->idx + MASK_BITS + prev->num_after - 1)
+			- nodep->idx;
+		assert(prev->num_after > 0);
+		assert(n1 < MASK_BITS);
+		assert(!(nodep->mask & (1 << n1)));
+		nodep->mask |= (1 << n1);
+		prev->num_after--;
+	}
+
+	return nodep;
+}
+
+/* Returns whether all the bits in the sparsebit array are set.  */
+bool sparsebit_all_set(struct sparsebit *s)
+{
+	/*
+	 * If any nodes there must be at least one bit set.  Only case
+	 * where a bit is set and total num set is 0, is when all bits
+	 * are set.
+	 */
+	return s->root && s->num_set == 0;
+}
+
+/* Clears all bits described by the node pointed to by nodep, then
+ * removes the node.
+ */
+static void node_rm(struct sparsebit *s, struct node *nodep)
+{
+	struct node *tmp;
+	sparsebit_num_t num_set;
+
+	num_set = node_num_set(nodep);
+	assert(s->num_set >= num_set || sparsebit_all_set(s));
+	s->num_set -= node_num_set(nodep);
+
+	/* Have both left and right child */
+	if (nodep->left && nodep->right) {
+		/*
+		 * Move left children to the leftmost leaf node
+		 * of the right child.
+		 */
+		for (tmp = nodep->right; tmp->left; tmp = tmp->left)
+			;
+		tmp->left = nodep->left;
+		nodep->left = NULL;
+		tmp->left->parent = tmp;
+	}
+
+	/* Left only child */
+	if (nodep->left) {
+		if (!nodep->parent) {
+			s->root = nodep->left;
+			nodep->left->parent = NULL;
+		} else {
+			nodep->left->parent = nodep->parent;
+			if (nodep == nodep->parent->left)
+				nodep->parent->left = nodep->left;
+			else {
+				assert(nodep == nodep->parent->right);
+				nodep->parent->right = nodep->left;
+			}
+		}
+
+		nodep->parent = nodep->left = nodep->right = NULL;
+		free(nodep);
+
+		return;
+	}
+
+
+	/* Right only child */
+	if (nodep->right) {
+		if (!nodep->parent) {
+			s->root = nodep->right;
+			nodep->right->parent = NULL;
+		} else {
+			nodep->right->parent = nodep->parent;
+			if (nodep == nodep->parent->left)
+				nodep->parent->left = nodep->right;
+			else {
+				assert(nodep == nodep->parent->right);
+				nodep->parent->right = nodep->right;
+			}
+		}
+
+		nodep->parent = nodep->left = nodep->right = NULL;
+		free(nodep);
+
+		return;
+	}
+
+	/* Leaf Node */
+	if (!nodep->parent) {
+		s->root = NULL;
+	} else {
+		if (nodep->parent->left == nodep)
+			nodep->parent->left = NULL;
+		else {
+			assert(nodep == nodep->parent->right);
+			nodep->parent->right = NULL;
+		}
+	}
+
+	nodep->parent = nodep->left = nodep->right = NULL;
+	free(nodep);
+
+	return;
+}
+
+/* Splits the node containing the bit at idx so that there is a node
+ * that starts at the specified index.  If no such node exists, a new
+ * node at the specified index is created.  Returns the new node.
+ *
+ * idx must start of a mask boundary.
+ */
+static struct node *node_split(struct sparsebit *s, sparsebit_idx_t idx)
+{
+	struct node *nodep1, *nodep2;
+	sparsebit_idx_t offset;
+	sparsebit_num_t orig_num_after;
+
+	assert(!(idx % MASK_BITS));
+
+	/*
+	 * Is there a node that describes the setting of idx?
+	 * If not, add it.
+	 */
+	nodep1 = node_find(s, idx);
+	if (!nodep1)
+		return node_add(s, idx);
+
+	/*
+	 * All done if the starting index of the node is where the
+	 * split should occur.
+	 */
+	if (nodep1->idx == idx)
+		return nodep1;
+
+	/*
+	 * Split point not at start of mask, so it must be part of
+	 * bits described by num_after.
+	 */
+
+	/*
+	 * Calculate offset within num_after for where the split is
+	 * to occur.
+	 */
+	offset = idx - (nodep1->idx + MASK_BITS);
+	orig_num_after = nodep1->num_after;
+
+	/*
+	 * Add a new node to describe the bits starting at
+	 * the split point.
+	 */
+	nodep1->num_after = offset;
+	nodep2 = node_add(s, idx);
+
+	/* Move bits after the split point into the new node */
+	nodep2->num_after = orig_num_after - offset;
+	if (nodep2->num_after >= MASK_BITS) {
+		nodep2->mask = ~(mask_t) 0;
+		nodep2->num_after -= MASK_BITS;
+	} else {
+		nodep2->mask = (1 << nodep2->num_after) - 1;
+		nodep2->num_after = 0;
+	}
+
+	return nodep2;
+}
+
+/* Iteratively reduces the node pointed to by nodep and its adjacent
+ * nodes into a more compact form.  For example, a node with a mask with
+ * all bits set adjacent to a previous node, will get combined into a
+ * single node with an increased num_after setting.
+ *
+ * After each reduction, a further check is made to see if additional
+ * reductions are possible with the new previous and next nodes.  Note,
+ * a search for a reduction is only done across the nodes nearest nodep
+ * and those that became part of a reduction.  Reductions beyond nodep
+ * and the adjacent nodes that are reduced are not discovered.  It is the
+ * responsibility of the caller to pass a nodep that is within one node
+ * of each possible reduction.
+ *
+ * This function does not fix the temporary violation of all invariants.
+ * For example it does not fix the case where the bit settings described
+ * by two or more nodes overlap.  Such a violation introduces the potential
+ * complication of a bit setting for a specific index having different settings
+ * in different nodes.  This would then introduce the further complication
+ * of which node has the correct setting of the bit and thus such conditions
+ * are not allowed.
+ *
+ * This function is designed to fix invariant violations that are introduced
+ * by node_split() and by changes to the nodes mask or num_after members.
+ * For example, when setting a bit within a nodes mask, the function that
+ * sets the bit doesn't have to worry about whether the setting of that
+ * bit caused the mask to have leading only or trailing only bits set.
+ * Instead, the function can call node_reduce(), with nodep equal to the
+ * node address that it set a mask bit in, and node_reduce() will notice
+ * the cases of leading or trailing only bits and that there is an
+ * adjacent node that the bit settings could be merged into.
+ *
+ * This implementation specifically detects and corrects violation of the
+ * following invariants:
+ *
+ *   + Node are only used to represent bits that are set.
+ *     Nodes with a mask of 0 and num_after of 0 are not allowed.
+ *
+ *   + The setting of at least one bit is always described in a nodes
+ *     mask (mask >= 1).
+ *
+ *   + A node with all mask bits set only occurs when the last bit
+ *     described by the previous node is not equal to this nodes
+ *     starting index - 1.  All such occurences of this condition are
+ *     avoided by moving the setting of the nodes mask bits into
+ *     the previous nodes num_after setting.
+ */
+static void node_reduce(struct sparsebit *s, struct node *nodep)
+{
+	bool reduction_performed;
+
+	do {
+		reduction_performed = false;
+		struct node *prev, *next, *tmp;
+
+		/* 1) Potential reductions within the current node. */
+
+		/* Nodes with all bits cleared may be removed. */
+		if (nodep->mask == 0 && nodep->num_after == 0) {
+			/*
+			 * About to remove the node pointed to by
+			 * nodep, which normally would cause a problem
+			 * for the next pass through the reduction loop,
+			 * because the node at the starting point no longer
+			 * exists.  This potential problem is handled
+			 * by first remembering the location of the next
+			 * or previous nodes.  Doesn't matter which, because
+			 * once the node at nodep is removed, there will be
+			 * no other nodes between prev and next.
+			 *
+			 * Note, the checks performed on nodep against both
+			 * both prev and next both check for an adjacent
+			 * node that can be reduced into a single node.  As
+			 * such, after removing the node at nodep, doesn't
+			 * matter whether the nodep for the next pass
+			 * through the loop is equal to the previous pass
+			 * prev or next node.  Either way, on the next pass
+			 * the one not selected will become either the
+			 * prev or next node.
+			 */
+			tmp = node_next(s, nodep);
+			if (!tmp)
+				tmp = node_prev(s, nodep);
+
+			node_rm(s, nodep);
+			nodep = NULL;
+
+			nodep = tmp;
+			reduction_performed = true;
+			continue;
+		}
+
+		/*
+		 * When the mask is 0, can reduce the amount of num_after
+		 * bits by moving the initial num_after bits into the mask.
+		 */
+		if (nodep->mask == 0) {
+			assert(nodep->num_after != 0);
+			assert(nodep->idx + MASK_BITS > nodep->idx);
+
+			nodep->idx += MASK_BITS;
+
+			if (nodep->num_after >= MASK_BITS) {
+				nodep->mask = ~0;
+				nodep->num_after -= MASK_BITS;
+			} else {
+				nodep->mask = (1u << nodep->num_after) - 1;
+				nodep->num_after = 0;
+			}
+
+			reduction_performed = true;
+			continue;
+		}
+
+		/*
+		 * 2) Potential reductions between the current and
+		 * previous nodes.
+		 */
+		prev = node_prev(s, nodep);
+		if (prev) {
+			sparsebit_idx_t prev_highest_bit;
+
+			/* Nodes with no bits set can be removed. */
+			if (prev->mask == 0 && prev->num_after == 0) {
+				node_rm(s, prev);
+
+				reduction_performed = true;
+				continue;
+			}
+
+			/*
+			 * All mask bits set and previous node has
+			 * adjacent index.
+			 */
+			if (nodep->mask + 1 == 0 &&
+			    prev->idx + MASK_BITS == nodep->idx) {
+				prev->num_after += MASK_BITS + nodep->num_after;
+				nodep->mask = 0;
+				nodep->num_after = 0;
+
+				reduction_performed = true;
+				continue;
+			}
+
+			/*
+			 * Is node adjacent to previous node and the node
+			 * contains a single contiguous range of bits
+			 * starting from the beginning of the mask?
+			 */
+			prev_highest_bit = prev->idx + MASK_BITS - 1 + prev->num_after;
+			if (prev_highest_bit + 1 == nodep->idx &&
+			    (nodep->mask | (nodep->mask >> 1)) == nodep->mask) {
+				/*
+				 * How many contiguous bits are there?
+				 * Is equal to the total number of set
+				 * bits, due to an earlier check that
+				 * there is a single contiguous range of
+				 * set bits.
+				 */
+				unsigned int num_contiguous
+					= __builtin_popcount(nodep->mask);
+				assert((num_contiguous > 0) &&
+				       ((1ULL << num_contiguous) - 1) == nodep->mask);
+
+				prev->num_after += num_contiguous;
+				nodep->mask = 0;
+
+				/*
+				 * For predictable performance, handle special
+				 * case where all mask bits are set and there
+				 * is a non-zero num_after setting.  This code
+				 * is functionally correct without the following
+				 * conditionalized statements, but without them
+				 * the value of num_after is only reduced by
+				 * the number of mask bits per pass.  There are
+				 * cases where num_after can be close to 2^64.
+				 * Without this code it could take nearly
+				 * (2^64) / 32 passes to perform the full
+				 * reduction.
+				 */
+				if (num_contiguous == MASK_BITS) {
+					prev->num_after += nodep->num_after;
+					nodep->num_after = 0;
+				}
+
+				reduction_performed = true;
+				continue;
+			}
+		}
+
+		/*
+		 * 3) Potential reductions between the current and
+		 * next nodes.
+		 */
+		next = node_next(s, nodep);
+		if (next) {
+			/* Nodes with no bits set can be removed. */
+			if (next->mask == 0 && next->num_after == 0) {
+				node_rm(s, next);
+				reduction_performed = true;
+				continue;
+			}
+
+			/*
+			 * Is next node index adjacent to current node
+			 * and has a mask with all bits set?
+			 */
+			if (next->idx == nodep->idx + MASK_BITS + nodep->num_after &&
+			    next->mask == ~(mask_t) 0) {
+				nodep->num_after += MASK_BITS;
+				next->mask = 0;
+				nodep->num_after += next->num_after;
+				next->num_after = 0;
+
+				node_rm(s, next);
+				next = NULL;
+
+				reduction_performed = true;
+				continue;
+			}
+		}
+	} while (nodep && reduction_performed);
+}
+
+/* Returns whether the bit at the index given by idx, within the
+ * sparsebit array is set or not.
+ */
+bool sparsebit_is_set(struct sparsebit *s, sparsebit_idx_t idx)
+{
+	struct node *nodep;
+
+	/* Find the node that describes the setting of the bit at idx */
+	for (nodep = s->root; nodep;
+	     nodep = nodep->idx > idx ? nodep->left : nodep->right)
+		if (idx >= nodep->idx &&
+		    idx <= nodep->idx + MASK_BITS + nodep->num_after - 1)
+			goto have_node;
+
+	return false;
+
+have_node:
+	/* Bit is set if it is any of the bits described by num_after */
+	if (nodep->num_after && idx >= nodep->idx + MASK_BITS)
+		return true;
+
+	/* Is the corresponding mask bit set */
+	assert(idx >= nodep->idx && idx - nodep->idx < MASK_BITS);
+	return !!(nodep->mask & (1 << (idx - nodep->idx)));
+}
+
+/* Within the sparsebit array pointed to by s, sets the bit
+ * at the index given by idx.
+ */
+static void bit_set(struct sparsebit *s, sparsebit_idx_t idx)
+{
+	struct node *nodep;
+
+	/* Skip bits that are already set */
+	if (sparsebit_is_set(s, idx))
+		return;
+
+	/*
+	 * Get a node where the bit at idx is described by the mask.
+	 * The node_split will also create a node, if there isn't
+	 * already a node that describes the setting of bit.
+	 */
+	nodep = node_split(s, idx & -MASK_BITS);
+
+	/* Set the bit within the nodes mask */
+	assert(idx >= nodep->idx && idx <= nodep->idx + MASK_BITS - 1);
+	assert(!(nodep->mask & (1 << (idx - nodep->idx))));
+	nodep->mask |= 1 << (idx - nodep->idx);
+	s->num_set++;
+
+	node_reduce(s, nodep);
+}
+
+/* Within the sparsebit array pointed to by s, clears the bit
+ * at the index given by idx.
+ */
+static void bit_clear(struct sparsebit *s, sparsebit_idx_t idx)
+{
+	struct node *nodep;
+
+	/* Skip bits that are already cleared */
+	if (!sparsebit_is_set(s, idx))
+		return;
+
+	/* Is there a node that describes the setting of this bit? */
+	nodep = node_find(s, idx);
+	if (!nodep)
+		return;
+
+	/*
+	 * If a num_after bit, split the node, so that the bit is
+	 * part of a node mask.
+	 */
+	if (idx >= nodep->idx + MASK_BITS)
+		nodep = node_split(s, idx & -MASK_BITS);
+
+	/*
+	 * After node_split above, bit at idx should be within the mask.
+	 * Clear that bit.
+	 */
+	assert(idx >= nodep->idx && idx <= nodep->idx + MASK_BITS - 1);
+	assert(nodep->mask & (1 << (idx - nodep->idx)));
+	nodep->mask &= ~(1 << (idx - nodep->idx));
+	assert(s->num_set > 0 || sparsebit_all_set(s));
+	s->num_set--;
+
+	node_reduce(s, nodep);
+}
+
+/* Recursively dumps to the FILE stream given by stream the contents
+ * of the sub-tree of nodes pointed to by nodep.  Each line of output
+ * is prefixed by the number of spaces given by indent.  On each
+ * recursion, the indent amount is increased by 2.  This causes nodes
+ * at each level deeper into the binary search tree to be displayed
+ * with a greater indent.
+ */
+static void dump_nodes(FILE *stream, struct node *nodep,
+	unsigned int indent)
+{
+	char *node_type;
+
+	/* Dump contents of node */
+	if (!nodep->parent)
+		node_type = "root";
+	else if (nodep == nodep->parent->left)
+		node_type = "left";
+	else {
+		assert(nodep == nodep->parent->right);
+		node_type = "right";
+	}
+	fprintf(stream, "%*s---- %s nodep: %p\n", indent, "", node_type, nodep);
+	fprintf(stream, "%*s  parent: %p left: %p right: %p\n", indent, "",
+		nodep->parent, nodep->left, nodep->right);
+	fprintf(stream, "%*s  idx: 0x%lx mask: 0x%x num_after: 0x%lx\n",
+		indent, "", nodep->idx, nodep->mask, nodep->num_after);
+
+	/* If present, dump contents of left child nodes */
+	if (nodep->left)
+		dump_nodes(stream, nodep->left, indent + 2);
+
+	/* If present, dump contents of right child nodes */
+	if (nodep->right)
+		dump_nodes(stream, nodep->right, indent + 2);
+}
+
+static inline sparsebit_idx_t node_first_set(struct node *nodep, int start)
+{
+	mask_t leading = (mask_t)1 << start;
+	int n1 = __builtin_ctz(nodep->mask & -leading);
+
+	return nodep->idx + n1;
+}
+
+static inline sparsebit_idx_t node_first_clear(struct node *nodep, int start)
+{
+	mask_t leading = (mask_t)1 << start;
+	int n1 = __builtin_ctz(~nodep->mask & -leading);
+
+	return nodep->idx + n1;
+}
+
+/* Dumps to the FILE stream specified by stream, the implementation dependent
+ * internal state of s.  Each line of output is prefixed with the number
+ * of spaces given by indent.  The output is completely implementation
+ * dependent and subject to change.  Output from this function should only
+ * be used for diagnostic purposes.  For example, this function can be
+ * used by test cases after they detect an unexpected condition, as a means
+ * to capture diagnostic information.
+ */
+static void sparsebit_dump_internal(FILE *stream, struct sparsebit *s,
+	unsigned int indent)
+{
+	/* Dump the contents of s */
+	fprintf(stream, "%*sroot: %p\n", indent, "", s->root);
+	fprintf(stream, "%*snum_set: 0x%lx\n", indent, "", s->num_set);
+
+	if (s->root)
+		dump_nodes(stream, s->root, indent);
+}
+
+/* Allocates and returns a new sparsebit array. The initial state
+ * of the newly allocated sparsebit array has all bits cleared.
+ */
+struct sparsebit *sparsebit_alloc(void)
+{
+	struct sparsebit *s;
+
+	/* Allocate top level structure. */
+	s = calloc(1, sizeof(*s));
+	if (!s) {
+		perror("calloc");
+		abort();
+	}
+
+	return s;
+}
+
+/* Frees the implementation dependent data for the sparsebit array
+ * pointed to by s and poisons the pointer to that data.
+ */
+void sparsebit_free(struct sparsebit **sbitp)
+{
+	struct sparsebit *s = *sbitp;
+
+	if (!s)
+		return;
+
+	sparsebit_clear_all(s);
+	free(s);
+	*sbitp = NULL;
+}
+
+/* Makes a copy of the sparsebit array given by s, to the sparsebit
+ * array given by d.  Note, d must have already been allocated via
+ * sparsebit_alloc().  It can though already have bits set, which
+ * if different from src will be cleared.
+ */
+void sparsebit_copy(struct sparsebit *d, struct sparsebit *s)
+{
+	/* First clear any bits already set in the destination */
+	sparsebit_clear_all(d);
+
+	if (s->root) {
+		d->root = node_copy_subtree(s->root);
+		d->num_set = s->num_set;
+	}
+}
+
+/* Returns whether num consecutive bits starting at idx are all set.  */
+bool sparsebit_is_set_num(struct sparsebit *s,
+	sparsebit_idx_t idx, sparsebit_num_t num)
+{
+	sparsebit_idx_t next_cleared;
+
+	assert(num > 0);
+	assert(idx + num - 1 >= idx);
+
+	/* With num > 0, the first bit must be set. */
+	if (!sparsebit_is_set(s, idx))
+		return false;
+
+	/* Find the next cleared bit */
+	next_cleared = sparsebit_next_clear(s, idx);
+
+	/*
+	 * If no cleared bits beyond idx, then there are at least num
+	 * set bits. idx + num doesn't wrap.  Otherwise check if
+	 * there are enough set bits between idx and the next cleared bit.
+	 */
+	return next_cleared == 0 || next_cleared - idx >= num;
+}
+
+/* Returns whether the bit at the index given by idx.  */
+bool sparsebit_is_clear(struct sparsebit *s,
+	sparsebit_idx_t idx)
+{
+	return !sparsebit_is_set(s, idx);
+}
+
+/* Returns whether num consecutive bits starting at idx are all cleared.  */
+bool sparsebit_is_clear_num(struct sparsebit *s,
+	sparsebit_idx_t idx, sparsebit_num_t num)
+{
+	sparsebit_idx_t next_set;
+
+	assert(num > 0);
+	assert(idx + num - 1 >= idx);
+
+	/* With num > 0, the first bit must be cleared. */
+	if (!sparsebit_is_clear(s, idx))
+		return false;
+
+	/* Find the next set bit */
+	next_set = sparsebit_next_set(s, idx);
+
+	/*
+	 * If no set bits beyond idx, then there are at least num
+	 * cleared bits. idx + num doesn't wrap.  Otherwise check if
+	 * there are enough cleared bits between idx and the next set bit.
+	 */
+	return next_set == 0 || next_set - idx >= num;
+}
+
+/* Returns the total number of bits set.  Note: 0 is also returned for
+ * the case of all bits set.  This is because with all bits set, there
+ * is 1 additional bit set beyond what can be represented in the return
+ * value.  Use sparsebit_any_set(), instead of sparsebit_num_set() > 0,
+ * to determine if the sparsebit array has any bits set.
+ */
+sparsebit_num_t sparsebit_num_set(struct sparsebit *s)
+{
+	return s->num_set;
+}
+
+/* Returns whether any bit is set in the sparsebit array.  */
+bool sparsebit_any_set(struct sparsebit *s)
+{
+	/*
+	 * Nodes only describe set bits.  If any nodes then there
+	 * is at least 1 bit set.
+	 */
+	if (!s->root)
+		return false;
+
+	/*
+	 * Every node should have a non-zero mask.  For now will
+	 * just assure that the root node has a non-zero mask,
+	 * which is a quick check that at least 1 bit is set.
+	 */
+	assert(s->root->mask != 0);
+	assert(s->num_set > 0 ||
+	       (s->root->num_after == ((sparsebit_num_t) 0) - MASK_BITS &&
+		s->root->mask == ~(mask_t) 0));
+
+	return true;
+}
+
+/* Returns whether all the bits in the sparsebit array are cleared.  */
+bool sparsebit_all_clear(struct sparsebit *s)
+{
+	return !sparsebit_any_set(s);
+}
+
+/* Returns whether all the bits in the sparsebit array are set.  */
+bool sparsebit_any_clear(struct sparsebit *s)
+{
+	return !sparsebit_all_set(s);
+}
+
+/* Returns the index of the first set bit.  Abort if no bits are set.
+ */
+sparsebit_idx_t sparsebit_first_set(struct sparsebit *s)
+{
+	struct node *nodep;
+
+	/* Validate at least 1 bit is set */
+	assert(sparsebit_any_set(s));
+
+	nodep = node_first(s);
+	return node_first_set(nodep, 0);
+}
+
+/* Returns the index of the first cleared bit.  Abort if
+ * no bits are cleared.
+ */
+sparsebit_idx_t sparsebit_first_clear(struct sparsebit *s)
+{
+	struct node *nodep1, *nodep2;
+
+	/* Validate at least 1 bit is cleared. */
+	assert(sparsebit_any_clear(s));
+
+	/* If no nodes or first node index > 0 then lowest cleared is 0 */
+	nodep1 = node_first(s);
+	if (!nodep1 || nodep1->idx > 0)
+		return 0;
+
+	/* Does the mask in the first node contain any cleared bits. */
+	if (nodep1->mask != ~(mask_t) 0)
+		return node_first_clear(nodep1, 0);
+
+	/*
+	 * All mask bits set in first node.  If there isn't a second node
+	 * then the first cleared bit is the first bit after the bits
+	 * described by the first node.
+	 */
+	nodep2 = node_next(s, nodep1);
+	if (!nodep2) {
+		/*
+		 * No second node.  First cleared bit is first bit beyond
+		 * bits described by first node.
+		 */
+		assert(nodep1->mask == ~(mask_t) 0);
+		assert(nodep1->idx + MASK_BITS + nodep1->num_after != (sparsebit_idx_t) 0);
+		return nodep1->idx + MASK_BITS + nodep1->num_after;
+	}
+
+	/*
+	 * There is a second node.
+	 * If it is not adjacent to the first node, then there is a gap
+	 * of cleared bits between the nodes, and the first cleared bit
+	 * is the first bit within the gap.
+	 */
+	if (nodep1->idx + MASK_BITS + nodep1->num_after != nodep2->idx)
+		return nodep1->idx + MASK_BITS + nodep1->num_after;
+
+	/*
+	 * Second node is adjacent to the first node.
+	 * Because it is adjacent, its mask should be non-zero.  If all
+	 * its mask bits are set, then with it being adjacent, it should
+	 * have had the mask bits moved into the num_after setting of the
+	 * previous node.
+	 */
+	return node_first_clear(nodep2, 0);
+}
+
+/* Returns index of next bit set within s after the index given by prev.
+ * Returns 0 if there are no bits after prev that are set.
+ */
+sparsebit_idx_t sparsebit_next_set(struct sparsebit *s,
+	sparsebit_idx_t prev)
+{
+	sparsebit_idx_t lowest_possible = prev + 1;
+	sparsebit_idx_t start;
+	struct node *nodep;
+
+	/* A bit after the highest index can't be set. */
+	if (lowest_possible == 0)
+		return 0;
+
+	/*
+	 * Find the leftmost 'candidate' overlapping or to the right
+	 * of lowest_possible.
+	 */
+	struct node *candidate = NULL;
+
+	/* True iff lowest_possible is within candidate */
+	bool contains = false;
+
+	/*
+	 * Find node that describes setting of bit at lowest_possible.
+	 * If such a node doesn't exist, find the node with the lowest
+	 * starting index that is > lowest_possible.
+	 */
+	for (nodep = s->root; nodep;) {
+		if ((nodep->idx + MASK_BITS + nodep->num_after - 1)
+			>= lowest_possible) {
+			candidate = nodep;
+			if (candidate->idx <= lowest_possible) {
+				contains = true;
+				break;
+			}
+			nodep = nodep->left;
+		} else {
+			nodep = nodep->right;
+		}
+	}
+	if (!candidate)
+		return 0;
+
+	assert(candidate->mask != 0);
+
+	/* Does the candidate node describe the setting of lowest_possible? */
+	if (!contains) {
+		/*
+		 * Candidate doesn't describe setting of bit at lowest_possible.
+		 * Candidate points to the first node with a starting index
+		 * > lowest_possible.
+		 */
+		assert(candidate->idx > lowest_possible);
+
+		return node_first_set(candidate, 0);
+	}
+
+	/*
+	 * Candidate describes setting of bit at lowest_possible.
+	 * Note: although the node describes the setting of the bit
+	 * at lowest_possible, its possible that its setting and the
+	 * setting of all latter bits described by this node are 0.
+	 * For now, just handle the cases where this node describes
+	 * a bit at or after an index of lowest_possible that is set.
+	 */
+	start = lowest_possible - candidate->idx;
+
+	if (start < MASK_BITS && candidate->mask >= (1 << start))
+		return node_first_set(candidate, start);
+
+	if (candidate->num_after) {
+		sparsebit_idx_t first_num_after_idx = candidate->idx + MASK_BITS;
+
+		return lowest_possible < first_num_after_idx
+			? first_num_after_idx : lowest_possible;
+	}
+
+	/*
+	 * Although candidate node describes setting of bit at
+	 * the index of lowest_possible, all bits at that index and
+	 * latter that are described by candidate are cleared.  With
+	 * this, the next bit is the first bit in the next node, if
+	 * such a node exists.  If a next node doesn't exist, then
+	 * there is no next set bit.
+	 */
+	candidate = node_next(s, candidate);
+	if (!candidate)
+		return 0;
+
+	return node_first_set(candidate, 0);
+}
+
+/* Returns index of next bit cleared within s after the index given by prev.
+ * Returns 0 if there are no bits after prev that are cleared.
+ */
+sparsebit_idx_t sparsebit_next_clear(struct sparsebit *s,
+	sparsebit_idx_t prev)
+{
+	sparsebit_idx_t lowest_possible = prev + 1;
+	sparsebit_idx_t idx;
+	struct node *nodep1, *nodep2;
+
+	/* A bit after the highest index can't be set. */
+	if (lowest_possible == 0)
+		return 0;
+
+	/*
+	 * Does a node describing the setting of lowest_possible exist?
+	 * If not, the bit at lowest_possible is cleared.
+	 */
+	nodep1 = node_find(s, lowest_possible);
+	if (!nodep1)
+		return lowest_possible;
+
+	/* Does a mask bit in node 1 describe the next cleared bit. */
+	for (idx = lowest_possible - nodep1->idx; idx < MASK_BITS; idx++)
+		if (!(nodep1->mask & (1 << idx)))
+			return nodep1->idx + idx;
+
+	/*
+	 * Next cleared bit is not described by node 1.  If there
+	 * isn't a next node, then next cleared bit is described
+	 * by bit after the bits described by the first node.
+	 */
+	nodep2 = node_next(s, nodep1);
+	if (!nodep2)
+		return nodep1->idx + MASK_BITS + nodep1->num_after;
+
+	/*
+	 * There is a second node.
+	 * If it is not adjacent to the first node, then there is a gap
+	 * of cleared bits between the nodes, and the next cleared bit
+	 * is the first bit within the gap.
+	 */
+	if (nodep1->idx + MASK_BITS + nodep1->num_after != nodep2->idx)
+		return nodep1->idx + MASK_BITS + nodep1->num_after;
+
+	/*
+	 * Second node is adjacent to the first node.
+	 * Because it is adjacent, its mask should be non-zero.  If all
+	 * its mask bits are set, then with it being adjacent, it should
+	 * have had the mask bits moved into the num_after setting of the
+	 * previous node.
+	 */
+	return node_first_clear(nodep2, 0);
+}
+
+/* Starting with the index 1 greater than the index given by start, finds
+ * and returns the index of the first sequence of num consecutively set
+ * bits.  Returns a value of 0 of no such sequence exists.
+ */
+sparsebit_idx_t sparsebit_next_set_num(struct sparsebit *s,
+	sparsebit_idx_t start, sparsebit_num_t num)
+{
+	sparsebit_idx_t idx;
+
+	assert(num >= 1);
+
+	for (idx = sparsebit_next_set(s, start);
+		idx != 0 && idx + num - 1 >= idx;
+		idx = sparsebit_next_set(s, idx)) {
+		assert(sparsebit_is_set(s, idx));
+
+		/*
+		 * Does the sequence of bits starting at idx consist of
+		 * num set bits?
+		 */
+		if (sparsebit_is_set_num(s, idx, num))
+			return idx;
+
+		/*
+		 * Sequence of set bits at idx isn't large enough.
+		 * Skip this entire sequence of set bits.
+		 */
+		idx = sparsebit_next_clear(s, idx);
+		if (idx == 0)
+			return 0;
+	}
+
+	return 0;
+}
+
+/* Starting with the index 1 greater than the index given by start, finds
+ * and returns the index of the first sequence of num consecutively cleared
+ * bits.  Returns a value of 0 of no such sequence exists.
+ */
+sparsebit_idx_t sparsebit_next_clear_num(struct sparsebit *s,
+	sparsebit_idx_t start, sparsebit_num_t num)
+{
+	sparsebit_idx_t idx;
+
+	assert(num >= 1);
+
+	for (idx = sparsebit_next_clear(s, start);
+		idx != 0 && idx + num - 1 >= idx;
+		idx = sparsebit_next_clear(s, idx)) {
+		assert(sparsebit_is_clear(s, idx));
+
+		/*
+		 * Does the sequence of bits starting at idx consist of
+		 * num cleared bits?
+		 */
+		if (sparsebit_is_clear_num(s, idx, num))
+			return idx;
+
+		/*
+		 * Sequence of cleared bits at idx isn't large enough.
+		 * Skip this entire sequence of cleared bits.
+		 */
+		idx = sparsebit_next_set(s, idx);
+		if (idx == 0)
+			return 0;
+	}
+
+	return 0;
+}
+
+/* Sets the bits * in the inclusive range idx through idx + num - 1.  */
+void sparsebit_set_num(struct sparsebit *s,
+	sparsebit_idx_t start, sparsebit_num_t num)
+{
+	struct node *nodep, *next;
+	unsigned int n1;
+	sparsebit_idx_t idx;
+	sparsebit_num_t n;
+	sparsebit_idx_t middle_start, middle_end;
+
+	assert(num > 0);
+	assert(start + num - 1 >= start);
+
+	/*
+	 * Leading - bits before first mask boundary.
+	 *
+	 * TODO(lhuemill): With some effort it may be possible to
+	 *   replace the following loop with a sequential sequence
+	 *   of statements.  High level sequence would be:
+	 *
+	 *     1. Use node_split() to force node that describes setting
+	 *        of idx to be within the mask portion of a node.
+	 *     2. Form mask of bits to be set.
+	 *     3. Determine number of mask bits already set in the node
+	 *        and store in a local variable named num_already_set.
+	 *     4. Set the appropriate mask bits within the node.
+	 *     5. Increment struct sparsebit_pvt num_set member
+	 *        by the number of bits that were actually set.
+	 *        Exclude from the counts bits that were already set.
+	 *     6. Before returning to the caller, use node_reduce() to
+	 *        handle the multiple corner cases that this method
+	 *        introduces.
+	 */
+	for (idx = start, n = num; n > 0 && idx % MASK_BITS != 0; idx++, n--)
+		bit_set(s, idx);
+
+	/* Middle - bits spanning one or more entire mask */
+	middle_start = idx;
+	middle_end = middle_start + (n & -MASK_BITS) - 1;
+	if (n >= MASK_BITS) {
+		nodep = node_split(s, middle_start);
+
+		/*
+		 * As needed, split just after end of middle bits.
+		 * No split needed if end of middle bits is at highest
+		 * supported bit index.
+		 */
+		if (middle_end + 1 > middle_end)
+			(void) node_split(s, middle_end + 1);
+
+		/* Delete nodes that only describe bits within the middle. */
+		for (next = node_next(s, nodep);
+			next && (next->idx < middle_end);
+			next = node_next(s, nodep)) {
+			assert(next->idx + MASK_BITS + next->num_after - 1 <= middle_end);
+			node_rm(s, next);
+			next = NULL;
+		}
+
+		/* As needed set each of the mask bits */
+		for (n1 = 0; n1 < MASK_BITS; n1++) {
+			if (!(nodep->mask & (1 << n1))) {
+				nodep->mask |= 1 << n1;
+				s->num_set++;
+			}
+		}
+
+		s->num_set -= nodep->num_after;
+		nodep->num_after = middle_end - middle_start + 1 - MASK_BITS;
+		s->num_set += nodep->num_after;
+
+		node_reduce(s, nodep);
+	}
+	idx = middle_end + 1;
+	n -= middle_end - middle_start + 1;
+
+	/* Trailing - bits at and beyond last mask boundary */
+	assert(n < MASK_BITS);
+	for (; n > 0; idx++, n--)
+		bit_set(s, idx);
+}
+
+/* Clears the bits * in the inclusive range idx through idx + num - 1.  */
+void sparsebit_clear_num(struct sparsebit *s,
+	sparsebit_idx_t start, sparsebit_num_t num)
+{
+	struct node *nodep, *next;
+	unsigned int n1;
+	sparsebit_idx_t idx;
+	sparsebit_num_t n;
+	sparsebit_idx_t middle_start, middle_end;
+
+	assert(num > 0);
+	assert(start + num - 1 >= start);
+
+	/* Leading - bits before first mask boundary */
+	for (idx = start, n = num; n > 0 && idx % MASK_BITS != 0; idx++, n--)
+		bit_clear(s, idx);
+
+	/* Middle - bits spanning one or more entire mask */
+	middle_start = idx;
+	middle_end = middle_start + (n & -MASK_BITS) - 1;
+	if (n >= MASK_BITS) {
+		nodep = node_split(s, middle_start);
+
+		/*
+		 * As needed, split just after end of middle bits.
+		 * No split needed if end of middle bits is at highest
+		 * supported bit index.
+		 */
+		if (middle_end + 1 > middle_end)
+			(void) node_split(s, middle_end + 1);
+
+		/* Delete nodes that only describe bits within the middle. */
+		for (next = node_next(s, nodep);
+			next && (next->idx < middle_end);
+			next = node_next(s, nodep)) {
+			assert(next->idx + MASK_BITS + next->num_after - 1 <= middle_end);
+			node_rm(s, next);
+			next = NULL;
+		}
+
+		/* As needed clear each of the mask bits */
+		for (n1 = 0; n1 < MASK_BITS; n1++) {
+			if (nodep->mask & (1 << n1)) {
+				nodep->mask &= ~(1 << n1);
+				s->num_set--;
+			}
+		}
+
+		/* Clear any bits described by num_after */
+		s->num_set -= nodep->num_after;
+		nodep->num_after = 0;
+
+		/*
+		 * Delete the node that describes the beginning of
+		 * the middle bits and perform any allowed reductions
+		 * with the nodes prev or next of nodep.
+		 */
+		node_reduce(s, nodep);
+		nodep = NULL;
+	}
+	idx = middle_end + 1;
+	n -= middle_end - middle_start + 1;
+
+	/* Trailing - bits at and beyond last mask boundary */
+	assert(n < MASK_BITS);
+	for (; n > 0; idx++, n--)
+		bit_clear(s, idx);
+}
+
+/* Sets the bit at the index given by idx.  */
+void sparsebit_set(struct sparsebit *s, sparsebit_idx_t idx)
+{
+	sparsebit_set_num(s, idx, 1);
+}
+
+/* Clears the bit at the index given by idx.  */
+void sparsebit_clear(struct sparsebit *s, sparsebit_idx_t idx)
+{
+	sparsebit_clear_num(s, idx, 1);
+}
+
+/* Sets the bits in the entire addressable range of the sparsebit array.  */
+void sparsebit_set_all(struct sparsebit *s)
+{
+	sparsebit_set(s, 0);
+	sparsebit_set_num(s, 1, ~(sparsebit_idx_t) 0);
+	assert(sparsebit_all_set(s));
+}
+
+/* Clears the bits in the entire addressable range of the sparsebit array.  */
+void sparsebit_clear_all(struct sparsebit *s)
+{
+	sparsebit_clear(s, 0);
+	sparsebit_clear_num(s, 1, ~(sparsebit_idx_t) 0);
+	assert(!sparsebit_any_set(s));
+}
+
+static size_t display_range(FILE *stream, sparsebit_idx_t low,
+	sparsebit_idx_t high, bool prepend_comma_space)
+{
+	char *fmt_str;
+	size_t sz;
+
+	/* Determine the printf format string */
+	if (low == high)
+		fmt_str = prepend_comma_space ? ", 0x%lx" : "0x%lx";
+	else
+		fmt_str = prepend_comma_space ? ", 0x%lx:0x%lx" : "0x%lx:0x%lx";
+
+	/*
+	 * When stream is NULL, just determine the size of what would
+	 * have been printed, else print the range.
+	 */
+	if (!stream)
+		sz = snprintf(NULL, 0, fmt_str, low, high);
+	else
+		sz = fprintf(stream, fmt_str, low, high);
+
+	return sz;
+}
+
+
+/* Dumps to the FILE stream given by stream, the bit settings
+ * of s.  Each line of output is prefixed with the number of
+ * spaces given by indent.  The length of each line is implementation
+ * dependent and does not depend on the indent amount.  The following
+ * is an example output of a sparsebit array that has bits:
+ *
+ *   0x5, 0x8, 0xa:0xe, 0x12
+ *
+ * This corresponds to a sparsebit whose bits 5, 8, 10, 11, 12, 13, 14, 18
+ * are set.  Note that a ':', instead of a '-' is used to specify a range of
+ * contiguous bits.  This is done because '-' is used to specify command-line
+ * options, and sometimes ranges are specified as command-line arguments.
+ */
+void sparsebit_dump(FILE *stream, struct sparsebit *s,
+	unsigned int indent)
+{
+	size_t current_line_len = 0;
+	size_t sz;
+	struct node *nodep;
+
+	if (!sparsebit_any_set(s))
+		return;
+
+	/* Display initial indent */
+	fprintf(stream, "%*s", indent, "");
+
+	/* For each node */
+	for (nodep = node_first(s); nodep; nodep = node_next(s, nodep)) {
+		unsigned int n1;
+		sparsebit_idx_t low, high;
+
+		/* For each group of bits in the mask */
+		for (n1 = 0; n1 < MASK_BITS; n1++) {
+			if (nodep->mask & (1 << n1)) {
+				low = high = nodep->idx + n1;
+
+				for (; n1 < MASK_BITS; n1++) {
+					if (nodep->mask & (1 << n1))
+						high = nodep->idx + n1;
+					else
+						break;
+				}
+
+				if ((n1 == MASK_BITS) && nodep->num_after)
+					high += nodep->num_after;
+
+				/*
+				 * How much room will it take to display
+				 * this range.
+				 */
+				sz = display_range(NULL, low, high,
+					current_line_len != 0);
+
+				/*
+				 * If there is not enough room, display
+				 * a newline plus the indent of the next
+				 * line.
+				 */
+				if (current_line_len + sz > DUMP_LINE_MAX) {
+					fputs("\n", stream);
+					fprintf(stream, "%*s", indent, "");
+					current_line_len = 0;
+				}
+
+				/* Display the range */
+				sz = display_range(stream, low, high,
+					current_line_len != 0);
+				current_line_len += sz;
+			}
+		}
+
+		/*
+		 * If num_after and most significant-bit of mask is not
+		 * set, then still need to display a range for the bits
+		 * described by num_after.
+		 */
+		if (!(nodep->mask & (1 << (MASK_BITS - 1))) && nodep->num_after) {
+			low = nodep->idx + MASK_BITS;
+			high = nodep->idx + MASK_BITS + nodep->num_after - 1;
+
+			/*
+			 * How much room will it take to display
+			 * this range.
+			 */
+			sz = display_range(NULL, low, high,
+				current_line_len != 0);
+
+			/*
+			 * If there is not enough room, display
+			 * a newline plus the indent of the next
+			 * line.
+			 */
+			if (current_line_len + sz > DUMP_LINE_MAX) {
+				fputs("\n", stream);
+				fprintf(stream, "%*s", indent, "");
+				current_line_len = 0;
+			}
+
+			/* Display the range */
+			sz = display_range(stream, low, high,
+				current_line_len != 0);
+			current_line_len += sz;
+		}
+	}
+	fputs("\n", stream);
+}
+
+/* Validates the internal state of the sparsebit array given by
+ * s.  On error, diagnostic information is printed to stderr and
+ * abort is called.
+ */
+void sparsebit_validate_internal(struct sparsebit *s)
+{
+	bool error_detected = false;
+	struct node *nodep, *prev = NULL;
+	sparsebit_num_t total_bits_set = 0;
+	unsigned int n1;
+
+	/* For each node */
+	for (nodep = node_first(s); nodep;
+		prev = nodep, nodep = node_next(s, nodep)) {
+
+		/*
+		 * Increase total bits set by the number of bits set
+		 * in this node.
+		 */
+		for (n1 = 0; n1 < MASK_BITS; n1++)
+			if (nodep->mask & (1 << n1))
+				total_bits_set++;
+
+		total_bits_set += nodep->num_after;
+
+		/*
+		 * Arbitrary choice as to whether a mask of 0 is allowed
+		 * or not.  For diagnostic purposes it is beneficial to
+		 * have only one valid means to represent a set of bits.
+		 * To support this an arbitrary choice has been made
+		 * to not allow a mask of zero.
+		 */
+		if (nodep->mask == 0) {
+			fprintf(stderr, "Node mask of zero, "
+				"nodep: %p nodep->mask: 0x%x",
+				nodep, nodep->mask);
+			error_detected = true;
+			break;
+		}
+
+		/*
+		 * Validate num_after is not greater than the max index
+		 * - the number of mask bits.  The num_after member
+		 * uses 0-based indexing and thus has no value that
+		 * represents all bits set.  This limitation is handled
+		 * by requiring a non-zero mask.  With a non-zero mask,
+		 * MASK_BITS worth of bits are described by the mask,
+		 * which makes the largest needed num_after equal to:
+		 *
+		 *    (~(sparsebit_num_t) 0) - MASK_BITS + 1
+		 */
+		if (nodep->num_after
+			> (~(sparsebit_num_t) 0) - MASK_BITS + 1) {
+			fprintf(stderr, "num_after too large, "
+				"nodep: %p nodep->num_after: 0x%lx",
+				nodep, nodep->num_after);
+			error_detected = true;
+			break;
+		}
+
+		/* Validate node index is divisible by the mask size */
+		if (nodep->idx % MASK_BITS) {
+			fprintf(stderr, "Node index not divisible by "
+				"mask size,\n"
+				"  nodep: %p nodep->idx: 0x%lx "
+				"MASK_BITS: %lu\n",
+				nodep, nodep->idx, MASK_BITS);
+			error_detected = true;
+			break;
+		}
+
+		/*
+		 * Validate bits described by node don't wrap beyond the
+		 * highest supported index.
+		 */
+		if ((nodep->idx + MASK_BITS + nodep->num_after - 1) < nodep->idx) {
+			fprintf(stderr, "Bits described by node wrap "
+				"beyond highest supported index,\n"
+				"  nodep: %p nodep->idx: 0x%lx\n"
+				"  MASK_BITS: %lu nodep->num_after: 0x%lx",
+				nodep, nodep->idx, MASK_BITS, nodep->num_after);
+			error_detected = true;
+			break;
+		}
+
+		/* Check parent pointers. */
+		if (nodep->left) {
+			if (nodep->left->parent != nodep) {
+				fprintf(stderr, "Left child parent pointer "
+					"doesn't point to this node,\n"
+					"  nodep: %p nodep->left: %p "
+					"nodep->left->parent: %p",
+					nodep, nodep->left,
+					nodep->left->parent);
+				error_detected = true;
+				break;
+			}
+		}
+
+		if (nodep->right) {
+			if (nodep->right->parent != nodep) {
+				fprintf(stderr, "Right child parent pointer "
+					"doesn't point to this node,\n"
+					"  nodep: %p nodep->right: %p "
+					"nodep->right->parent: %p",
+					nodep, nodep->right,
+					nodep->right->parent);
+				error_detected = true;
+				break;
+			}
+		}
+
+		if (!nodep->parent) {
+			if (s->root != nodep) {
+				fprintf(stderr, "Unexpected root node, "
+					"s->root: %p nodep: %p",
+					s->root, nodep);
+				error_detected = true;
+				break;
+			}
+		}
+
+		if (prev) {
+			/*
+			 * Is index of previous node before index of
+			 * current node?
+			 */
+			if (prev->idx >= nodep->idx) {
+				fprintf(stderr, "Previous node index "
+					">= current node index,\n"
+					"  prev: %p prev->idx: 0x%lx\n"
+					"  nodep: %p nodep->idx: 0x%lx",
+					prev, prev->idx, nodep, nodep->idx);
+				error_detected = true;
+				break;
+			}
+
+			/*
+			 * Nodes occur in asscending order, based on each
+			 * nodes starting index.
+			 */
+			if ((prev->idx + MASK_BITS + prev->num_after - 1)
+				>= nodep->idx) {
+				fprintf(stderr, "Previous node bit range "
+					"overlap with current node bit range,\n"
+					"  prev: %p prev->idx: 0x%lx "
+					"prev->num_after: 0x%lx\n"
+					"  nodep: %p nodep->idx: 0x%lx "
+					"nodep->num_after: 0x%lx\n"
+					"  MASK_BITS: %lu",
+					prev, prev->idx, prev->num_after,
+					nodep, nodep->idx, nodep->num_after,
+					MASK_BITS);
+				error_detected = true;
+				break;
+			}
+
+			/*
+			 * When the node has all mask bits set, it shouldn't
+			 * be adjacent to the last bit described by the
+			 * previous node.
+			 */
+			if (nodep->mask == ~(mask_t) 0 &&
+			    prev->idx + MASK_BITS + prev->num_after == nodep->idx) {
+				fprintf(stderr, "Current node has mask with "
+					"all bits set and is adjacent to the "
+					"previous node,\n"
+					"  prev: %p prev->idx: 0x%lx "
+					"prev->num_after: 0x%lx\n"
+					"  nodep: %p nodep->idx: 0x%lx "
+					"nodep->num_after: 0x%lx\n"
+					"  MASK_BITS: %lu",
+					prev, prev->idx, prev->num_after,
+					nodep, nodep->idx, nodep->num_after,
+					MASK_BITS);
+
+				error_detected = true;
+				break;
+			}
+		}
+	}
+
+	if (!error_detected) {
+		/*
+		 * Is sum of bits set in each node equal to the count
+		 * of total bits set.
+		 */
+		if (s->num_set != total_bits_set) {
+			fprintf(stderr, "Number of bits set missmatch,\n"
+				"  s->num_set: 0x%lx total_bits_set: 0x%lx",
+				s->num_set, total_bits_set);
+
+			error_detected = true;
+		}
+	}
+
+	if (error_detected) {
+		fputs("  dump_internal:\n", stderr);
+		sparsebit_dump_internal(stderr, s, 4);
+		abort();
+	}
+}
+
+
+#ifdef FUZZ
+/* A simple but effective fuzzing driver.  Look for bugs with the help
+ * of some invariants and of a trivial representation of sparsebit.
+ * Just use 512 bytes of /dev/zero and /dev/urandom as inputs, and let
+ * afl-fuzz do the magic. :)
+ */
+
+#include <stdlib.h>
+#include <assert.h>
+
+struct range {
+	sparsebit_idx_t first, last;
+	bool set;
+};
+
+struct sparsebit *s;
+struct range ranges[1000];
+int num_ranges;
+
+static bool get_value(sparsebit_idx_t idx)
+{
+	int i;
+
+	for (i = num_ranges; --i >= 0; )
+		if (ranges[i].first <= idx && idx <= ranges[i].last)
+			return ranges[i].set;
+
+	return false;
+}
+
+static void operate(int code, sparsebit_idx_t first, sparsebit_idx_t last)
+{
+	sparsebit_num_t num;
+	sparsebit_idx_t next;
+
+	if (first < last) {
+		num = last - first + 1;
+	} else {
+		num = first - last + 1;
+		first = last;
+		last = first + num - 1;
+	}
+
+	switch (code) {
+	case 0:
+		sparsebit_set(s, first);
+		assert(sparsebit_is_set(s, first));
+		assert(!sparsebit_is_clear(s, first));
+		assert(sparsebit_any_set(s));
+		assert(!sparsebit_all_clear(s));
+		if (get_value(first))
+			return;
+		if (num_ranges == 1000)
+			exit(0);
+		ranges[num_ranges++] = (struct range)
+			{ .first = first, .last = first, .set = true };
+		break;
+	case 1:
+		sparsebit_clear(s, first);
+		assert(!sparsebit_is_set(s, first));
+		assert(sparsebit_is_clear(s, first));
+		assert(sparsebit_any_clear(s));
+		assert(!sparsebit_all_set(s));
+		if (!get_value(first))
+			return;
+		if (num_ranges == 1000)
+			exit(0);
+		ranges[num_ranges++] = (struct range)
+			{ .first = first, .last = first, .set = false };
+		break;
+	case 2:
+		assert(sparsebit_is_set(s, first) == get_value(first));
+		assert(sparsebit_is_clear(s, first) == !get_value(first));
+		break;
+	case 3:
+		if (sparsebit_any_set(s))
+			assert(get_value(sparsebit_first_set(s)));
+		if (sparsebit_any_clear(s))
+			assert(!get_value(sparsebit_first_clear(s)));
+		sparsebit_set_all(s);
+		assert(!sparsebit_any_clear(s));
+		assert(sparsebit_all_set(s));
+		num_ranges = 0;
+		ranges[num_ranges++] = (struct range)
+			{ .first = 0, .last = ~(sparsebit_idx_t)0, .set = true };
+		break;
+	case 4:
+		if (sparsebit_any_set(s))
+			assert(get_value(sparsebit_first_set(s)));
+		if (sparsebit_any_clear(s))
+			assert(!get_value(sparsebit_first_clear(s)));
+		sparsebit_clear_all(s);
+		assert(!sparsebit_any_set(s));
+		assert(sparsebit_all_clear(s));
+		num_ranges = 0;
+		break;
+	case 5:
+		next = sparsebit_next_set(s, first);
+		assert(next == 0 || next > first);
+		assert(next == 0 || get_value(next));
+		break;
+	case 6:
+		next = sparsebit_next_clear(s, first);
+		assert(next == 0 || next > first);
+		assert(next == 0 || !get_value(next));
+		break;
+	case 7:
+		next = sparsebit_next_clear(s, first);
+		if (sparsebit_is_set_num(s, first, num)) {
+			assert(next == 0 || next > last);
+			if (first)
+				next = sparsebit_next_set(s, first - 1);
+			else if (sparsebit_any_set(s))
+				next = sparsebit_first_set(s);
+			else
+				return;
+			assert(next == first);
+		} else {
+			assert(sparsebit_is_clear(s, first) || next <= last);
+		}
+		break;
+	case 8:
+		next = sparsebit_next_set(s, first);
+		if (sparsebit_is_clear_num(s, first, num)) {
+			assert(next == 0 || next > last);
+			if (first)
+				next = sparsebit_next_clear(s, first - 1);
+			else if (sparsebit_any_clear(s))
+				next = sparsebit_first_clear(s);
+			else
+				return;
+			assert(next == first);
+		} else {
+			assert(sparsebit_is_set(s, first) || next <= last);
+		}
+		break;
+	case 9:
+		sparsebit_set_num(s, first, num);
+		assert(sparsebit_is_set_num(s, first, num));
+		assert(!sparsebit_is_clear_num(s, first, num));
+		assert(sparsebit_any_set(s));
+		assert(!sparsebit_all_clear(s));
+		if (num_ranges == 1000)
+			exit(0);
+		ranges[num_ranges++] = (struct range)
+			{ .first = first, .last = last, .set = true };
+		break;
+	case 10:
+		sparsebit_clear_num(s, first, num);
+		assert(!sparsebit_is_set_num(s, first, num));
+		assert(sparsebit_is_clear_num(s, first, num));
+		assert(sparsebit_any_clear(s));
+		assert(!sparsebit_all_set(s));
+		if (num_ranges == 1000)
+			exit(0);
+		ranges[num_ranges++] = (struct range)
+			{ .first = first, .last = last, .set = false };
+		break;
+	case 11:
+		sparsebit_validate_internal(s);
+		break;
+	default:
+		break;
+	}
+}
+
+unsigned char get8(void)
+{
+	int ch;
+
+	ch = getchar();
+	if (ch == EOF)
+		exit(0);
+	return ch;
+}
+
+uint64_t get64(void)
+{
+	uint64_t x;
+
+	x = get8();
+	x = (x << 8) | get8();
+	x = (x << 8) | get8();
+	x = (x << 8) | get8();
+	x = (x << 8) | get8();
+	x = (x << 8) | get8();
+	x = (x << 8) | get8();
+	return (x << 8) | get8();
+}
+
+int main(void)
+{
+	s = sparsebit_alloc();
+	for (;;) {
+		uint8_t op = get8() & 0xf;
+		uint64_t first = get64();
+		uint64_t last = get64();
+
+		operate(op, first, last);
+	}
+}
+#endif
diff --git a/tools/testing/selftests/kvm/lib/vmx.c b/tools/testing/selftests/kvm/lib/vmx.c
new file mode 100644
index 000000000..b987c3c97
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/vmx.c
@@ -0,0 +1,283 @@
+/*
+ * tools/testing/selftests/kvm/lib/x86.c
+ *
+ * Copyright (C) 2018, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ */
+
+#define _GNU_SOURCE /* for program_invocation_name */
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "x86.h"
+#include "vmx.h"
+
+/* Allocate memory regions for nested VMX tests.
+ *
+ * Input Args:
+ *   vm - The VM to allocate guest-virtual addresses in.
+ *
+ * Output Args:
+ *   p_vmx_gva - The guest virtual address for the struct vmx_pages.
+ *
+ * Return:
+ *   Pointer to structure with the addresses of the VMX areas.
+ */
+struct vmx_pages *
+vcpu_alloc_vmx(struct kvm_vm *vm, vm_vaddr_t *p_vmx_gva)
+{
+	vm_vaddr_t vmx_gva = vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0);
+	struct vmx_pages *vmx = addr_gva2hva(vm, vmx_gva);
+
+	/* Setup of a region of guest memory for the vmxon region. */
+	vmx->vmxon = (void *)vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0);
+	vmx->vmxon_hva = addr_gva2hva(vm, (uintptr_t)vmx->vmxon);
+	vmx->vmxon_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vmxon);
+
+	/* Setup of a region of guest memory for a vmcs. */
+	vmx->vmcs = (void *)vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0);
+	vmx->vmcs_hva = addr_gva2hva(vm, (uintptr_t)vmx->vmcs);
+	vmx->vmcs_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vmcs);
+
+	/* Setup of a region of guest memory for the MSR bitmap. */
+	vmx->msr = (void *)vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0);
+	vmx->msr_hva = addr_gva2hva(vm, (uintptr_t)vmx->msr);
+	vmx->msr_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->msr);
+	memset(vmx->msr_hva, 0, getpagesize());
+
+	/* Setup of a region of guest memory for the shadow VMCS. */
+	vmx->shadow_vmcs = (void *)vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0);
+	vmx->shadow_vmcs_hva = addr_gva2hva(vm, (uintptr_t)vmx->shadow_vmcs);
+	vmx->shadow_vmcs_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->shadow_vmcs);
+
+	/* Setup of a region of guest memory for the VMREAD and VMWRITE bitmaps. */
+	vmx->vmread = (void *)vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0);
+	vmx->vmread_hva = addr_gva2hva(vm, (uintptr_t)vmx->vmread);
+	vmx->vmread_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vmread);
+	memset(vmx->vmread_hva, 0, getpagesize());
+
+	vmx->vmwrite = (void *)vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0);
+	vmx->vmwrite_hva = addr_gva2hva(vm, (uintptr_t)vmx->vmwrite);
+	vmx->vmwrite_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vmwrite);
+	memset(vmx->vmwrite_hva, 0, getpagesize());
+
+	*p_vmx_gva = vmx_gva;
+	return vmx;
+}
+
+bool prepare_for_vmx_operation(struct vmx_pages *vmx)
+{
+	uint64_t feature_control;
+	uint64_t required;
+	unsigned long cr0;
+	unsigned long cr4;
+
+	/*
+	 * Ensure bits in CR0 and CR4 are valid in VMX operation:
+	 * - Bit X is 1 in _FIXED0: bit X is fixed to 1 in CRx.
+	 * - Bit X is 0 in _FIXED1: bit X is fixed to 0 in CRx.
+	 */
+	__asm__ __volatile__("mov %%cr0, %0" : "=r"(cr0) : : "memory");
+	cr0 &= rdmsr(MSR_IA32_VMX_CR0_FIXED1);
+	cr0 |= rdmsr(MSR_IA32_VMX_CR0_FIXED0);
+	__asm__ __volatile__("mov %0, %%cr0" : : "r"(cr0) : "memory");
+
+	__asm__ __volatile__("mov %%cr4, %0" : "=r"(cr4) : : "memory");
+	cr4 &= rdmsr(MSR_IA32_VMX_CR4_FIXED1);
+	cr4 |= rdmsr(MSR_IA32_VMX_CR4_FIXED0);
+	/* Enable VMX operation */
+	cr4 |= X86_CR4_VMXE;
+	__asm__ __volatile__("mov %0, %%cr4" : : "r"(cr4) : "memory");
+
+	/*
+	 * Configure IA32_FEATURE_CONTROL MSR to allow VMXON:
+	 *  Bit 0: Lock bit. If clear, VMXON causes a #GP.
+	 *  Bit 2: Enables VMXON outside of SMX operation. If clear, VMXON
+	 *    outside of SMX causes a #GP.
+	 */
+	required = FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX;
+	required |= FEATURE_CONTROL_LOCKED;
+	feature_control = rdmsr(MSR_IA32_FEATURE_CONTROL);
+	if ((feature_control & required) != required)
+		wrmsr(MSR_IA32_FEATURE_CONTROL, feature_control | required);
+
+	/* Enter VMX root operation. */
+	*(uint32_t *)(vmx->vmxon) = vmcs_revision();
+	if (vmxon(vmx->vmxon_gpa))
+		return false;
+
+	/* Load a VMCS. */
+	*(uint32_t *)(vmx->vmcs) = vmcs_revision();
+	if (vmclear(vmx->vmcs_gpa))
+		return false;
+
+	if (vmptrld(vmx->vmcs_gpa))
+		return false;
+
+	/* Setup shadow VMCS, do not load it yet. */
+	*(uint32_t *)(vmx->shadow_vmcs) = vmcs_revision() | 0x80000000ul;
+	if (vmclear(vmx->shadow_vmcs_gpa))
+		return false;
+
+	return true;
+}
+
+/*
+ * Initialize the control fields to the most basic settings possible.
+ */
+static inline void init_vmcs_control_fields(struct vmx_pages *vmx)
+{
+	vmwrite(VIRTUAL_PROCESSOR_ID, 0);
+	vmwrite(POSTED_INTR_NV, 0);
+
+	vmwrite(PIN_BASED_VM_EXEC_CONTROL, rdmsr(MSR_IA32_VMX_TRUE_PINBASED_CTLS));
+	if (!vmwrite(SECONDARY_VM_EXEC_CONTROL, 0))
+		vmwrite(CPU_BASED_VM_EXEC_CONTROL,
+			rdmsr(MSR_IA32_VMX_TRUE_PROCBASED_CTLS) | CPU_BASED_ACTIVATE_SECONDARY_CONTROLS);
+	else
+		vmwrite(CPU_BASED_VM_EXEC_CONTROL, rdmsr(MSR_IA32_VMX_TRUE_PROCBASED_CTLS));
+	vmwrite(EXCEPTION_BITMAP, 0);
+	vmwrite(PAGE_FAULT_ERROR_CODE_MASK, 0);
+	vmwrite(PAGE_FAULT_ERROR_CODE_MATCH, -1); /* Never match */
+	vmwrite(CR3_TARGET_COUNT, 0);
+	vmwrite(VM_EXIT_CONTROLS, rdmsr(MSR_IA32_VMX_EXIT_CTLS) |
+		VM_EXIT_HOST_ADDR_SPACE_SIZE);	  /* 64-bit host */
+	vmwrite(VM_EXIT_MSR_STORE_COUNT, 0);
+	vmwrite(VM_EXIT_MSR_LOAD_COUNT, 0);
+	vmwrite(VM_ENTRY_CONTROLS, rdmsr(MSR_IA32_VMX_ENTRY_CTLS) |
+		VM_ENTRY_IA32E_MODE);		  /* 64-bit guest */
+	vmwrite(VM_ENTRY_MSR_LOAD_COUNT, 0);
+	vmwrite(VM_ENTRY_INTR_INFO_FIELD, 0);
+	vmwrite(TPR_THRESHOLD, 0);
+
+	vmwrite(CR0_GUEST_HOST_MASK, 0);
+	vmwrite(CR4_GUEST_HOST_MASK, 0);
+	vmwrite(CR0_READ_SHADOW, get_cr0());
+	vmwrite(CR4_READ_SHADOW, get_cr4());
+
+	vmwrite(MSR_BITMAP, vmx->msr_gpa);
+	vmwrite(VMREAD_BITMAP, vmx->vmread_gpa);
+	vmwrite(VMWRITE_BITMAP, vmx->vmwrite_gpa);
+}
+
+/*
+ * Initialize the host state fields based on the current host state, with
+ * the exception of HOST_RSP and HOST_RIP, which should be set by vmlaunch
+ * or vmresume.
+ */
+static inline void init_vmcs_host_state(void)
+{
+	uint32_t exit_controls = vmreadz(VM_EXIT_CONTROLS);
+
+	vmwrite(HOST_ES_SELECTOR, get_es());
+	vmwrite(HOST_CS_SELECTOR, get_cs());
+	vmwrite(HOST_SS_SELECTOR, get_ss());
+	vmwrite(HOST_DS_SELECTOR, get_ds());
+	vmwrite(HOST_FS_SELECTOR, get_fs());
+	vmwrite(HOST_GS_SELECTOR, get_gs());
+	vmwrite(HOST_TR_SELECTOR, get_tr());
+
+	if (exit_controls & VM_EXIT_LOAD_IA32_PAT)
+		vmwrite(HOST_IA32_PAT, rdmsr(MSR_IA32_CR_PAT));
+	if (exit_controls & VM_EXIT_LOAD_IA32_EFER)
+		vmwrite(HOST_IA32_EFER, rdmsr(MSR_EFER));
+	if (exit_controls & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL)
+		vmwrite(HOST_IA32_PERF_GLOBAL_CTRL,
+			rdmsr(MSR_CORE_PERF_GLOBAL_CTRL));
+
+	vmwrite(HOST_IA32_SYSENTER_CS, rdmsr(MSR_IA32_SYSENTER_CS));
+
+	vmwrite(HOST_CR0, get_cr0());
+	vmwrite(HOST_CR3, get_cr3());
+	vmwrite(HOST_CR4, get_cr4());
+	vmwrite(HOST_FS_BASE, rdmsr(MSR_FS_BASE));
+	vmwrite(HOST_GS_BASE, rdmsr(MSR_GS_BASE));
+	vmwrite(HOST_TR_BASE,
+		get_desc64_base((struct desc64 *)(get_gdt_base() + get_tr())));
+	vmwrite(HOST_GDTR_BASE, get_gdt_base());
+	vmwrite(HOST_IDTR_BASE, get_idt_base());
+	vmwrite(HOST_IA32_SYSENTER_ESP, rdmsr(MSR_IA32_SYSENTER_ESP));
+	vmwrite(HOST_IA32_SYSENTER_EIP, rdmsr(MSR_IA32_SYSENTER_EIP));
+}
+
+/*
+ * Initialize the guest state fields essentially as a clone of
+ * the host state fields. Some host state fields have fixed
+ * values, and we set the corresponding guest state fields accordingly.
+ */
+static inline void init_vmcs_guest_state(void *rip, void *rsp)
+{
+	vmwrite(GUEST_ES_SELECTOR, vmreadz(HOST_ES_SELECTOR));
+	vmwrite(GUEST_CS_SELECTOR, vmreadz(HOST_CS_SELECTOR));
+	vmwrite(GUEST_SS_SELECTOR, vmreadz(HOST_SS_SELECTOR));
+	vmwrite(GUEST_DS_SELECTOR, vmreadz(HOST_DS_SELECTOR));
+	vmwrite(GUEST_FS_SELECTOR, vmreadz(HOST_FS_SELECTOR));
+	vmwrite(GUEST_GS_SELECTOR, vmreadz(HOST_GS_SELECTOR));
+	vmwrite(GUEST_LDTR_SELECTOR, 0);
+	vmwrite(GUEST_TR_SELECTOR, vmreadz(HOST_TR_SELECTOR));
+	vmwrite(GUEST_INTR_STATUS, 0);
+	vmwrite(GUEST_PML_INDEX, 0);
+
+	vmwrite(VMCS_LINK_POINTER, -1ll);
+	vmwrite(GUEST_IA32_DEBUGCTL, 0);
+	vmwrite(GUEST_IA32_PAT, vmreadz(HOST_IA32_PAT));
+	vmwrite(GUEST_IA32_EFER, vmreadz(HOST_IA32_EFER));
+	vmwrite(GUEST_IA32_PERF_GLOBAL_CTRL,
+		vmreadz(HOST_IA32_PERF_GLOBAL_CTRL));
+
+	vmwrite(GUEST_ES_LIMIT, -1);
+	vmwrite(GUEST_CS_LIMIT, -1);
+	vmwrite(GUEST_SS_LIMIT, -1);
+	vmwrite(GUEST_DS_LIMIT, -1);
+	vmwrite(GUEST_FS_LIMIT, -1);
+	vmwrite(GUEST_GS_LIMIT, -1);
+	vmwrite(GUEST_LDTR_LIMIT, -1);
+	vmwrite(GUEST_TR_LIMIT, 0x67);
+	vmwrite(GUEST_GDTR_LIMIT, 0xffff);
+	vmwrite(GUEST_IDTR_LIMIT, 0xffff);
+	vmwrite(GUEST_ES_AR_BYTES,
+		vmreadz(GUEST_ES_SELECTOR) == 0 ? 0x10000 : 0xc093);
+	vmwrite(GUEST_CS_AR_BYTES, 0xa09b);
+	vmwrite(GUEST_SS_AR_BYTES, 0xc093);
+	vmwrite(GUEST_DS_AR_BYTES,
+		vmreadz(GUEST_DS_SELECTOR) == 0 ? 0x10000 : 0xc093);
+	vmwrite(GUEST_FS_AR_BYTES,
+		vmreadz(GUEST_FS_SELECTOR) == 0 ? 0x10000 : 0xc093);
+	vmwrite(GUEST_GS_AR_BYTES,
+		vmreadz(GUEST_GS_SELECTOR) == 0 ? 0x10000 : 0xc093);
+	vmwrite(GUEST_LDTR_AR_BYTES, 0x10000);
+	vmwrite(GUEST_TR_AR_BYTES, 0x8b);
+	vmwrite(GUEST_INTERRUPTIBILITY_INFO, 0);
+	vmwrite(GUEST_ACTIVITY_STATE, 0);
+	vmwrite(GUEST_SYSENTER_CS, vmreadz(HOST_IA32_SYSENTER_CS));
+	vmwrite(VMX_PREEMPTION_TIMER_VALUE, 0);
+
+	vmwrite(GUEST_CR0, vmreadz(HOST_CR0));
+	vmwrite(GUEST_CR3, vmreadz(HOST_CR3));
+	vmwrite(GUEST_CR4, vmreadz(HOST_CR4));
+	vmwrite(GUEST_ES_BASE, 0);
+	vmwrite(GUEST_CS_BASE, 0);
+	vmwrite(GUEST_SS_BASE, 0);
+	vmwrite(GUEST_DS_BASE, 0);
+	vmwrite(GUEST_FS_BASE, vmreadz(HOST_FS_BASE));
+	vmwrite(GUEST_GS_BASE, vmreadz(HOST_GS_BASE));
+	vmwrite(GUEST_LDTR_BASE, 0);
+	vmwrite(GUEST_TR_BASE, vmreadz(HOST_TR_BASE));
+	vmwrite(GUEST_GDTR_BASE, vmreadz(HOST_GDTR_BASE));
+	vmwrite(GUEST_IDTR_BASE, vmreadz(HOST_IDTR_BASE));
+	vmwrite(GUEST_DR7, 0x400);
+	vmwrite(GUEST_RSP, (uint64_t)rsp);
+	vmwrite(GUEST_RIP, (uint64_t)rip);
+	vmwrite(GUEST_RFLAGS, 2);
+	vmwrite(GUEST_PENDING_DBG_EXCEPTIONS, 0);
+	vmwrite(GUEST_SYSENTER_ESP, vmreadz(HOST_IA32_SYSENTER_ESP));
+	vmwrite(GUEST_SYSENTER_EIP, vmreadz(HOST_IA32_SYSENTER_EIP));
+}
+
+void prepare_vmcs(struct vmx_pages *vmx, void *guest_rip, void *guest_rsp)
+{
+	init_vmcs_control_fields(vmx);
+	init_vmcs_host_state();
+	init_vmcs_guest_state(guest_rip, guest_rsp);
+}
diff --git a/tools/testing/selftests/kvm/lib/x86.c b/tools/testing/selftests/kvm/lib/x86.c
new file mode 100644
index 000000000..800fe3606
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/x86.c
@@ -0,0 +1,893 @@
+/*
+ * tools/testing/selftests/kvm/lib/x86.c
+ *
+ * Copyright (C) 2018, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ */
+
+#define _GNU_SOURCE /* for program_invocation_name */
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "kvm_util_internal.h"
+#include "x86.h"
+
+/* Minimum physical address used for virtual translation tables. */
+#define KVM_GUEST_PAGE_TABLE_MIN_PADDR 0x180000
+
+/* Virtual translation table structure declarations */
+struct pageMapL4Entry {
+	uint64_t present:1;
+	uint64_t writable:1;
+	uint64_t user:1;
+	uint64_t write_through:1;
+	uint64_t cache_disable:1;
+	uint64_t accessed:1;
+	uint64_t ignored_06:1;
+	uint64_t page_size:1;
+	uint64_t ignored_11_08:4;
+	uint64_t address:40;
+	uint64_t ignored_62_52:11;
+	uint64_t execute_disable:1;
+};
+
+struct pageDirectoryPointerEntry {
+	uint64_t present:1;
+	uint64_t writable:1;
+	uint64_t user:1;
+	uint64_t write_through:1;
+	uint64_t cache_disable:1;
+	uint64_t accessed:1;
+	uint64_t ignored_06:1;
+	uint64_t page_size:1;
+	uint64_t ignored_11_08:4;
+	uint64_t address:40;
+	uint64_t ignored_62_52:11;
+	uint64_t execute_disable:1;
+};
+
+struct pageDirectoryEntry {
+	uint64_t present:1;
+	uint64_t writable:1;
+	uint64_t user:1;
+	uint64_t write_through:1;
+	uint64_t cache_disable:1;
+	uint64_t accessed:1;
+	uint64_t ignored_06:1;
+	uint64_t page_size:1;
+	uint64_t ignored_11_08:4;
+	uint64_t address:40;
+	uint64_t ignored_62_52:11;
+	uint64_t execute_disable:1;
+};
+
+struct pageTableEntry {
+	uint64_t present:1;
+	uint64_t writable:1;
+	uint64_t user:1;
+	uint64_t write_through:1;
+	uint64_t cache_disable:1;
+	uint64_t accessed:1;
+	uint64_t dirty:1;
+	uint64_t reserved_07:1;
+	uint64_t global:1;
+	uint64_t ignored_11_09:3;
+	uint64_t address:40;
+	uint64_t ignored_62_52:11;
+	uint64_t execute_disable:1;
+};
+
+/* Register Dump
+ *
+ * Input Args:
+ *   indent - Left margin indent amount
+ *   regs - register
+ *
+ * Output Args:
+ *   stream - Output FILE stream
+ *
+ * Return: None
+ *
+ * Dumps the state of the registers given by regs, to the FILE stream
+ * given by steam.
+ */
+void regs_dump(FILE *stream, struct kvm_regs *regs,
+	       uint8_t indent)
+{
+	fprintf(stream, "%*srax: 0x%.16llx rbx: 0x%.16llx "
+		"rcx: 0x%.16llx rdx: 0x%.16llx\n",
+		indent, "",
+		regs->rax, regs->rbx, regs->rcx, regs->rdx);
+	fprintf(stream, "%*srsi: 0x%.16llx rdi: 0x%.16llx "
+		"rsp: 0x%.16llx rbp: 0x%.16llx\n",
+		indent, "",
+		regs->rsi, regs->rdi, regs->rsp, regs->rbp);
+	fprintf(stream, "%*sr8:  0x%.16llx r9:  0x%.16llx "
+		"r10: 0x%.16llx r11: 0x%.16llx\n",
+		indent, "",
+		regs->r8, regs->r9, regs->r10, regs->r11);
+	fprintf(stream, "%*sr12: 0x%.16llx r13: 0x%.16llx "
+		"r14: 0x%.16llx r15: 0x%.16llx\n",
+		indent, "",
+		regs->r12, regs->r13, regs->r14, regs->r15);
+	fprintf(stream, "%*srip: 0x%.16llx rfl: 0x%.16llx\n",
+		indent, "",
+		regs->rip, regs->rflags);
+}
+
+/* Segment Dump
+ *
+ * Input Args:
+ *   indent - Left margin indent amount
+ *   segment - KVM segment
+ *
+ * Output Args:
+ *   stream - Output FILE stream
+ *
+ * Return: None
+ *
+ * Dumps the state of the KVM segment given by segment, to the FILE stream
+ * given by steam.
+ */
+static void segment_dump(FILE *stream, struct kvm_segment *segment,
+			 uint8_t indent)
+{
+	fprintf(stream, "%*sbase: 0x%.16llx limit: 0x%.8x "
+		"selector: 0x%.4x type: 0x%.2x\n",
+		indent, "", segment->base, segment->limit,
+		segment->selector, segment->type);
+	fprintf(stream, "%*spresent: 0x%.2x dpl: 0x%.2x "
+		"db: 0x%.2x s: 0x%.2x l: 0x%.2x\n",
+		indent, "", segment->present, segment->dpl,
+		segment->db, segment->s, segment->l);
+	fprintf(stream, "%*sg: 0x%.2x avl: 0x%.2x "
+		"unusable: 0x%.2x padding: 0x%.2x\n",
+		indent, "", segment->g, segment->avl,
+		segment->unusable, segment->padding);
+}
+
+/* dtable Dump
+ *
+ * Input Args:
+ *   indent - Left margin indent amount
+ *   dtable - KVM dtable
+ *
+ * Output Args:
+ *   stream - Output FILE stream
+ *
+ * Return: None
+ *
+ * Dumps the state of the KVM dtable given by dtable, to the FILE stream
+ * given by steam.
+ */
+static void dtable_dump(FILE *stream, struct kvm_dtable *dtable,
+			uint8_t indent)
+{
+	fprintf(stream, "%*sbase: 0x%.16llx limit: 0x%.4x "
+		"padding: 0x%.4x 0x%.4x 0x%.4x\n",
+		indent, "", dtable->base, dtable->limit,
+		dtable->padding[0], dtable->padding[1], dtable->padding[2]);
+}
+
+/* System Register Dump
+ *
+ * Input Args:
+ *   indent - Left margin indent amount
+ *   sregs - System registers
+ *
+ * Output Args:
+ *   stream - Output FILE stream
+ *
+ * Return: None
+ *
+ * Dumps the state of the system registers given by sregs, to the FILE stream
+ * given by steam.
+ */
+void sregs_dump(FILE *stream, struct kvm_sregs *sregs,
+		uint8_t indent)
+{
+	unsigned int i;
+
+	fprintf(stream, "%*scs:\n", indent, "");
+	segment_dump(stream, &sregs->cs, indent + 2);
+	fprintf(stream, "%*sds:\n", indent, "");
+	segment_dump(stream, &sregs->ds, indent + 2);
+	fprintf(stream, "%*ses:\n", indent, "");
+	segment_dump(stream, &sregs->es, indent + 2);
+	fprintf(stream, "%*sfs:\n", indent, "");
+	segment_dump(stream, &sregs->fs, indent + 2);
+	fprintf(stream, "%*sgs:\n", indent, "");
+	segment_dump(stream, &sregs->gs, indent + 2);
+	fprintf(stream, "%*sss:\n", indent, "");
+	segment_dump(stream, &sregs->ss, indent + 2);
+	fprintf(stream, "%*str:\n", indent, "");
+	segment_dump(stream, &sregs->tr, indent + 2);
+	fprintf(stream, "%*sldt:\n", indent, "");
+	segment_dump(stream, &sregs->ldt, indent + 2);
+
+	fprintf(stream, "%*sgdt:\n", indent, "");
+	dtable_dump(stream, &sregs->gdt, indent + 2);
+	fprintf(stream, "%*sidt:\n", indent, "");
+	dtable_dump(stream, &sregs->idt, indent + 2);
+
+	fprintf(stream, "%*scr0: 0x%.16llx cr2: 0x%.16llx "
+		"cr3: 0x%.16llx cr4: 0x%.16llx\n",
+		indent, "",
+		sregs->cr0, sregs->cr2, sregs->cr3, sregs->cr4);
+	fprintf(stream, "%*scr8: 0x%.16llx efer: 0x%.16llx "
+		"apic_base: 0x%.16llx\n",
+		indent, "",
+		sregs->cr8, sregs->efer, sregs->apic_base);
+
+	fprintf(stream, "%*sinterrupt_bitmap:\n", indent, "");
+	for (i = 0; i < (KVM_NR_INTERRUPTS + 63) / 64; i++) {
+		fprintf(stream, "%*s%.16llx\n", indent + 2, "",
+			sregs->interrupt_bitmap[i]);
+	}
+}
+
+void virt_pgd_alloc(struct kvm_vm *vm, uint32_t pgd_memslot)
+{
+	int rc;
+
+	TEST_ASSERT(vm->mode == VM_MODE_FLAT48PG, "Attempt to use "
+		"unknown or unsupported guest mode, mode: 0x%x", vm->mode);
+
+	/* If needed, create page map l4 table. */
+	if (!vm->pgd_created) {
+		vm_paddr_t paddr = vm_phy_page_alloc(vm,
+			KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot);
+		vm->pgd = paddr;
+		vm->pgd_created = true;
+	}
+}
+
+/* VM Virtual Page Map
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   vaddr - VM Virtual Address
+ *   paddr - VM Physical Address
+ *   pgd_memslot - Memory region slot for new virtual translation tables
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Within the VM given by vm, creates a virtual translation for the page
+ * starting at vaddr to the page starting at paddr.
+ */
+void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
+	uint32_t pgd_memslot)
+{
+	uint16_t index[4];
+	struct pageMapL4Entry *pml4e;
+
+	TEST_ASSERT(vm->mode == VM_MODE_FLAT48PG, "Attempt to use "
+		"unknown or unsupported guest mode, mode: 0x%x", vm->mode);
+
+	TEST_ASSERT((vaddr % vm->page_size) == 0,
+		"Virtual address not on page boundary,\n"
+		"  vaddr: 0x%lx vm->page_size: 0x%x",
+		vaddr, vm->page_size);
+	TEST_ASSERT(sparsebit_is_set(vm->vpages_valid,
+		(vaddr >> vm->page_shift)),
+		"Invalid virtual address, vaddr: 0x%lx",
+		vaddr);
+	TEST_ASSERT((paddr % vm->page_size) == 0,
+		"Physical address not on page boundary,\n"
+		"  paddr: 0x%lx vm->page_size: 0x%x",
+		paddr, vm->page_size);
+	TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn,
+		"Physical address beyond beyond maximum supported,\n"
+		"  paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
+		paddr, vm->max_gfn, vm->page_size);
+
+	index[0] = (vaddr >> 12) & 0x1ffu;
+	index[1] = (vaddr >> 21) & 0x1ffu;
+	index[2] = (vaddr >> 30) & 0x1ffu;
+	index[3] = (vaddr >> 39) & 0x1ffu;
+
+	/* Allocate page directory pointer table if not present. */
+	pml4e = addr_gpa2hva(vm, vm->pgd);
+	if (!pml4e[index[3]].present) {
+		pml4e[index[3]].address = vm_phy_page_alloc(vm,
+			KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot)
+			>> vm->page_shift;
+		pml4e[index[3]].writable = true;
+		pml4e[index[3]].present = true;
+	}
+
+	/* Allocate page directory table if not present. */
+	struct pageDirectoryPointerEntry *pdpe;
+	pdpe = addr_gpa2hva(vm, pml4e[index[3]].address * vm->page_size);
+	if (!pdpe[index[2]].present) {
+		pdpe[index[2]].address = vm_phy_page_alloc(vm,
+			KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot)
+			>> vm->page_shift;
+		pdpe[index[2]].writable = true;
+		pdpe[index[2]].present = true;
+	}
+
+	/* Allocate page table if not present. */
+	struct pageDirectoryEntry *pde;
+	pde = addr_gpa2hva(vm, pdpe[index[2]].address * vm->page_size);
+	if (!pde[index[1]].present) {
+		pde[index[1]].address = vm_phy_page_alloc(vm,
+			KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot)
+			>> vm->page_shift;
+		pde[index[1]].writable = true;
+		pde[index[1]].present = true;
+	}
+
+	/* Fill in page table entry. */
+	struct pageTableEntry *pte;
+	pte = addr_gpa2hva(vm, pde[index[1]].address * vm->page_size);
+	pte[index[0]].address = paddr >> vm->page_shift;
+	pte[index[0]].writable = true;
+	pte[index[0]].present = 1;
+}
+
+/* Virtual Translation Tables Dump
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   indent - Left margin indent amount
+ *
+ * Output Args:
+ *   stream - Output FILE stream
+ *
+ * Return: None
+ *
+ * Dumps to the FILE stream given by stream, the contents of all the
+ * virtual translation tables for the VM given by vm.
+ */
+void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
+{
+	struct pageMapL4Entry *pml4e, *pml4e_start;
+	struct pageDirectoryPointerEntry *pdpe, *pdpe_start;
+	struct pageDirectoryEntry *pde, *pde_start;
+	struct pageTableEntry *pte, *pte_start;
+
+	if (!vm->pgd_created)
+		return;
+
+	fprintf(stream, "%*s                                          "
+		"                no\n", indent, "");
+	fprintf(stream, "%*s      index hvaddr         gpaddr         "
+		"addr         w exec dirty\n",
+		indent, "");
+	pml4e_start = (struct pageMapL4Entry *) addr_gpa2hva(vm,
+		vm->pgd);
+	for (uint16_t n1 = 0; n1 <= 0x1ffu; n1++) {
+		pml4e = &pml4e_start[n1];
+		if (!pml4e->present)
+			continue;
+		fprintf(stream, "%*spml4e 0x%-3zx %p 0x%-12lx 0x%-10lx %u "
+			" %u\n",
+			indent, "",
+			pml4e - pml4e_start, pml4e,
+			addr_hva2gpa(vm, pml4e), (uint64_t) pml4e->address,
+			pml4e->writable, pml4e->execute_disable);
+
+		pdpe_start = addr_gpa2hva(vm, pml4e->address
+			* vm->page_size);
+		for (uint16_t n2 = 0; n2 <= 0x1ffu; n2++) {
+			pdpe = &pdpe_start[n2];
+			if (!pdpe->present)
+				continue;
+			fprintf(stream, "%*spdpe  0x%-3zx %p 0x%-12lx 0x%-10lx "
+				"%u  %u\n",
+				indent, "",
+				pdpe - pdpe_start, pdpe,
+				addr_hva2gpa(vm, pdpe),
+				(uint64_t) pdpe->address, pdpe->writable,
+				pdpe->execute_disable);
+
+			pde_start = addr_gpa2hva(vm,
+				pdpe->address * vm->page_size);
+			for (uint16_t n3 = 0; n3 <= 0x1ffu; n3++) {
+				pde = &pde_start[n3];
+				if (!pde->present)
+					continue;
+				fprintf(stream, "%*spde   0x%-3zx %p "
+					"0x%-12lx 0x%-10lx %u  %u\n",
+					indent, "", pde - pde_start, pde,
+					addr_hva2gpa(vm, pde),
+					(uint64_t) pde->address, pde->writable,
+					pde->execute_disable);
+
+				pte_start = addr_gpa2hva(vm,
+					pde->address * vm->page_size);
+				for (uint16_t n4 = 0; n4 <= 0x1ffu; n4++) {
+					pte = &pte_start[n4];
+					if (!pte->present)
+						continue;
+					fprintf(stream, "%*spte   0x%-3zx %p "
+						"0x%-12lx 0x%-10lx %u  %u "
+						"    %u    0x%-10lx\n",
+						indent, "",
+						pte - pte_start, pte,
+						addr_hva2gpa(vm, pte),
+						(uint64_t) pte->address,
+						pte->writable,
+						pte->execute_disable,
+						pte->dirty,
+						((uint64_t) n1 << 27)
+							| ((uint64_t) n2 << 18)
+							| ((uint64_t) n3 << 9)
+							| ((uint64_t) n4));
+				}
+			}
+		}
+	}
+}
+
+/* Set Unusable Segment
+ *
+ * Input Args: None
+ *
+ * Output Args:
+ *   segp - Pointer to segment register
+ *
+ * Return: None
+ *
+ * Sets the segment register pointed to by segp to an unusable state.
+ */
+static void kvm_seg_set_unusable(struct kvm_segment *segp)
+{
+	memset(segp, 0, sizeof(*segp));
+	segp->unusable = true;
+}
+
+static void kvm_seg_fill_gdt_64bit(struct kvm_vm *vm, struct kvm_segment *segp)
+{
+	void *gdt = addr_gva2hva(vm, vm->gdt);
+	struct desc64 *desc = gdt + (segp->selector >> 3) * 8;
+
+	desc->limit0 = segp->limit & 0xFFFF;
+	desc->base0 = segp->base & 0xFFFF;
+	desc->base1 = segp->base >> 16;
+	desc->type = segp->type;
+	desc->s = segp->s;
+	desc->dpl = segp->dpl;
+	desc->p = segp->present;
+	desc->limit1 = segp->limit >> 16;
+	desc->avl = segp->avl;
+	desc->l = segp->l;
+	desc->db = segp->db;
+	desc->g = segp->g;
+	desc->base2 = segp->base >> 24;
+	if (!segp->s)
+		desc->base3 = segp->base >> 32;
+}
+
+
+/* Set Long Mode Flat Kernel Code Segment
+ *
+ * Input Args:
+ *   vm - VM whose GDT is being filled, or NULL to only write segp
+ *   selector - selector value
+ *
+ * Output Args:
+ *   segp - Pointer to KVM segment
+ *
+ * Return: None
+ *
+ * Sets up the KVM segment pointed to by segp, to be a code segment
+ * with the selector value given by selector.
+ */
+static void kvm_seg_set_kernel_code_64bit(struct kvm_vm *vm, uint16_t selector,
+	struct kvm_segment *segp)
+{
+	memset(segp, 0, sizeof(*segp));
+	segp->selector = selector;
+	segp->limit = 0xFFFFFFFFu;
+	segp->s = 0x1; /* kTypeCodeData */
+	segp->type = 0x08 | 0x01 | 0x02; /* kFlagCode | kFlagCodeAccessed
+					  * | kFlagCodeReadable
+					  */
+	segp->g = true;
+	segp->l = true;
+	segp->present = 1;
+	if (vm)
+		kvm_seg_fill_gdt_64bit(vm, segp);
+}
+
+/* Set Long Mode Flat Kernel Data Segment
+ *
+ * Input Args:
+ *   vm - VM whose GDT is being filled, or NULL to only write segp
+ *   selector - selector value
+ *
+ * Output Args:
+ *   segp - Pointer to KVM segment
+ *
+ * Return: None
+ *
+ * Sets up the KVM segment pointed to by segp, to be a data segment
+ * with the selector value given by selector.
+ */
+static void kvm_seg_set_kernel_data_64bit(struct kvm_vm *vm, uint16_t selector,
+	struct kvm_segment *segp)
+{
+	memset(segp, 0, sizeof(*segp));
+	segp->selector = selector;
+	segp->limit = 0xFFFFFFFFu;
+	segp->s = 0x1; /* kTypeCodeData */
+	segp->type = 0x00 | 0x01 | 0x02; /* kFlagData | kFlagDataAccessed
+					  * | kFlagDataWritable
+					  */
+	segp->g = true;
+	segp->present = true;
+	if (vm)
+		kvm_seg_fill_gdt_64bit(vm, segp);
+}
+
+/* Address Guest Virtual to Guest Physical
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   gpa - VM virtual address
+ *
+ * Output Args: None
+ *
+ * Return:
+ *   Equivalent VM physical address
+ *
+ * Translates the VM virtual address given by gva to a VM physical
+ * address and then locates the memory region containing the VM
+ * physical address, within the VM given by vm.  When found, the host
+ * virtual address providing the memory to the vm physical address is returned.
+ * A TEST_ASSERT failure occurs if no region containing translated
+ * VM virtual address exists.
+ */
+vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
+{
+	uint16_t index[4];
+	struct pageMapL4Entry *pml4e;
+	struct pageDirectoryPointerEntry *pdpe;
+	struct pageDirectoryEntry *pde;
+	struct pageTableEntry *pte;
+	void *hva;
+
+	TEST_ASSERT(vm->mode == VM_MODE_FLAT48PG, "Attempt to use "
+		"unknown or unsupported guest mode, mode: 0x%x", vm->mode);
+
+	index[0] = (gva >> 12) & 0x1ffu;
+	index[1] = (gva >> 21) & 0x1ffu;
+	index[2] = (gva >> 30) & 0x1ffu;
+	index[3] = (gva >> 39) & 0x1ffu;
+
+	if (!vm->pgd_created)
+		goto unmapped_gva;
+	pml4e = addr_gpa2hva(vm, vm->pgd);
+	if (!pml4e[index[3]].present)
+		goto unmapped_gva;
+
+	pdpe = addr_gpa2hva(vm, pml4e[index[3]].address * vm->page_size);
+	if (!pdpe[index[2]].present)
+		goto unmapped_gva;
+
+	pde = addr_gpa2hva(vm, pdpe[index[2]].address * vm->page_size);
+	if (!pde[index[1]].present)
+		goto unmapped_gva;
+
+	pte = addr_gpa2hva(vm, pde[index[1]].address * vm->page_size);
+	if (!pte[index[0]].present)
+		goto unmapped_gva;
+
+	return (pte[index[0]].address * vm->page_size) + (gva & 0xfffu);
+
+unmapped_gva:
+	TEST_ASSERT(false, "No mapping for vm virtual address, "
+		    "gva: 0x%lx", gva);
+}
+
+static void kvm_setup_gdt(struct kvm_vm *vm, struct kvm_dtable *dt, int gdt_memslot,
+			  int pgd_memslot)
+{
+	if (!vm->gdt)
+		vm->gdt = vm_vaddr_alloc(vm, getpagesize(),
+			KVM_UTIL_MIN_VADDR, gdt_memslot, pgd_memslot);
+
+	dt->base = vm->gdt;
+	dt->limit = getpagesize();
+}
+
+static void kvm_setup_tss_64bit(struct kvm_vm *vm, struct kvm_segment *segp,
+				int selector, int gdt_memslot,
+				int pgd_memslot)
+{
+	if (!vm->tss)
+		vm->tss = vm_vaddr_alloc(vm, getpagesize(),
+			KVM_UTIL_MIN_VADDR, gdt_memslot, pgd_memslot);
+
+	memset(segp, 0, sizeof(*segp));
+	segp->base = vm->tss;
+	segp->limit = 0x67;
+	segp->selector = selector;
+	segp->type = 0xb;
+	segp->present = 1;
+	kvm_seg_fill_gdt_64bit(vm, segp);
+}
+
+void vcpu_setup(struct kvm_vm *vm, int vcpuid, int pgd_memslot, int gdt_memslot)
+{
+	struct kvm_sregs sregs;
+
+	/* Set mode specific system register values. */
+	vcpu_sregs_get(vm, vcpuid, &sregs);
+
+	sregs.idt.limit = 0;
+
+	kvm_setup_gdt(vm, &sregs.gdt, gdt_memslot, pgd_memslot);
+
+	switch (vm->mode) {
+	case VM_MODE_FLAT48PG:
+		sregs.cr0 = X86_CR0_PE | X86_CR0_NE | X86_CR0_PG;
+		sregs.cr4 |= X86_CR4_PAE;
+		sregs.efer |= (EFER_LME | EFER_LMA | EFER_NX);
+
+		kvm_seg_set_unusable(&sregs.ldt);
+		kvm_seg_set_kernel_code_64bit(vm, 0x8, &sregs.cs);
+		kvm_seg_set_kernel_data_64bit(vm, 0x10, &sregs.ds);
+		kvm_seg_set_kernel_data_64bit(vm, 0x10, &sregs.es);
+		kvm_setup_tss_64bit(vm, &sregs.tr, 0x18, gdt_memslot, pgd_memslot);
+		break;
+
+	default:
+		TEST_ASSERT(false, "Unknown guest mode, mode: 0x%x", vm->mode);
+	}
+
+	sregs.cr3 = vm->pgd;
+	vcpu_sregs_set(vm, vcpuid, &sregs);
+}
+/* Adds a vCPU with reasonable defaults (i.e., a stack)
+ *
+ * Input Args:
+ *   vcpuid - The id of the VCPU to add to the VM.
+ *   guest_code - The vCPU's entry point
+ */
+void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code)
+{
+	struct kvm_mp_state mp_state;
+	struct kvm_regs regs;
+	vm_vaddr_t stack_vaddr;
+	stack_vaddr = vm_vaddr_alloc(vm, DEFAULT_STACK_PGS * getpagesize(),
+				     DEFAULT_GUEST_STACK_VADDR_MIN, 0, 0);
+
+	/* Create VCPU */
+	vm_vcpu_add(vm, vcpuid, 0, 0);
+
+	/* Setup guest general purpose registers */
+	vcpu_regs_get(vm, vcpuid, &regs);
+	regs.rflags = regs.rflags | 0x2;
+	regs.rsp = stack_vaddr + (DEFAULT_STACK_PGS * getpagesize());
+	regs.rip = (unsigned long) guest_code;
+	vcpu_regs_set(vm, vcpuid, &regs);
+
+	/* Setup the MP state */
+	mp_state.mp_state = 0;
+	vcpu_set_mp_state(vm, vcpuid, &mp_state);
+}
+
+/* VM VCPU CPUID Set
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   vcpuid - VCPU id
+ *   cpuid - The CPUID values to set.
+ *
+ * Output Args: None
+ *
+ * Return: void
+ *
+ * Set the VCPU's CPUID.
+ */
+void vcpu_set_cpuid(struct kvm_vm *vm,
+		uint32_t vcpuid, struct kvm_cpuid2 *cpuid)
+{
+	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
+	int rc;
+
+	TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+
+	rc = ioctl(vcpu->fd, KVM_SET_CPUID2, cpuid);
+	TEST_ASSERT(rc == 0, "KVM_SET_CPUID2 failed, rc: %i errno: %i",
+		    rc, errno);
+
+}
+/* Create a VM with reasonable defaults
+ *
+ * Input Args:
+ *   vcpuid - The id of the single VCPU to add to the VM.
+ *   extra_mem_pages - The size of extra memories to add (this will
+ *                     decide how much extra space we will need to
+ *                     setup the page tables using mem slot 0)
+ *   guest_code - The vCPU's entry point
+ *
+ * Output Args: None
+ *
+ * Return:
+ *   Pointer to opaque structure that describes the created VM.
+ */
+struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages,
+				 void *guest_code)
+{
+	struct kvm_vm *vm;
+	/*
+	 * For x86 the maximum page table size for a memory region
+	 * will be when only 4K pages are used.  In that case the
+	 * total extra size for page tables (for extra N pages) will
+	 * be: N/512+N/512^2+N/512^3+... which is definitely smaller
+	 * than N/512*2.
+	 */
+	uint64_t extra_pg_pages = extra_mem_pages / 512 * 2;
+
+	/* Create VM */
+	vm = vm_create(VM_MODE_FLAT48PG,
+		       DEFAULT_GUEST_PHY_PAGES + extra_pg_pages,
+		       O_RDWR);
+
+	/* Setup guest code */
+	kvm_vm_elf_load(vm, program_invocation_name, 0, 0);
+
+	/* Setup IRQ Chip */
+	vm_create_irqchip(vm);
+
+	/* Add the first vCPU. */
+	vm_vcpu_add_default(vm, vcpuid, guest_code);
+
+	return vm;
+}
+
+struct kvm_x86_state {
+	struct kvm_vcpu_events events;
+	struct kvm_mp_state mp_state;
+	struct kvm_regs regs;
+	struct kvm_xsave xsave;
+	struct kvm_xcrs xcrs;
+	struct kvm_sregs sregs;
+	struct kvm_debugregs debugregs;
+	union {
+		struct kvm_nested_state nested;
+		char nested_[16384];
+	};
+	struct kvm_msrs msrs;
+};
+
+static int kvm_get_num_msrs(struct kvm_vm *vm)
+{
+	struct kvm_msr_list nmsrs;
+	int r;
+
+	nmsrs.nmsrs = 0;
+	r = ioctl(vm->kvm_fd, KVM_GET_MSR_INDEX_LIST, &nmsrs);
+	TEST_ASSERT(r == -1 && errno == E2BIG, "Unexpected result from KVM_GET_MSR_INDEX_LIST probe, r: %i",
+		r);
+
+	return nmsrs.nmsrs;
+}
+
+struct kvm_x86_state *vcpu_save_state(struct kvm_vm *vm, uint32_t vcpuid)
+{
+	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
+	struct kvm_msr_list *list;
+	struct kvm_x86_state *state;
+	int nmsrs, r, i;
+	static int nested_size = -1;
+
+	if (nested_size == -1) {
+		nested_size = kvm_check_cap(KVM_CAP_NESTED_STATE);
+		TEST_ASSERT(nested_size <= sizeof(state->nested_),
+			    "Nested state size too big, %i > %zi",
+			    nested_size, sizeof(state->nested_));
+	}
+
+	nmsrs = kvm_get_num_msrs(vm);
+	list = malloc(sizeof(*list) + nmsrs * sizeof(list->indices[0]));
+	list->nmsrs = nmsrs;
+	r = ioctl(vm->kvm_fd, KVM_GET_MSR_INDEX_LIST, list);
+        TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_MSR_INDEX_LIST, r: %i",
+                r);
+
+	state = malloc(sizeof(*state) + nmsrs * sizeof(state->msrs.entries[0]));
+	r = ioctl(vcpu->fd, KVM_GET_VCPU_EVENTS, &state->events);
+        TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_VCPU_EVENTS, r: %i",
+                r);
+
+	r = ioctl(vcpu->fd, KVM_GET_MP_STATE, &state->mp_state);
+        TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_MP_STATE, r: %i",
+                r);
+
+	r = ioctl(vcpu->fd, KVM_GET_REGS, &state->regs);
+        TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_REGS, r: %i",
+                r);
+
+	r = ioctl(vcpu->fd, KVM_GET_XSAVE, &state->xsave);
+        TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_XSAVE, r: %i",
+                r);
+
+	if (kvm_check_cap(KVM_CAP_XCRS)) {
+		r = ioctl(vcpu->fd, KVM_GET_XCRS, &state->xcrs);
+		TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_XCRS, r: %i",
+			    r);
+	}
+
+	r = ioctl(vcpu->fd, KVM_GET_SREGS, &state->sregs);
+        TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_SREGS, r: %i",
+                r);
+
+	if (nested_size) {
+		state->nested.size = sizeof(state->nested_);
+		r = ioctl(vcpu->fd, KVM_GET_NESTED_STATE, &state->nested);
+		TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_NESTED_STATE, r: %i",
+			r);
+		TEST_ASSERT(state->nested.size <= nested_size,
+			"Nested state size too big, %i (KVM_CHECK_CAP gave %i)",
+			state->nested.size, nested_size);
+	} else
+		state->nested.size = 0;
+
+	state->msrs.nmsrs = nmsrs;
+	for (i = 0; i < nmsrs; i++)
+		state->msrs.entries[i].index = list->indices[i];
+	r = ioctl(vcpu->fd, KVM_GET_MSRS, &state->msrs);
+        TEST_ASSERT(r == nmsrs, "Unexpected result from KVM_GET_MSRS, r: %i (failed at %x)",
+                r, r == nmsrs ? -1 : list->indices[r]);
+
+	r = ioctl(vcpu->fd, KVM_GET_DEBUGREGS, &state->debugregs);
+        TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_DEBUGREGS, r: %i",
+                r);
+
+	free(list);
+	return state;
+}
+
+void vcpu_load_state(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_x86_state *state)
+{
+	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
+	int r;
+
+	if (state->nested.size) {
+		r = ioctl(vcpu->fd, KVM_SET_NESTED_STATE, &state->nested);
+		TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_NESTED_STATE, r: %i",
+			r);
+	}
+
+	r = ioctl(vcpu->fd, KVM_SET_XSAVE, &state->xsave);
+        TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_XSAVE, r: %i",
+                r);
+
+	if (kvm_check_cap(KVM_CAP_XCRS)) {
+		r = ioctl(vcpu->fd, KVM_SET_XCRS, &state->xcrs);
+		TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_XCRS, r: %i",
+			    r);
+	}
+
+	r = ioctl(vcpu->fd, KVM_SET_SREGS, &state->sregs);
+        TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_SREGS, r: %i",
+                r);
+
+	r = ioctl(vcpu->fd, KVM_SET_MSRS, &state->msrs);
+        TEST_ASSERT(r == state->msrs.nmsrs, "Unexpected result from KVM_SET_MSRS, r: %i (failed at %x)",
+                r, r == state->msrs.nmsrs ? -1 : state->msrs.entries[r].index);
+
+	r = ioctl(vcpu->fd, KVM_SET_VCPU_EVENTS, &state->events);
+        TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_VCPU_EVENTS, r: %i",
+                r);
+
+	r = ioctl(vcpu->fd, KVM_SET_MP_STATE, &state->mp_state);
+        TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_MP_STATE, r: %i",
+                r);
+
+	r = ioctl(vcpu->fd, KVM_SET_DEBUGREGS, &state->debugregs);
+        TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_DEBUGREGS, r: %i",
+                r);
+
+	r = ioctl(vcpu->fd, KVM_SET_REGS, &state->regs);
+        TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_REGS, r: %i",
+                r);
+}
diff --git a/tools/testing/selftests/kvm/platform_info_test.c b/tools/testing/selftests/kvm/platform_info_test.c
new file mode 100644
index 000000000..65db510dd
--- /dev/null
+++ b/tools/testing/selftests/kvm/platform_info_test.c
@@ -0,0 +1,110 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Test for x86 KVM_CAP_MSR_PLATFORM_INFO
+ *
+ * Copyright (C) 2018, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ * Verifies expected behavior of controlling guest access to
+ * MSR_PLATFORM_INFO.
+ */
+
+#define _GNU_SOURCE /* for program_invocation_short_name */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "x86.h"
+
+#define VCPU_ID 0
+#define MSR_PLATFORM_INFO_MAX_TURBO_RATIO 0xff00
+
+static void guest_code(void)
+{
+	uint64_t msr_platform_info;
+
+	for (;;) {
+		msr_platform_info = rdmsr(MSR_PLATFORM_INFO);
+		GUEST_SYNC(msr_platform_info);
+		asm volatile ("inc %r11");
+	}
+}
+
+static void set_msr_platform_info_enabled(struct kvm_vm *vm, bool enable)
+{
+	struct kvm_enable_cap cap = {};
+
+	cap.cap = KVM_CAP_MSR_PLATFORM_INFO;
+	cap.flags = 0;
+	cap.args[0] = (int)enable;
+	vm_enable_cap(vm, &cap);
+}
+
+static void test_msr_platform_info_enabled(struct kvm_vm *vm)
+{
+	struct kvm_run *run = vcpu_state(vm, VCPU_ID);
+	struct guest_args args;
+
+	set_msr_platform_info_enabled(vm, true);
+	vcpu_run(vm, VCPU_ID);
+	TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+			"Exit_reason other than KVM_EXIT_IO: %u (%s),\n",
+			run->exit_reason,
+			exit_reason_str(run->exit_reason));
+	guest_args_read(vm, VCPU_ID, &args);
+	TEST_ASSERT(args.port == GUEST_PORT_SYNC,
+			"Received IO from port other than PORT_HOST_SYNC: %u\n",
+			run->io.port);
+	TEST_ASSERT((args.arg1 & MSR_PLATFORM_INFO_MAX_TURBO_RATIO) ==
+		MSR_PLATFORM_INFO_MAX_TURBO_RATIO,
+		"Expected MSR_PLATFORM_INFO to have max turbo ratio mask: %i.",
+		MSR_PLATFORM_INFO_MAX_TURBO_RATIO);
+}
+
+static void test_msr_platform_info_disabled(struct kvm_vm *vm)
+{
+	struct kvm_run *run = vcpu_state(vm, VCPU_ID);
+
+	set_msr_platform_info_enabled(vm, false);
+	vcpu_run(vm, VCPU_ID);
+	TEST_ASSERT(run->exit_reason == KVM_EXIT_SHUTDOWN,
+			"Exit_reason other than KVM_EXIT_SHUTDOWN: %u (%s)\n",
+			run->exit_reason,
+			exit_reason_str(run->exit_reason));
+}
+
+int main(int argc, char *argv[])
+{
+	struct kvm_vm *vm;
+	struct kvm_run *state;
+	int rv;
+	uint64_t msr_platform_info;
+
+	/* Tell stdout not to buffer its content */
+	setbuf(stdout, NULL);
+
+	rv = kvm_check_cap(KVM_CAP_MSR_PLATFORM_INFO);
+	if (!rv) {
+		fprintf(stderr,
+			"KVM_CAP_MSR_PLATFORM_INFO not supported, skip test\n");
+		exit(KSFT_SKIP);
+	}
+
+	vm = vm_create_default(VCPU_ID, 0, guest_code);
+
+	msr_platform_info = vcpu_get_msr(vm, VCPU_ID, MSR_PLATFORM_INFO);
+	vcpu_set_msr(vm, VCPU_ID, MSR_PLATFORM_INFO,
+		msr_platform_info | MSR_PLATFORM_INFO_MAX_TURBO_RATIO);
+	test_msr_platform_info_enabled(vm);
+	test_msr_platform_info_disabled(vm);
+	vcpu_set_msr(vm, VCPU_ID, MSR_PLATFORM_INFO, msr_platform_info);
+
+	kvm_vm_free(vm);
+
+	return 0;
+}
diff --git a/tools/testing/selftests/kvm/set_sregs_test.c b/tools/testing/selftests/kvm/set_sregs_test.c
new file mode 100644
index 000000000..881419d57
--- /dev/null
+++ b/tools/testing/selftests/kvm/set_sregs_test.c
@@ -0,0 +1,54 @@
+/*
+ * KVM_SET_SREGS tests
+ *
+ * Copyright (C) 2018, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ * This is a regression test for the bug fixed by the following commit:
+ * d3802286fa0f ("kvm: x86: Disallow illegal IA32_APIC_BASE MSR values")
+ *
+ * That bug allowed a user-mode program that called the KVM_SET_SREGS
+ * ioctl to put a VCPU's local APIC into an invalid state.
+ *
+ */
+#define _GNU_SOURCE /* for program_invocation_short_name */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+
+#include "kvm_util.h"
+#include "x86.h"
+
+#define VCPU_ID                  5
+
+int main(int argc, char *argv[])
+{
+	struct kvm_sregs sregs;
+	struct kvm_vm *vm;
+	int rc;
+
+	/* Tell stdout not to buffer its content */
+	setbuf(stdout, NULL);
+
+	/* Create VM */
+	vm = vm_create_default(VCPU_ID, 0, NULL);
+
+	vcpu_sregs_get(vm, VCPU_ID, &sregs);
+	sregs.apic_base = 1 << 10;
+	rc = _vcpu_sregs_set(vm, VCPU_ID, &sregs);
+	TEST_ASSERT(rc, "Set IA32_APIC_BASE to %llx (invalid)",
+		    sregs.apic_base);
+	sregs.apic_base = 1 << 11;
+	rc = _vcpu_sregs_set(vm, VCPU_ID, &sregs);
+	TEST_ASSERT(!rc, "Couldn't set IA32_APIC_BASE to %llx (valid)",
+		    sregs.apic_base);
+
+	kvm_vm_free(vm);
+
+	return 0;
+}
diff --git a/tools/testing/selftests/kvm/state_test.c b/tools/testing/selftests/kvm/state_test.c
new file mode 100644
index 000000000..900e3e9df
--- /dev/null
+++ b/tools/testing/selftests/kvm/state_test.c
@@ -0,0 +1,196 @@
+/*
+ * KVM_GET/SET_* tests
+ *
+ * Copyright (C) 2018, Red Hat, Inc.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ * Tests for vCPU state save/restore, including nested guest state.
+ */
+#define _GNU_SOURCE /* for program_invocation_short_name */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+
+#include "kvm_util.h"
+#include "x86.h"
+#include "vmx.h"
+
+#define VCPU_ID		5
+
+static bool have_nested_state;
+
+void l2_guest_code(void)
+{
+	GUEST_SYNC(5);
+
+        /* Exit to L1 */
+	vmcall();
+
+	/* L1 has now set up a shadow VMCS for us.  */
+	GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffee);
+	GUEST_SYNC(9);
+	GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffee);
+	GUEST_ASSERT(!vmwrite(GUEST_RIP, 0xc0fffee));
+	GUEST_SYNC(10);
+	GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0fffee);
+	GUEST_ASSERT(!vmwrite(GUEST_RIP, 0xc0ffffee));
+	GUEST_SYNC(11);
+
+	/* Done, exit to L1 and never come back.  */
+	vmcall();
+}
+
+void l1_guest_code(struct vmx_pages *vmx_pages)
+{
+#define L2_GUEST_STACK_SIZE 64
+        unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+
+	GUEST_ASSERT(vmx_pages->vmcs_gpa);
+	GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
+	GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa);
+
+	GUEST_SYNC(3);
+	GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa);
+
+	prepare_vmcs(vmx_pages, l2_guest_code,
+		     &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+	GUEST_SYNC(4);
+	GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa);
+	GUEST_ASSERT(!vmlaunch());
+	GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa);
+	GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+
+	/* Check that the launched state is preserved.  */
+	GUEST_ASSERT(vmlaunch());
+
+	GUEST_ASSERT(!vmresume());
+	GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+
+	GUEST_SYNC(6);
+	GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+
+	GUEST_ASSERT(!vmresume());
+	GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+
+	vmwrite(GUEST_RIP, vmreadz(GUEST_RIP) + 3);
+
+	vmwrite(SECONDARY_VM_EXEC_CONTROL, SECONDARY_EXEC_SHADOW_VMCS);
+	vmwrite(VMCS_LINK_POINTER, vmx_pages->shadow_vmcs_gpa);
+
+	GUEST_ASSERT(!vmptrld(vmx_pages->shadow_vmcs_gpa));
+	GUEST_ASSERT(vmlaunch());
+	GUEST_SYNC(7);
+	GUEST_ASSERT(vmlaunch());
+	GUEST_ASSERT(vmresume());
+
+	vmwrite(GUEST_RIP, 0xc0ffee);
+	GUEST_SYNC(8);
+	GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffee);
+
+	GUEST_ASSERT(!vmptrld(vmx_pages->vmcs_gpa));
+	GUEST_ASSERT(!vmresume());
+	GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+
+	GUEST_ASSERT(!vmptrld(vmx_pages->shadow_vmcs_gpa));
+	GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffffee);
+	GUEST_ASSERT(vmlaunch());
+	GUEST_ASSERT(vmresume());
+	GUEST_SYNC(12);
+	GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffffee);
+	GUEST_ASSERT(vmlaunch());
+	GUEST_ASSERT(vmresume());
+}
+
+void guest_code(struct vmx_pages *vmx_pages)
+{
+	GUEST_SYNC(1);
+	GUEST_SYNC(2);
+
+	if (vmx_pages)
+		l1_guest_code(vmx_pages);
+
+	GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+	struct vmx_pages *vmx_pages = NULL;
+	vm_vaddr_t vmx_pages_gva = 0;
+
+	struct kvm_regs regs1, regs2;
+	struct kvm_vm *vm;
+	struct kvm_run *run;
+	struct kvm_x86_state *state;
+	int stage;
+
+	struct kvm_cpuid_entry2 *entry = kvm_get_supported_cpuid_entry(1);
+
+	/* Create VM */
+	vm = vm_create_default(VCPU_ID, 0, guest_code);
+	vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
+	run = vcpu_state(vm, VCPU_ID);
+
+	vcpu_regs_get(vm, VCPU_ID, &regs1);
+
+	if (kvm_check_cap(KVM_CAP_NESTED_STATE)) {
+		vmx_pages = vcpu_alloc_vmx(vm, &vmx_pages_gva);
+		vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva);
+	} else {
+		printf("will skip nested state checks\n");
+		vcpu_args_set(vm, VCPU_ID, 1, 0);
+	}
+
+	for (stage = 1;; stage++) {
+		_vcpu_run(vm, VCPU_ID);
+		TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+			    "Unexpected exit reason: %u (%s),\n",
+			    run->exit_reason,
+			    exit_reason_str(run->exit_reason));
+
+		memset(&regs1, 0, sizeof(regs1));
+		vcpu_regs_get(vm, VCPU_ID, &regs1);
+		switch (run->io.port) {
+		case GUEST_PORT_ABORT:
+			TEST_ASSERT(false, "%s at %s:%d", (const char *) regs1.rdi,
+				    __FILE__, regs1.rsi);
+			/* NOT REACHED */
+		case GUEST_PORT_SYNC:
+			break;
+		case GUEST_PORT_DONE:
+			goto done;
+		default:
+			TEST_ASSERT(false, "Unknown port 0x%x.", run->io.port);
+		}
+
+		/* PORT_SYNC is handled here.  */
+		TEST_ASSERT(!strcmp((const char *)regs1.rdi, "hello") &&
+			    regs1.rsi == stage, "Unexpected register values vmexit #%lx, got %lx",
+			    stage, (ulong) regs1.rsi);
+
+		state = vcpu_save_state(vm, VCPU_ID);
+		kvm_vm_release(vm);
+
+		/* Restore state in a new VM.  */
+		kvm_vm_restart(vm, O_RDWR);
+		vm_vcpu_add(vm, VCPU_ID, 0, 0);
+		vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
+		vcpu_load_state(vm, VCPU_ID, state);
+		run = vcpu_state(vm, VCPU_ID);
+		free(state);
+
+		memset(&regs2, 0, sizeof(regs2));
+		vcpu_regs_get(vm, VCPU_ID, &regs2);
+		TEST_ASSERT(!memcmp(&regs1, &regs2, sizeof(regs2)),
+			    "Unexpected register values after vcpu_load_state; rdi: %lx rsi: %lx",
+			    (ulong) regs2.rdi, (ulong) regs2.rsi);
+	}
+
+done:
+	kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/kvm/sync_regs_test.c b/tools/testing/selftests/kvm/sync_regs_test.c
new file mode 100644
index 000000000..213343e5d
--- /dev/null
+++ b/tools/testing/selftests/kvm/sync_regs_test.c
@@ -0,0 +1,237 @@
+/*
+ * Test for x86 KVM_CAP_SYNC_REGS
+ *
+ * Copyright (C) 2018, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ * Verifies expected behavior of x86 KVM_CAP_SYNC_REGS functionality,
+ * including requesting an invalid register set, updates to/from values
+ * in kvm_run.s.regs when kvm_valid_regs and kvm_dirty_regs are toggled.
+ */
+
+#define _GNU_SOURCE /* for program_invocation_short_name */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "x86.h"
+
+#define VCPU_ID 5
+
+void guest_code(void)
+{
+	for (;;) {
+		GUEST_SYNC(0);
+		asm volatile ("inc %r11");
+	}
+}
+
+static void compare_regs(struct kvm_regs *left, struct kvm_regs *right)
+{
+#define REG_COMPARE(reg) \
+	TEST_ASSERT(left->reg == right->reg, \
+		    "Register " #reg \
+		    " values did not match: 0x%llx, 0x%llx\n", \
+		    left->reg, right->reg)
+	REG_COMPARE(rax);
+	REG_COMPARE(rbx);
+	REG_COMPARE(rcx);
+	REG_COMPARE(rdx);
+	REG_COMPARE(rsi);
+	REG_COMPARE(rdi);
+	REG_COMPARE(rsp);
+	REG_COMPARE(rbp);
+	REG_COMPARE(r8);
+	REG_COMPARE(r9);
+	REG_COMPARE(r10);
+	REG_COMPARE(r11);
+	REG_COMPARE(r12);
+	REG_COMPARE(r13);
+	REG_COMPARE(r14);
+	REG_COMPARE(r15);
+	REG_COMPARE(rip);
+	REG_COMPARE(rflags);
+#undef REG_COMPARE
+}
+
+static void compare_sregs(struct kvm_sregs *left, struct kvm_sregs *right)
+{
+}
+
+static void compare_vcpu_events(struct kvm_vcpu_events *left,
+				struct kvm_vcpu_events *right)
+{
+}
+
+#define TEST_SYNC_FIELDS   (KVM_SYNC_X86_REGS|KVM_SYNC_X86_SREGS|KVM_SYNC_X86_EVENTS)
+#define INVALID_SYNC_FIELD 0x80000000
+
+int main(int argc, char *argv[])
+{
+	struct kvm_vm *vm;
+	struct kvm_run *run;
+	struct kvm_regs regs;
+	struct kvm_sregs sregs;
+	struct kvm_vcpu_events events;
+	int rv, cap;
+
+	/* Tell stdout not to buffer its content */
+	setbuf(stdout, NULL);
+
+	cap = kvm_check_cap(KVM_CAP_SYNC_REGS);
+	if ((cap & TEST_SYNC_FIELDS) != TEST_SYNC_FIELDS) {
+		fprintf(stderr, "KVM_CAP_SYNC_REGS not supported, skipping test\n");
+		exit(KSFT_SKIP);
+	}
+	if ((cap & INVALID_SYNC_FIELD) != 0) {
+		fprintf(stderr, "The \"invalid\" field is not invalid, skipping test\n");
+		exit(KSFT_SKIP);
+	}
+
+	/* Create VM */
+	vm = vm_create_default(VCPU_ID, 0, guest_code);
+
+	run = vcpu_state(vm, VCPU_ID);
+
+	/* Request reading invalid register set from VCPU. */
+	run->kvm_valid_regs = INVALID_SYNC_FIELD;
+	rv = _vcpu_run(vm, VCPU_ID);
+	TEST_ASSERT(rv < 0 && errno == EINVAL,
+		    "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d\n",
+		    rv);
+	vcpu_state(vm, VCPU_ID)->kvm_valid_regs = 0;
+
+	run->kvm_valid_regs = INVALID_SYNC_FIELD | TEST_SYNC_FIELDS;
+	rv = _vcpu_run(vm, VCPU_ID);
+	TEST_ASSERT(rv < 0 && errno == EINVAL,
+		    "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d\n",
+		    rv);
+	vcpu_state(vm, VCPU_ID)->kvm_valid_regs = 0;
+
+	/* Request setting invalid register set into VCPU. */
+	run->kvm_dirty_regs = INVALID_SYNC_FIELD;
+	rv = _vcpu_run(vm, VCPU_ID);
+	TEST_ASSERT(rv < 0 && errno == EINVAL,
+		    "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d\n",
+		    rv);
+	vcpu_state(vm, VCPU_ID)->kvm_dirty_regs = 0;
+
+	run->kvm_dirty_regs = INVALID_SYNC_FIELD | TEST_SYNC_FIELDS;
+	rv = _vcpu_run(vm, VCPU_ID);
+	TEST_ASSERT(rv < 0 && errno == EINVAL,
+		    "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d\n",
+		    rv);
+	vcpu_state(vm, VCPU_ID)->kvm_dirty_regs = 0;
+
+	/* Request and verify all valid register sets. */
+	/* TODO: BUILD TIME CHECK: TEST_ASSERT(KVM_SYNC_X86_NUM_FIELDS != 3); */
+	run->kvm_valid_regs = TEST_SYNC_FIELDS;
+	rv = _vcpu_run(vm, VCPU_ID);
+	TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+		    "Unexpected exit reason: %u (%s),\n",
+		    run->exit_reason,
+		    exit_reason_str(run->exit_reason));
+
+	vcpu_regs_get(vm, VCPU_ID, &regs);
+	compare_regs(&regs, &run->s.regs.regs);
+
+	vcpu_sregs_get(vm, VCPU_ID, &sregs);
+	compare_sregs(&sregs, &run->s.regs.sregs);
+
+	vcpu_events_get(vm, VCPU_ID, &events);
+	compare_vcpu_events(&events, &run->s.regs.events);
+
+	/* Set and verify various register values. */
+	run->s.regs.regs.r11 = 0xBAD1DEA;
+	run->s.regs.sregs.apic_base = 1 << 11;
+	/* TODO run->s.regs.events.XYZ = ABC; */
+
+	run->kvm_valid_regs = TEST_SYNC_FIELDS;
+	run->kvm_dirty_regs = KVM_SYNC_X86_REGS | KVM_SYNC_X86_SREGS;
+	rv = _vcpu_run(vm, VCPU_ID);
+	TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+		    "Unexpected exit reason: %u (%s),\n",
+		    run->exit_reason,
+		    exit_reason_str(run->exit_reason));
+	TEST_ASSERT(run->s.regs.regs.r11 == 0xBAD1DEA + 1,
+		    "r11 sync regs value incorrect 0x%llx.",
+		    run->s.regs.regs.r11);
+	TEST_ASSERT(run->s.regs.sregs.apic_base == 1 << 11,
+		    "apic_base sync regs value incorrect 0x%llx.",
+		    run->s.regs.sregs.apic_base);
+
+	vcpu_regs_get(vm, VCPU_ID, &regs);
+	compare_regs(&regs, &run->s.regs.regs);
+
+	vcpu_sregs_get(vm, VCPU_ID, &sregs);
+	compare_sregs(&sregs, &run->s.regs.sregs);
+
+	vcpu_events_get(vm, VCPU_ID, &events);
+	compare_vcpu_events(&events, &run->s.regs.events);
+
+	/* Clear kvm_dirty_regs bits, verify new s.regs values are
+	 * overwritten with existing guest values.
+	 */
+	run->kvm_valid_regs = TEST_SYNC_FIELDS;
+	run->kvm_dirty_regs = 0;
+	run->s.regs.regs.r11 = 0xDEADBEEF;
+	rv = _vcpu_run(vm, VCPU_ID);
+	TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+		    "Unexpected exit reason: %u (%s),\n",
+		    run->exit_reason,
+		    exit_reason_str(run->exit_reason));
+	TEST_ASSERT(run->s.regs.regs.r11 != 0xDEADBEEF,
+		    "r11 sync regs value incorrect 0x%llx.",
+		    run->s.regs.regs.r11);
+
+	/* Clear kvm_valid_regs bits and kvm_dirty_bits.
+	 * Verify s.regs values are not overwritten with existing guest values
+	 * and that guest values are not overwritten with kvm_sync_regs values.
+	 */
+	run->kvm_valid_regs = 0;
+	run->kvm_dirty_regs = 0;
+	run->s.regs.regs.r11 = 0xAAAA;
+	regs.r11 = 0xBAC0;
+	vcpu_regs_set(vm, VCPU_ID, &regs);
+	rv = _vcpu_run(vm, VCPU_ID);
+	TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+		    "Unexpected exit reason: %u (%s),\n",
+		    run->exit_reason,
+		    exit_reason_str(run->exit_reason));
+	TEST_ASSERT(run->s.regs.regs.r11 == 0xAAAA,
+		    "r11 sync regs value incorrect 0x%llx.",
+		    run->s.regs.regs.r11);
+	vcpu_regs_get(vm, VCPU_ID, &regs);
+	TEST_ASSERT(regs.r11 == 0xBAC0 + 1,
+		    "r11 guest value incorrect 0x%llx.",
+		    regs.r11);
+
+	/* Clear kvm_valid_regs bits. Verify s.regs values are not overwritten
+	 * with existing guest values but that guest values are overwritten
+	 * with kvm_sync_regs values.
+	 */
+	run->kvm_valid_regs = 0;
+	run->kvm_dirty_regs = TEST_SYNC_FIELDS;
+	run->s.regs.regs.r11 = 0xBBBB;
+	rv = _vcpu_run(vm, VCPU_ID);
+	TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+		    "Unexpected exit reason: %u (%s),\n",
+		    run->exit_reason,
+		    exit_reason_str(run->exit_reason));
+	TEST_ASSERT(run->s.regs.regs.r11 == 0xBBBB,
+		    "r11 sync regs value incorrect 0x%llx.",
+		    run->s.regs.regs.r11);
+	vcpu_regs_get(vm, VCPU_ID, &regs);
+	TEST_ASSERT(regs.r11 == 0xBBBB + 1,
+		    "r11 guest value incorrect 0x%llx.",
+		    regs.r11);
+
+	kvm_vm_free(vm);
+
+	return 0;
+}
diff --git a/tools/testing/selftests/kvm/vmx_tsc_adjust_test.c b/tools/testing/selftests/kvm/vmx_tsc_adjust_test.c
new file mode 100644
index 000000000..49bcc68b0
--- /dev/null
+++ b/tools/testing/selftests/kvm/vmx_tsc_adjust_test.c
@@ -0,0 +1,175 @@
+/*
+ * gtests/tests/vmx_tsc_adjust_test.c
+ *
+ * Copyright (C) 2018, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ *
+ * IA32_TSC_ADJUST test
+ *
+ * According to the SDM, "if an execution of WRMSR to the
+ * IA32_TIME_STAMP_COUNTER MSR adds (or subtracts) value X from the TSC,
+ * the logical processor also adds (or subtracts) value X from the
+ * IA32_TSC_ADJUST MSR.
+ *
+ * Note that when L1 doesn't intercept writes to IA32_TSC, a
+ * WRMSR(IA32_TSC) from L2 sets L1's TSC value, not L2's perceived TSC
+ * value.
+ *
+ * This test verifies that this unusual case is handled correctly.
+ */
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "x86.h"
+#include "vmx.h"
+
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "../kselftest.h"
+
+#ifndef MSR_IA32_TSC_ADJUST
+#define MSR_IA32_TSC_ADJUST 0x3b
+#endif
+
+#define PAGE_SIZE	4096
+#define VCPU_ID		5
+
+#define TSC_ADJUST_VALUE (1ll << 32)
+#define TSC_OFFSET_VALUE -(1ll << 48)
+
+enum {
+	PORT_ABORT = 0x1000,
+	PORT_REPORT,
+	PORT_DONE,
+};
+
+enum {
+	VMXON_PAGE = 0,
+	VMCS_PAGE,
+	MSR_BITMAP_PAGE,
+
+	NUM_VMX_PAGES,
+};
+
+struct kvm_single_msr {
+	struct kvm_msrs header;
+	struct kvm_msr_entry entry;
+} __attribute__((packed));
+
+/* The virtual machine object. */
+static struct kvm_vm *vm;
+
+static void check_ia32_tsc_adjust(int64_t max)
+{
+	int64_t adjust;
+
+	adjust = rdmsr(MSR_IA32_TSC_ADJUST);
+	GUEST_SYNC(adjust);
+	GUEST_ASSERT(adjust <= max);
+}
+
+static void l2_guest_code(void)
+{
+	uint64_t l1_tsc = rdtsc() - TSC_OFFSET_VALUE;
+
+	wrmsr(MSR_IA32_TSC, l1_tsc - TSC_ADJUST_VALUE);
+	check_ia32_tsc_adjust(-2 * TSC_ADJUST_VALUE);
+
+	/* Exit to L1 */
+	__asm__ __volatile__("vmcall");
+}
+
+static void l1_guest_code(struct vmx_pages *vmx_pages)
+{
+#define L2_GUEST_STACK_SIZE 64
+	unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+	uint32_t control;
+	uintptr_t save_cr3;
+
+	GUEST_ASSERT(rdtsc() < TSC_ADJUST_VALUE);
+	wrmsr(MSR_IA32_TSC, rdtsc() - TSC_ADJUST_VALUE);
+	check_ia32_tsc_adjust(-1 * TSC_ADJUST_VALUE);
+
+	GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
+
+	/* Prepare the VMCS for L2 execution. */
+	prepare_vmcs(vmx_pages, l2_guest_code,
+		     &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+	control = vmreadz(CPU_BASED_VM_EXEC_CONTROL);
+	control |= CPU_BASED_USE_MSR_BITMAPS | CPU_BASED_USE_TSC_OFFSETING;
+	vmwrite(CPU_BASED_VM_EXEC_CONTROL, control);
+	vmwrite(TSC_OFFSET, TSC_OFFSET_VALUE);
+
+	/* Jump into L2.  First, test failure to load guest CR3.  */
+	save_cr3 = vmreadz(GUEST_CR3);
+	vmwrite(GUEST_CR3, -1ull);
+	GUEST_ASSERT(!vmlaunch());
+	GUEST_ASSERT(vmreadz(VM_EXIT_REASON) ==
+		     (EXIT_REASON_FAILED_VMENTRY | EXIT_REASON_INVALID_STATE));
+	check_ia32_tsc_adjust(-1 * TSC_ADJUST_VALUE);
+	vmwrite(GUEST_CR3, save_cr3);
+
+	GUEST_ASSERT(!vmlaunch());
+	GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+
+	check_ia32_tsc_adjust(-2 * TSC_ADJUST_VALUE);
+
+	GUEST_DONE();
+}
+
+void report(int64_t val)
+{
+	printf("IA32_TSC_ADJUST is %ld (%lld * TSC_ADJUST_VALUE + %lld).\n",
+	       val, val / TSC_ADJUST_VALUE, val % TSC_ADJUST_VALUE);
+}
+
+int main(int argc, char *argv[])
+{
+	struct vmx_pages *vmx_pages;
+	vm_vaddr_t vmx_pages_gva;
+	struct kvm_cpuid_entry2 *entry = kvm_get_supported_cpuid_entry(1);
+
+	if (!(entry->ecx & CPUID_VMX)) {
+		fprintf(stderr, "nested VMX not enabled, skipping test\n");
+		exit(KSFT_SKIP);
+	}
+
+	vm = vm_create_default(VCPU_ID, 0, (void *) l1_guest_code);
+	vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
+
+	/* Allocate VMX pages and shared descriptors (vmx_pages). */
+	vmx_pages = vcpu_alloc_vmx(vm, &vmx_pages_gva);
+	vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva);
+
+	for (;;) {
+		volatile struct kvm_run *run = vcpu_state(vm, VCPU_ID);
+		struct guest_args args;
+
+		vcpu_run(vm, VCPU_ID);
+		guest_args_read(vm, VCPU_ID, &args);
+		TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+			    "Got exit_reason other than KVM_EXIT_IO: %u (%s)\n",
+			    run->exit_reason,
+			    exit_reason_str(run->exit_reason));
+
+		switch (args.port) {
+		case GUEST_PORT_ABORT:
+			TEST_ASSERT(false, "%s", (const char *) args.arg0);
+			/* NOT REACHED */
+		case GUEST_PORT_SYNC:
+			report(args.arg1);
+			break;
+		case GUEST_PORT_DONE:
+			goto done;
+		default:
+			TEST_ASSERT(false, "Unknown port 0x%x.", args.port);
+		}
+	}
+
+	kvm_vm_free(vm);
+done:
+	return 0;
+}
diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk
new file mode 100644
index 000000000..9700281be
--- /dev/null
+++ b/tools/testing/selftests/lib.mk
@@ -0,0 +1,169 @@
+# This mimics the top-level Makefile. We do it explicitly here so that this
+# Makefile can operate with or without the kbuild infrastructure.
+ifneq ($(LLVM),)
+CC := clang
+else
+CC := $(CROSS_COMPILE)gcc
+endif
+
+ifeq (0,$(MAKELEVEL))
+OUTPUT := $(shell pwd)
+endif
+
+# The following are built by lib.mk common compile rules.
+# TEST_CUSTOM_PROGS should be used by tests that require
+# custom build rule and prevent common build rule use.
+# TEST_PROGS are for test shell scripts.
+# TEST_CUSTOM_PROGS and TEST_PROGS will be run by common run_tests
+# and install targets. Common clean doesn't touch them.
+TEST_GEN_PROGS := $(patsubst %,$(OUTPUT)/%,$(TEST_GEN_PROGS))
+TEST_GEN_PROGS_EXTENDED := $(patsubst %,$(OUTPUT)/%,$(TEST_GEN_PROGS_EXTENDED))
+TEST_GEN_FILES := $(patsubst %,$(OUTPUT)/%,$(TEST_GEN_FILES))
+
+ifdef KSFT_KHDR_INSTALL
+top_srcdir ?= ../../../..
+include $(top_srcdir)/scripts/subarch.include
+ARCH		?= $(SUBARCH)
+
+.PHONY: khdr
+.NOTPARALLEL:
+khdr:
+	make ARCH=$(ARCH) -C $(top_srcdir) headers_install
+
+all: khdr $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES)
+else
+all: $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES)
+endif
+
+.ONESHELL:
+define RUN_TEST_PRINT_RESULT
+	TEST_HDR_MSG="selftests: "`basename $$PWD`:" $$BASENAME_TEST";	\
+	echo $$TEST_HDR_MSG;					\
+	echo "========================================";	\
+	if [ ! -x $$TEST ]; then	\
+		echo "$$TEST_HDR_MSG: Warning: file $$BASENAME_TEST is not executable, correct this.";\
+		echo "not ok 1..$$test_num $$TEST_HDR_MSG [FAIL]"; \
+	else					\
+		cd `dirname $$TEST` > /dev/null; \
+		if [ "X$(summary)" != "X" ]; then	\
+			(./$$BASENAME_TEST > /tmp/$$BASENAME_TEST 2>&1 && \
+			echo "ok 1..$$test_num $$TEST_HDR_MSG [PASS]") || \
+			(if [ $$? -eq $$skip ]; then	\
+				echo "not ok 1..$$test_num $$TEST_HDR_MSG [SKIP]";				\
+			else echo "not ok 1..$$test_num $$TEST_HDR_MSG [FAIL]";					\
+			fi;)			\
+		else				\
+			(./$$BASENAME_TEST &&	\
+			echo "ok 1..$$test_num $$TEST_HDR_MSG [PASS]") ||						\
+			(if [ $$? -eq $$skip ]; then \
+				echo "not ok 1..$$test_num $$TEST_HDR_MSG [SKIP]"; \
+			else echo "not ok 1..$$test_num $$TEST_HDR_MSG [FAIL]";				\
+			fi;)		\
+		fi;				\
+		cd - > /dev/null;		\
+	fi;
+endef
+
+define RUN_TESTS
+	@export KSFT_TAP_LEVEL=`echo 1`;		\
+	test_num=`echo 0`;				\
+	skip=`echo 4`;					\
+	echo "TAP version 13";				\
+	for TEST in $(1); do				\
+		BASENAME_TEST=`basename $$TEST`;	\
+		test_num=`echo $$test_num+1 | bc`;	\
+		$(call RUN_TEST_PRINT_RESULT,$(TEST),$(BASENAME_TEST),$(test_num),$(skip))						\
+	done;
+endef
+
+run_tests: all
+ifneq ($(KBUILD_SRC),)
+	@if [ "X$(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES)" != "X" ]; then
+		@rsync -aq $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES) $(OUTPUT)
+	fi
+	@if [ "X$(TEST_PROGS)" != "X" ]; then
+		$(call RUN_TESTS, $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS) $(OUTPUT)/$(TEST_PROGS))
+	else
+		$(call RUN_TESTS, $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS))
+	fi
+else
+	$(call RUN_TESTS, $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS) $(TEST_PROGS))
+endif
+
+define INSTALL_SINGLE_RULE
+	$(if $(INSTALL_LIST),@mkdir -p $(INSTALL_PATH))
+	$(if $(INSTALL_LIST),@echo rsync -a $(INSTALL_LIST) $(INSTALL_PATH)/)
+	$(if $(INSTALL_LIST),@rsync -a $(INSTALL_LIST) $(INSTALL_PATH)/)
+endef
+
+define INSTALL_RULE
+	$(eval INSTALL_LIST = $(TEST_PROGS)) $(INSTALL_SINGLE_RULE)
+	$(eval INSTALL_LIST = $(TEST_PROGS_EXTENDED)) $(INSTALL_SINGLE_RULE)
+	$(eval INSTALL_LIST = $(TEST_FILES)) $(INSTALL_SINGLE_RULE)
+	$(eval INSTALL_LIST = $(TEST_GEN_PROGS)) $(INSTALL_SINGLE_RULE)
+	$(eval INSTALL_LIST = $(TEST_CUSTOM_PROGS)) $(INSTALL_SINGLE_RULE)
+	$(eval INSTALL_LIST = $(TEST_GEN_PROGS_EXTENDED)) $(INSTALL_SINGLE_RULE)
+	$(eval INSTALL_LIST = $(TEST_GEN_FILES)) $(INSTALL_SINGLE_RULE)
+endef
+
+install: all
+ifdef INSTALL_PATH
+	$(INSTALL_RULE)
+else
+	$(error Error: set INSTALL_PATH to use install)
+endif
+
+define EMIT_TESTS
+	@test_num=`echo 0`;				\
+	for TEST in $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS) $(TEST_PROGS); do \
+		BASENAME_TEST=`basename $$TEST`;	\
+		test_num=`echo $$test_num+1 | bc`;	\
+		TEST_HDR_MSG="selftests: "`basename $$PWD`:" $$BASENAME_TEST";	\
+		echo "echo $$TEST_HDR_MSG";	\
+		if [ ! -x $$TEST ]; then	\
+			echo "echo \"$$TEST_HDR_MSG: Warning: file $$BASENAME_TEST is not executable, correct this.\"";		\
+			echo "echo \"not ok 1..$$test_num $$TEST_HDR_MSG [FAIL]\""; \
+		else
+			echo "(./$$BASENAME_TEST >> \$$OUTPUT 2>&1 && echo \"ok 1..$$test_num $$TEST_HDR_MSG [PASS]\") || (if [ \$$? -eq \$$skip ]; then echo \"not ok 1..$$test_num $$TEST_HDR_MSG [SKIP]\"; else echo \"not ok 1..$$test_num $$TEST_HDR_MSG [FAIL]\"; fi;)"; \
+		fi;		\
+	done;
+endef
+
+emit_tests:
+	$(EMIT_TESTS)
+
+# define if isn't already. It is undefined in make O= case.
+ifeq ($(RM),)
+RM := rm -f
+endif
+
+define CLEAN
+	$(RM) -r $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) $(EXTRA_CLEAN)
+endef
+
+clean:
+	$(CLEAN)
+
+# When make O= with kselftest target from main level
+# the following aren't defined.
+#
+ifneq ($(KBUILD_SRC),)
+LINK.c = $(CC) $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) $(TARGET_ARCH)
+COMPILE.S = $(CC) $(ASFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c
+LINK.S = $(CC) $(ASFLAGS) $(CPPFLAGS) $(LDFLAGS) $(TARGET_ARCH)
+endif
+
+# Selftest makefiles can override those targets by setting
+# OVERRIDE_TARGETS = 1.
+ifeq ($(OVERRIDE_TARGETS),)
+$(OUTPUT)/%:%.c
+	$(LINK.c) $^ $(LDLIBS) -o $@
+
+$(OUTPUT)/%.o:%.S
+	$(COMPILE.S) $^ -o $@
+
+$(OUTPUT)/%:%.S
+	$(LINK.S) $^ $(LDLIBS) -o $@
+endif
+
+.PHONY: run_tests all clean install emit_tests
diff --git a/tools/testing/selftests/lib/Makefile b/tools/testing/selftests/lib/Makefile
new file mode 100644
index 000000000..70d5711e3
--- /dev/null
+++ b/tools/testing/selftests/lib/Makefile
@@ -0,0 +1,8 @@
+# Makefile for lib/ function selftests
+
+# No binaries, but make sure arg-less "make" doesn't trigger "run_tests"
+all:
+
+TEST_PROGS := printf.sh bitmap.sh prime_numbers.sh
+
+include ../lib.mk
diff --git a/tools/testing/selftests/lib/bitmap.sh b/tools/testing/selftests/lib/bitmap.sh
new file mode 100755
index 000000000..5a90006d1
--- /dev/null
+++ b/tools/testing/selftests/lib/bitmap.sh
@@ -0,0 +1,19 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+# Runs bitmap infrastructure tests using test_bitmap kernel module
+if ! /sbin/modprobe -q -n test_bitmap; then
+	echo "bitmap: module test_bitmap is not found [SKIP]"
+	exit $ksft_skip
+fi
+
+if /sbin/modprobe -q test_bitmap; then
+	/sbin/modprobe -q -r test_bitmap
+	echo "bitmap: ok"
+else
+	echo "bitmap: [FAIL]"
+	exit 1
+fi
diff --git a/tools/testing/selftests/lib/config b/tools/testing/selftests/lib/config
new file mode 100644
index 000000000..126933bcc
--- /dev/null
+++ b/tools/testing/selftests/lib/config
@@ -0,0 +1,3 @@
+CONFIG_TEST_PRINTF=m
+CONFIG_TEST_BITMAP=m
+CONFIG_PRIME_NUMBERS=m
diff --git a/tools/testing/selftests/lib/prime_numbers.sh b/tools/testing/selftests/lib/prime_numbers.sh
new file mode 100755
index 000000000..78e7483c8
--- /dev/null
+++ b/tools/testing/selftests/lib/prime_numbers.sh
@@ -0,0 +1,19 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# Checks fast/slow prime_number generation for inconsistencies
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+if ! /sbin/modprobe -q -n prime_numbers; then
+	echo "prime_numbers: module prime_numbers is not found [SKIP]"
+	exit $ksft_skip
+fi
+
+if /sbin/modprobe -q prime_numbers selftest=65536; then
+	/sbin/modprobe -q -r prime_numbers
+	echo "prime_numbers: ok"
+else
+	echo "prime_numbers: [FAIL]"
+	exit 1
+fi
diff --git a/tools/testing/selftests/lib/printf.sh b/tools/testing/selftests/lib/printf.sh
new file mode 100755
index 000000000..45a23e2d6
--- /dev/null
+++ b/tools/testing/selftests/lib/printf.sh
@@ -0,0 +1,19 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# Runs printf infrastructure using test_printf kernel module
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+if ! /sbin/modprobe -q -n test_printf; then
+	echo "printf: module test_printf is not found [SKIP]"
+	exit $ksft_skip
+fi
+
+if /sbin/modprobe -q test_printf; then
+	/sbin/modprobe -q -r test_printf
+	echo "printf: ok"
+else
+	echo "printf: [FAIL]"
+	exit 1
+fi
diff --git a/tools/testing/selftests/locking/Makefile b/tools/testing/selftests/locking/Makefile
new file mode 100644
index 000000000..6e7761ab3
--- /dev/null
+++ b/tools/testing/selftests/locking/Makefile
@@ -0,0 +1,10 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for locking/ww_mutx selftests
+
+# No binaries, but make sure arg-less "make" doesn't trigger "run_tests"
+all:
+
+TEST_PROGS := ww_mutex.sh
+
+include ../lib.mk
diff --git a/tools/testing/selftests/locking/ww_mutex.sh b/tools/testing/selftests/locking/ww_mutex.sh
new file mode 100755
index 000000000..91e4ac756
--- /dev/null
+++ b/tools/testing/selftests/locking/ww_mutex.sh
@@ -0,0 +1,19 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+# Runs API tests for struct ww_mutex (Wait/Wound mutexes)
+if ! /sbin/modprobe -q -n test-ww_mutex; then
+	echo "ww_mutex: module test-ww_mutex is not found [SKIP]"
+	exit $ksft_skip
+fi
+
+if /sbin/modprobe -q test-ww_mutex; then
+       /sbin/modprobe -q -r test-ww_mutex
+       echo "locking/ww_mutex: ok"
+else
+       echo "locking/ww_mutex: [FAIL]"
+       exit 1
+fi
diff --git a/tools/testing/selftests/media_tests/.gitignore b/tools/testing/selftests/media_tests/.gitignore
new file mode 100644
index 000000000..8745eba39
--- /dev/null
+++ b/tools/testing/selftests/media_tests/.gitignore
@@ -0,0 +1,3 @@
+media_device_test
+media_device_open
+video_device_test
diff --git a/tools/testing/selftests/media_tests/Makefile b/tools/testing/selftests/media_tests/Makefile
new file mode 100644
index 000000000..60826d7d3
--- /dev/null
+++ b/tools/testing/selftests/media_tests/Makefile
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+CFLAGS += -I../ -I../../../../usr/include/
+TEST_GEN_PROGS := media_device_test media_device_open video_device_test
+
+include ../lib.mk
diff --git a/tools/testing/selftests/media_tests/bind_unbind_sample.sh b/tools/testing/selftests/media_tests/bind_unbind_sample.sh
new file mode 100755
index 000000000..0101c1ec4
--- /dev/null
+++ b/tools/testing/selftests/media_tests/bind_unbind_sample.sh
@@ -0,0 +1,13 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Find device number in /sys/bus/usb/drivers/drivername
+# Edit this file to update the driver numer and name
+# Example test for uvcvideo driver
+#i=0
+# while :; do
+#  i=$((i+1))
+#  echo 1-5:1.0 > /sys/bus/usb/drivers/uvcvideo/unbind;
+#  echo 1-5:1.0 > /sys/bus/usb/drivers/uvcvideo/bind;
+#  clear
+#	echo $i
+#done
diff --git a/tools/testing/selftests/media_tests/media_device_open.c b/tools/testing/selftests/media_tests/media_device_open.c
new file mode 100644
index 000000000..93183a37b
--- /dev/null
+++ b/tools/testing/selftests/media_tests/media_device_open.c
@@ -0,0 +1,82 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * media_device_open.c - Media Controller Device Open Test
+ *
+ * Copyright (c) 2016 Shuah Khan <shuahkh@osg.samsung.com>
+ * Copyright (c) 2016 Samsung Electronics Co., Ltd.
+ *
+ */
+
+/*
+ * This file adds a test for Media Controller API.
+ * This test should be run as root and should not be
+ * included in the Kselftest run. This test should be
+ * run when hardware and driver that makes use Media
+ * Controller API are present in the system.
+ *
+ * This test opens user specified Media Device and calls
+ * MEDIA_IOC_DEVICE_INFO ioctl, closes the file, and exits.
+ *
+ * Usage:
+ *	sudo ./media_device_open -d /dev/mediaX
+ *
+ *	Run this test is a loop and run bind/unbind on the driver.
+*/
+
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <string.h>
+#include <fcntl.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <linux/media.h>
+
+#include "../kselftest.h"
+
+int main(int argc, char **argv)
+{
+	int opt;
+	char media_device[256];
+	int count = 0;
+	struct media_device_info mdi;
+	int ret;
+	int fd;
+
+	if (argc < 2) {
+		printf("Usage: %s [-d </dev/mediaX>]\n", argv[0]);
+		exit(-1);
+	}
+
+	/* Process arguments */
+	while ((opt = getopt(argc, argv, "d:")) != -1) {
+		switch (opt) {
+		case 'd':
+			strncpy(media_device, optarg, sizeof(media_device) - 1);
+			media_device[sizeof(media_device)-1] = '\0';
+			break;
+		default:
+			printf("Usage: %s [-d </dev/mediaX>]\n", argv[0]);
+			exit(-1);
+		}
+	}
+
+	if (getuid() != 0)
+		ksft_exit_skip("Please run the test as root - Exiting.\n");
+
+	/* Open Media device and keep it open */
+	fd = open(media_device, O_RDWR);
+	if (fd == -1) {
+		printf("Media Device open errno %s\n", strerror(errno));
+		exit(-1);
+	}
+
+	ret = ioctl(fd, MEDIA_IOC_DEVICE_INFO, &mdi);
+	if (ret < 0)
+		printf("Media Device Info errno %s\n", strerror(errno));
+	else
+		printf("Media device model %s driver %s\n",
+			mdi.model, mdi.driver);
+}
diff --git a/tools/testing/selftests/media_tests/media_device_test.c b/tools/testing/selftests/media_tests/media_device_test.c
new file mode 100644
index 000000000..4b9953359
--- /dev/null
+++ b/tools/testing/selftests/media_tests/media_device_test.c
@@ -0,0 +1,103 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * media_device_test.c - Media Controller Device ioctl loop Test
+ *
+ * Copyright (c) 2016 Shuah Khan <shuahkh@osg.samsung.com>
+ * Copyright (c) 2016 Samsung Electronics Co., Ltd.
+ *
+ */
+
+/*
+ * This file adds a test for Media Controller API.
+ * This test should be run as root and should not be
+ * included in the Kselftest run. This test should be
+ * run when hardware and driver that makes use Media
+ * Controller API are present in the system.
+ *
+ * This test opens user specified Media Device and calls
+ * MEDIA_IOC_DEVICE_INFO ioctl in a loop once every 10
+ * seconds.
+ *
+ * Usage:
+ *	sudo ./media_device_test -d /dev/mediaX
+ *
+ *	While test is running, remove the device and
+ *	ensure there are no use after free errors and
+ *	other Oops in the dmesg. Enable KaSan kernel
+ *	config option for use-after-free error detection.
+*/
+
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <string.h>
+#include <fcntl.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <time.h>
+#include <linux/media.h>
+
+#include "../kselftest.h"
+
+int main(int argc, char **argv)
+{
+	int opt;
+	char media_device[256];
+	int count;
+	struct media_device_info mdi;
+	int ret;
+	int fd;
+
+	if (argc < 2) {
+		printf("Usage: %s [-d </dev/mediaX>]\n", argv[0]);
+		exit(-1);
+	}
+
+	/* Process arguments */
+	while ((opt = getopt(argc, argv, "d:")) != -1) {
+		switch (opt) {
+		case 'd':
+			strncpy(media_device, optarg, sizeof(media_device) - 1);
+			media_device[sizeof(media_device)-1] = '\0';
+			break;
+		default:
+			printf("Usage: %s [-d </dev/mediaX>]\n", argv[0]);
+			exit(-1);
+		}
+	}
+
+	if (getuid() != 0)
+		ksft_exit_skip("Please run the test as root - Exiting.\n");
+
+	/* Generate random number of interations */
+	srand((unsigned int) time(NULL));
+	count = rand();
+
+	/* Open Media device and keep it open */
+	fd = open(media_device, O_RDWR);
+	if (fd == -1) {
+		printf("Media Device open errno %s\n", strerror(errno));
+		exit(-1);
+	}
+
+	printf("\nNote:\n"
+	       "While test is running, remove the device and\n"
+	       "ensure there are no use after free errors and\n"
+	       "other Oops in the dmesg. Enable KaSan kernel\n"
+	       "config option for use-after-free error detection.\n\n");
+
+	printf("Running test for %d iterations\n", count);
+
+	while (count > 0) {
+		ret = ioctl(fd, MEDIA_IOC_DEVICE_INFO, &mdi);
+		if (ret < 0)
+			printf("Media Device Info errno %s\n", strerror(errno));
+		else
+			printf("Media device model %s driver %s - count %d\n",
+				mdi.model, mdi.driver, count);
+		sleep(10);
+		count--;
+	}
+}
diff --git a/tools/testing/selftests/media_tests/open_loop_test.sh b/tools/testing/selftests/media_tests/open_loop_test.sh
new file mode 100755
index 000000000..d4c0179bb
--- /dev/null
+++ b/tools/testing/selftests/media_tests/open_loop_test.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+ i=0
+file=/dev/media$1
+ while :; do
+  echo $file
+  i=$((i+1))
+  R=$(./media_device_open -d $file);
+ # clear
+  echo -e "Loop $i\n$R"
+ done
diff --git a/tools/testing/selftests/media_tests/regression_test.txt b/tools/testing/selftests/media_tests/regression_test.txt
new file mode 100644
index 000000000..262736768
--- /dev/null
+++ b/tools/testing/selftests/media_tests/regression_test.txt
@@ -0,0 +1,43 @@
+Testing for regressions in Media Controller API register, ioctl, syscall,
+and unregister paths. There have a few problems that result in user-after
+free on media_device, media_devnode, and cdev pointers when the driver is
+unbound while ioctl is in progress.
+
+Test Procedure:
+
+Run bin/unbind loop while ioctls are in progress.
+Run rmmod and modprobe.
+Disconnect the device.
+
+Setup:
+
+Build media_device_test
+cd tools/testing/selftests/media_tests
+make
+
+Regressions test for cdev user-after free error on /dev/mediaX when driver
+is unbound:
+
+Start media_device_test to regression test media devnode dynamic alloc
+and cdev user-after-free fixes. This opens media dev files and sits in
+a loop running media ioctl MEDIA_IOC_DEVICE_INFO command once every 10
+seconds. The idea is when device file goes away, media devnode and cdev
+should stick around until this test exits.
+
+The test for a random number of iterations or until user kills it with a
+sleep 10 in between the ioctl calls.
+
+sudo ./media_device_test -d /dev/mediaX
+
+Regression test for media_devnode unregister race with ioctl_syscall:
+
+Start 6 open_loop_test.sh tests with different /dev/mediaX files. When
+device file goes away after unbind, device file name changes. Start the
+test with possible device names. If we start with /dev/media0 for example,
+after unbind, /dev/media1 or /dev/media2 could get created. The idea is
+keep ioctls going while bind/unbind runs.
+
+Copy bind_unbind_sample.txt and make changes to specify the driver name
+and number to run bind and unbind. Start the bind_unbind.sh
+
+Run dmesg looking for any user-after free errors or mutex lock errors.
diff --git a/tools/testing/selftests/media_tests/video_device_test.c b/tools/testing/selftests/media_tests/video_device_test.c
new file mode 100644
index 000000000..0f6aef2e2
--- /dev/null
+++ b/tools/testing/selftests/media_tests/video_device_test.c
@@ -0,0 +1,101 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * video_device_test - Video Device Test
+ *
+ * Copyright (c) 2016 Shuah Khan <shuahkh@osg.samsung.com>
+ * Copyright (c) 2016 Samsung Electronics Co., Ltd.
+ *
+ */
+
+/*
+ * This file adds a test for Video Device. This test should not be included
+ * in the Kselftest run. This test should be run when hardware and driver
+ * that makes use of V4L2 API is present.
+ *
+ * This test opens user specified Video Device and calls video ioctls in a
+ * loop once every 10 seconds.
+ *
+ * Usage:
+ *	sudo ./video_device_test -d /dev/videoX
+ *
+ *	While test is running, remove the device or unbind the driver and
+ *	ensure there are no use after free errors and other Oops in the
+ *	dmesg.
+ *	When possible, enable KaSan kernel config option for use-after-free
+ *	error detection.
+*/
+
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <string.h>
+#include <fcntl.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <time.h>
+#include <linux/videodev2.h>
+
+int main(int argc, char **argv)
+{
+	int opt;
+	char video_dev[256];
+	int count;
+	struct v4l2_tuner vtuner;
+	struct v4l2_capability vcap;
+	int ret;
+	int fd;
+
+	if (argc < 2) {
+		printf("Usage: %s [-d </dev/videoX>]\n", argv[0]);
+		exit(-1);
+	}
+
+	/* Process arguments */
+	while ((opt = getopt(argc, argv, "d:")) != -1) {
+		switch (opt) {
+		case 'd':
+			strncpy(video_dev, optarg, sizeof(video_dev) - 1);
+			video_dev[sizeof(video_dev)-1] = '\0';
+			break;
+		default:
+			printf("Usage: %s [-d </dev/videoX>]\n", argv[0]);
+			exit(-1);
+		}
+	}
+
+	/* Generate random number of interations */
+	srand((unsigned int) time(NULL));
+	count = rand();
+
+	/* Open Video device and keep it open */
+	fd = open(video_dev, O_RDWR);
+	if (fd == -1) {
+		printf("Video Device open errno %s\n", strerror(errno));
+		exit(-1);
+	}
+
+	printf("\nNote:\n"
+	       "While test is running, remove the device or unbind\n"
+	       "driver and ensure there are no use after free errors\n"
+	       "and other Oops in the dmesg. When possible, enable KaSan\n"
+	       "kernel config option for use-after-free error detection.\n\n");
+
+	while (count > 0) {
+		ret = ioctl(fd, VIDIOC_QUERYCAP, &vcap);
+		if (ret < 0)
+			printf("VIDIOC_QUERYCAP errno %s\n", strerror(errno));
+		else
+			printf("Video device driver %s\n", vcap.driver);
+
+		ret = ioctl(fd, VIDIOC_G_TUNER, &vtuner);
+		if (ret < 0)
+			printf("VIDIOC_G_TUNER, errno %s\n", strerror(errno));
+		else
+			printf("type %d rangelow %d rangehigh %d\n",
+				vtuner.type, vtuner.rangelow, vtuner.rangehigh);
+		sleep(10);
+		count--;
+	}
+}
diff --git a/tools/testing/selftests/membarrier/.gitignore b/tools/testing/selftests/membarrier/.gitignore
new file mode 100644
index 000000000..020c44f49
--- /dev/null
+++ b/tools/testing/selftests/membarrier/.gitignore
@@ -0,0 +1 @@
+membarrier_test
diff --git a/tools/testing/selftests/membarrier/Makefile b/tools/testing/selftests/membarrier/Makefile
new file mode 100644
index 000000000..02845532b
--- /dev/null
+++ b/tools/testing/selftests/membarrier/Makefile
@@ -0,0 +1,6 @@
+CFLAGS += -g -I../../../../usr/include/
+
+TEST_GEN_PROGS := membarrier_test
+
+include ../lib.mk
+
diff --git a/tools/testing/selftests/membarrier/membarrier_test.c b/tools/testing/selftests/membarrier/membarrier_test.c
new file mode 100644
index 000000000..6793f8ecc
--- /dev/null
+++ b/tools/testing/selftests/membarrier/membarrier_test.c
@@ -0,0 +1,312 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <linux/membarrier.h>
+#include <syscall.h>
+#include <stdio.h>
+#include <errno.h>
+#include <string.h>
+
+#include "../kselftest.h"
+
+static int sys_membarrier(int cmd, int flags)
+{
+	return syscall(__NR_membarrier, cmd, flags);
+}
+
+static int test_membarrier_cmd_fail(void)
+{
+	int cmd = -1, flags = 0;
+	const char *test_name = "sys membarrier invalid command";
+
+	if (sys_membarrier(cmd, flags) != -1) {
+		ksft_exit_fail_msg(
+			"%s test: command = %d, flags = %d. Should fail, but passed\n",
+			test_name, cmd, flags);
+	}
+	if (errno != EINVAL) {
+		ksft_exit_fail_msg(
+			"%s test: flags = %d. Should return (%d: \"%s\"), but returned (%d: \"%s\").\n",
+			test_name, flags, EINVAL, strerror(EINVAL),
+			errno, strerror(errno));
+	}
+
+	ksft_test_result_pass(
+		"%s test: command = %d, flags = %d, errno = %d. Failed as expected\n",
+		test_name, cmd, flags, errno);
+	return 0;
+}
+
+static int test_membarrier_flags_fail(void)
+{
+	int cmd = MEMBARRIER_CMD_QUERY, flags = 1;
+	const char *test_name = "sys membarrier MEMBARRIER_CMD_QUERY invalid flags";
+
+	if (sys_membarrier(cmd, flags) != -1) {
+		ksft_exit_fail_msg(
+			"%s test: flags = %d. Should fail, but passed\n",
+			test_name, flags);
+	}
+	if (errno != EINVAL) {
+		ksft_exit_fail_msg(
+			"%s test: flags = %d. Should return (%d: \"%s\"), but returned (%d: \"%s\").\n",
+			test_name, flags, EINVAL, strerror(EINVAL),
+			errno, strerror(errno));
+	}
+
+	ksft_test_result_pass(
+		"%s test: flags = %d, errno = %d. Failed as expected\n",
+		test_name, flags, errno);
+	return 0;
+}
+
+static int test_membarrier_global_success(void)
+{
+	int cmd = MEMBARRIER_CMD_GLOBAL, flags = 0;
+	const char *test_name = "sys membarrier MEMBARRIER_CMD_GLOBAL";
+
+	if (sys_membarrier(cmd, flags) != 0) {
+		ksft_exit_fail_msg(
+			"%s test: flags = %d, errno = %d\n",
+			test_name, flags, errno);
+	}
+
+	ksft_test_result_pass(
+		"%s test: flags = %d\n", test_name, flags);
+	return 0;
+}
+
+static int test_membarrier_private_expedited_fail(void)
+{
+	int cmd = MEMBARRIER_CMD_PRIVATE_EXPEDITED, flags = 0;
+	const char *test_name = "sys membarrier MEMBARRIER_CMD_PRIVATE_EXPEDITED not registered failure";
+
+	if (sys_membarrier(cmd, flags) != -1) {
+		ksft_exit_fail_msg(
+			"%s test: flags = %d. Should fail, but passed\n",
+			test_name, flags);
+	}
+	if (errno != EPERM) {
+		ksft_exit_fail_msg(
+			"%s test: flags = %d. Should return (%d: \"%s\"), but returned (%d: \"%s\").\n",
+			test_name, flags, EPERM, strerror(EPERM),
+			errno, strerror(errno));
+	}
+
+	ksft_test_result_pass(
+		"%s test: flags = %d, errno = %d\n",
+		test_name, flags, errno);
+	return 0;
+}
+
+static int test_membarrier_register_private_expedited_success(void)
+{
+	int cmd = MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED, flags = 0;
+	const char *test_name = "sys membarrier MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED";
+
+	if (sys_membarrier(cmd, flags) != 0) {
+		ksft_exit_fail_msg(
+			"%s test: flags = %d, errno = %d\n",
+			test_name, flags, errno);
+	}
+
+	ksft_test_result_pass(
+		"%s test: flags = %d\n",
+		test_name, flags);
+	return 0;
+}
+
+static int test_membarrier_private_expedited_success(void)
+{
+	int cmd = MEMBARRIER_CMD_PRIVATE_EXPEDITED, flags = 0;
+	const char *test_name = "sys membarrier MEMBARRIER_CMD_PRIVATE_EXPEDITED";
+
+	if (sys_membarrier(cmd, flags) != 0) {
+		ksft_exit_fail_msg(
+			"%s test: flags = %d, errno = %d\n",
+			test_name, flags, errno);
+	}
+
+	ksft_test_result_pass(
+		"%s test: flags = %d\n",
+		test_name, flags);
+	return 0;
+}
+
+static int test_membarrier_private_expedited_sync_core_fail(void)
+{
+	int cmd = MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE, flags = 0;
+	const char *test_name = "sys membarrier MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE not registered failure";
+
+	if (sys_membarrier(cmd, flags) != -1) {
+		ksft_exit_fail_msg(
+			"%s test: flags = %d. Should fail, but passed\n",
+			test_name, flags);
+	}
+	if (errno != EPERM) {
+		ksft_exit_fail_msg(
+			"%s test: flags = %d. Should return (%d: \"%s\"), but returned (%d: \"%s\").\n",
+			test_name, flags, EPERM, strerror(EPERM),
+			errno, strerror(errno));
+	}
+
+	ksft_test_result_pass(
+		"%s test: flags = %d, errno = %d\n",
+		test_name, flags, errno);
+	return 0;
+}
+
+static int test_membarrier_register_private_expedited_sync_core_success(void)
+{
+	int cmd = MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE, flags = 0;
+	const char *test_name = "sys membarrier MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE";
+
+	if (sys_membarrier(cmd, flags) != 0) {
+		ksft_exit_fail_msg(
+			"%s test: flags = %d, errno = %d\n",
+			test_name, flags, errno);
+	}
+
+	ksft_test_result_pass(
+		"%s test: flags = %d\n",
+		test_name, flags);
+	return 0;
+}
+
+static int test_membarrier_private_expedited_sync_core_success(void)
+{
+	int cmd = MEMBARRIER_CMD_PRIVATE_EXPEDITED, flags = 0;
+	const char *test_name = "sys membarrier MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE";
+
+	if (sys_membarrier(cmd, flags) != 0) {
+		ksft_exit_fail_msg(
+			"%s test: flags = %d, errno = %d\n",
+			test_name, flags, errno);
+	}
+
+	ksft_test_result_pass(
+		"%s test: flags = %d\n",
+		test_name, flags);
+	return 0;
+}
+
+static int test_membarrier_register_global_expedited_success(void)
+{
+	int cmd = MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED, flags = 0;
+	const char *test_name = "sys membarrier MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED";
+
+	if (sys_membarrier(cmd, flags) != 0) {
+		ksft_exit_fail_msg(
+			"%s test: flags = %d, errno = %d\n",
+			test_name, flags, errno);
+	}
+
+	ksft_test_result_pass(
+		"%s test: flags = %d\n",
+		test_name, flags);
+	return 0;
+}
+
+static int test_membarrier_global_expedited_success(void)
+{
+	int cmd = MEMBARRIER_CMD_GLOBAL_EXPEDITED, flags = 0;
+	const char *test_name = "sys membarrier MEMBARRIER_CMD_GLOBAL_EXPEDITED";
+
+	if (sys_membarrier(cmd, flags) != 0) {
+		ksft_exit_fail_msg(
+			"%s test: flags = %d, errno = %d\n",
+			test_name, flags, errno);
+	}
+
+	ksft_test_result_pass(
+		"%s test: flags = %d\n",
+		test_name, flags);
+	return 0;
+}
+
+static int test_membarrier(void)
+{
+	int status;
+
+	status = test_membarrier_cmd_fail();
+	if (status)
+		return status;
+	status = test_membarrier_flags_fail();
+	if (status)
+		return status;
+	status = test_membarrier_global_success();
+	if (status)
+		return status;
+	status = test_membarrier_private_expedited_fail();
+	if (status)
+		return status;
+	status = test_membarrier_register_private_expedited_success();
+	if (status)
+		return status;
+	status = test_membarrier_private_expedited_success();
+	if (status)
+		return status;
+	status = sys_membarrier(MEMBARRIER_CMD_QUERY, 0);
+	if (status < 0) {
+		ksft_test_result_fail("sys_membarrier() failed\n");
+		return status;
+	}
+	if (status & MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE) {
+		status = test_membarrier_private_expedited_sync_core_fail();
+		if (status)
+			return status;
+		status = test_membarrier_register_private_expedited_sync_core_success();
+		if (status)
+			return status;
+		status = test_membarrier_private_expedited_sync_core_success();
+		if (status)
+			return status;
+	}
+	/*
+	 * It is valid to send a global membarrier from a non-registered
+	 * process.
+	 */
+	status = test_membarrier_global_expedited_success();
+	if (status)
+		return status;
+	status = test_membarrier_register_global_expedited_success();
+	if (status)
+		return status;
+	status = test_membarrier_global_expedited_success();
+	if (status)
+		return status;
+	return 0;
+}
+
+static int test_membarrier_query(void)
+{
+	int flags = 0, ret;
+
+	ret = sys_membarrier(MEMBARRIER_CMD_QUERY, flags);
+	if (ret < 0) {
+		if (errno == ENOSYS) {
+			/*
+			 * It is valid to build a kernel with
+			 * CONFIG_MEMBARRIER=n. However, this skips the tests.
+			 */
+			ksft_exit_skip(
+				"sys membarrier (CONFIG_MEMBARRIER) is disabled.\n");
+		}
+		ksft_exit_fail_msg("sys_membarrier() failed\n");
+	}
+	if (!(ret & MEMBARRIER_CMD_GLOBAL))
+		ksft_exit_skip(
+			"sys_membarrier unsupported: CMD_GLOBAL not found.\n");
+
+	ksft_test_result_pass("sys_membarrier available\n");
+	return 0;
+}
+
+int main(int argc, char **argv)
+{
+	ksft_print_header();
+
+	test_membarrier_query();
+	test_membarrier();
+
+	return ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/memfd/.gitignore b/tools/testing/selftests/memfd/.gitignore
new file mode 100644
index 000000000..afe87c40a
--- /dev/null
+++ b/tools/testing/selftests/memfd/.gitignore
@@ -0,0 +1,4 @@
+fuse_mnt
+fuse_test
+memfd_test
+memfd-test-file
diff --git a/tools/testing/selftests/memfd/Makefile b/tools/testing/selftests/memfd/Makefile
new file mode 100644
index 000000000..53a848109
--- /dev/null
+++ b/tools/testing/selftests/memfd/Makefile
@@ -0,0 +1,20 @@
+# SPDX-License-Identifier: GPL-2.0
+CFLAGS += -D_FILE_OFFSET_BITS=64
+CFLAGS += -I../../../../include/uapi/
+CFLAGS += -I../../../../include/
+CFLAGS += -I../../../../usr/include/
+
+TEST_GEN_PROGS := memfd_test
+TEST_PROGS := run_fuse_test.sh run_hugetlbfs_test.sh
+TEST_GEN_FILES := fuse_mnt fuse_test
+
+fuse_mnt.o: CFLAGS += $(shell pkg-config fuse --cflags)
+
+include ../lib.mk
+
+$(OUTPUT)/fuse_mnt: LDLIBS += $(shell pkg-config fuse --libs)
+
+$(OUTPUT)/memfd_test: memfd_test.c common.o
+$(OUTPUT)/fuse_test: fuse_test.c common.o
+
+EXTRA_CLEAN = common.o
diff --git a/tools/testing/selftests/memfd/common.c b/tools/testing/selftests/memfd/common.c
new file mode 100644
index 000000000..8eb3d75f6
--- /dev/null
+++ b/tools/testing/selftests/memfd/common.c
@@ -0,0 +1,46 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#define __EXPORTED_HEADERS__
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <linux/fcntl.h>
+#include <linux/memfd.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+
+#include "common.h"
+
+int hugetlbfs_test = 0;
+
+/*
+ * Copied from mlock2-tests.c
+ */
+unsigned long default_huge_page_size(void)
+{
+	unsigned long hps = 0;
+	char *line = NULL;
+	size_t linelen = 0;
+	FILE *f = fopen("/proc/meminfo", "r");
+
+	if (!f)
+		return 0;
+	while (getline(&line, &linelen, f) > 0) {
+		if (sscanf(line, "Hugepagesize:       %lu kB", &hps) == 1) {
+			hps <<= 10;
+			break;
+		}
+	}
+
+	free(line);
+	fclose(f);
+	return hps;
+}
+
+int sys_memfd_create(const char *name, unsigned int flags)
+{
+	if (hugetlbfs_test)
+		flags |= MFD_HUGETLB;
+
+	return syscall(__NR_memfd_create, name, flags);
+}
diff --git a/tools/testing/selftests/memfd/common.h b/tools/testing/selftests/memfd/common.h
new file mode 100644
index 000000000..522d2c630
--- /dev/null
+++ b/tools/testing/selftests/memfd/common.h
@@ -0,0 +1,9 @@
+#ifndef COMMON_H_
+#define COMMON_H_
+
+extern int hugetlbfs_test;
+
+unsigned long default_huge_page_size(void);
+int sys_memfd_create(const char *name, unsigned int flags);
+
+#endif
diff --git a/tools/testing/selftests/memfd/config b/tools/testing/selftests/memfd/config
new file mode 100644
index 000000000..835c7f4da
--- /dev/null
+++ b/tools/testing/selftests/memfd/config
@@ -0,0 +1 @@
+CONFIG_FUSE_FS=m
diff --git a/tools/testing/selftests/memfd/fuse_mnt.c b/tools/testing/selftests/memfd/fuse_mnt.c
new file mode 100644
index 000000000..6936f2a00
--- /dev/null
+++ b/tools/testing/selftests/memfd/fuse_mnt.c
@@ -0,0 +1,111 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * memfd test file-system
+ * This file uses FUSE to create a dummy file-system with only one file /memfd.
+ * This file is read-only and takes 1s per read.
+ *
+ * This file-system is used by the memfd test-cases to force the kernel to pin
+ * pages during reads(). Due to the 1s delay of this file-system, this is a
+ * nice way to test race-conditions against get_user_pages() in the kernel.
+ *
+ * We use direct_io==1 to force the kernel to use direct-IO for this
+ * file-system.
+ */
+
+#define FUSE_USE_VERSION 26
+
+#include <fuse.h>
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+static const char memfd_content[] = "memfd-example-content";
+static const char memfd_path[] = "/memfd";
+
+static int memfd_getattr(const char *path, struct stat *st)
+{
+	memset(st, 0, sizeof(*st));
+
+	if (!strcmp(path, "/")) {
+		st->st_mode = S_IFDIR | 0755;
+		st->st_nlink = 2;
+	} else if (!strcmp(path, memfd_path)) {
+		st->st_mode = S_IFREG | 0444;
+		st->st_nlink = 1;
+		st->st_size = strlen(memfd_content);
+	} else {
+		return -ENOENT;
+	}
+
+	return 0;
+}
+
+static int memfd_readdir(const char *path,
+			 void *buf,
+			 fuse_fill_dir_t filler,
+			 off_t offset,
+			 struct fuse_file_info *fi)
+{
+	if (strcmp(path, "/"))
+		return -ENOENT;
+
+	filler(buf, ".", NULL, 0);
+	filler(buf, "..", NULL, 0);
+	filler(buf, memfd_path + 1, NULL, 0);
+
+	return 0;
+}
+
+static int memfd_open(const char *path, struct fuse_file_info *fi)
+{
+	if (strcmp(path, memfd_path))
+		return -ENOENT;
+
+	if ((fi->flags & 3) != O_RDONLY)
+		return -EACCES;
+
+	/* force direct-IO */
+	fi->direct_io = 1;
+
+	return 0;
+}
+
+static int memfd_read(const char *path,
+		      char *buf,
+		      size_t size,
+		      off_t offset,
+		      struct fuse_file_info *fi)
+{
+	size_t len;
+
+	if (strcmp(path, memfd_path) != 0)
+		return -ENOENT;
+
+	sleep(1);
+
+	len = strlen(memfd_content);
+	if (offset < len) {
+		if (offset + size > len)
+			size = len - offset;
+
+		memcpy(buf, memfd_content + offset, size);
+	} else {
+		size = 0;
+	}
+
+	return size;
+}
+
+static struct fuse_operations memfd_ops = {
+	.getattr	= memfd_getattr,
+	.readdir	= memfd_readdir,
+	.open		= memfd_open,
+	.read		= memfd_read,
+};
+
+int main(int argc, char *argv[])
+{
+	return fuse_main(argc, argv, &memfd_ops, NULL);
+}
diff --git a/tools/testing/selftests/memfd/fuse_test.c b/tools/testing/selftests/memfd/fuse_test.c
new file mode 100644
index 000000000..b018e8357
--- /dev/null
+++ b/tools/testing/selftests/memfd/fuse_test.c
@@ -0,0 +1,330 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * memfd GUP test-case
+ * This tests memfd interactions with get_user_pages(). We require the
+ * fuse_mnt.c program to provide a fake direct-IO FUSE mount-point for us. This
+ * file-system delays _all_ reads by 1s and forces direct-IO. This means, any
+ * read() on files in that file-system will pin the receive-buffer pages for at
+ * least 1s via get_user_pages().
+ *
+ * We use this trick to race ADD_SEALS against a write on a memfd object. The
+ * ADD_SEALS must fail if the memfd pages are still pinned. Note that we use
+ * the read() syscall with our memory-mapped memfd object as receive buffer to
+ * force the kernel to write into our memfd object.
+ */
+
+#define _GNU_SOURCE
+#define __EXPORTED_HEADERS__
+
+#include <errno.h>
+#include <inttypes.h>
+#include <limits.h>
+#include <linux/falloc.h>
+#include <linux/fcntl.h>
+#include <linux/memfd.h>
+#include <sched.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/syscall.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "common.h"
+
+#define MFD_DEF_SIZE 8192
+#define STACK_SIZE 65536
+
+static size_t mfd_def_size = MFD_DEF_SIZE;
+
+static int mfd_assert_new(const char *name, loff_t sz, unsigned int flags)
+{
+	int r, fd;
+
+	fd = sys_memfd_create(name, flags);
+	if (fd < 0) {
+		printf("memfd_create(\"%s\", %u) failed: %m\n",
+		       name, flags);
+		abort();
+	}
+
+	r = ftruncate(fd, sz);
+	if (r < 0) {
+		printf("ftruncate(%llu) failed: %m\n", (unsigned long long)sz);
+		abort();
+	}
+
+	return fd;
+}
+
+static __u64 mfd_assert_get_seals(int fd)
+{
+	long r;
+
+	r = fcntl(fd, F_GET_SEALS);
+	if (r < 0) {
+		printf("GET_SEALS(%d) failed: %m\n", fd);
+		abort();
+	}
+
+	return r;
+}
+
+static void mfd_assert_has_seals(int fd, __u64 seals)
+{
+	__u64 s;
+
+	s = mfd_assert_get_seals(fd);
+	if (s != seals) {
+		printf("%llu != %llu = GET_SEALS(%d)\n",
+		       (unsigned long long)seals, (unsigned long long)s, fd);
+		abort();
+	}
+}
+
+static void mfd_assert_add_seals(int fd, __u64 seals)
+{
+	long r;
+	__u64 s;
+
+	s = mfd_assert_get_seals(fd);
+	r = fcntl(fd, F_ADD_SEALS, seals);
+	if (r < 0) {
+		printf("ADD_SEALS(%d, %llu -> %llu) failed: %m\n",
+		       fd, (unsigned long long)s, (unsigned long long)seals);
+		abort();
+	}
+}
+
+static int mfd_busy_add_seals(int fd, __u64 seals)
+{
+	long r;
+	__u64 s;
+
+	r = fcntl(fd, F_GET_SEALS);
+	if (r < 0)
+		s = 0;
+	else
+		s = r;
+
+	r = fcntl(fd, F_ADD_SEALS, seals);
+	if (r < 0 && errno != EBUSY) {
+		printf("ADD_SEALS(%d, %llu -> %llu) didn't fail as expected with EBUSY: %m\n",
+		       fd, (unsigned long long)s, (unsigned long long)seals);
+		abort();
+	}
+
+	return r;
+}
+
+static void *mfd_assert_mmap_shared(int fd)
+{
+	void *p;
+
+	p = mmap(NULL,
+		 mfd_def_size,
+		 PROT_READ | PROT_WRITE,
+		 MAP_SHARED,
+		 fd,
+		 0);
+	if (p == MAP_FAILED) {
+		printf("mmap() failed: %m\n");
+		abort();
+	}
+
+	return p;
+}
+
+static void *mfd_assert_mmap_private(int fd)
+{
+	void *p;
+
+	p = mmap(NULL,
+		 mfd_def_size,
+		 PROT_READ | PROT_WRITE,
+		 MAP_PRIVATE,
+		 fd,
+		 0);
+	if (p == MAP_FAILED) {
+		printf("mmap() failed: %m\n");
+		abort();
+	}
+
+	return p;
+}
+
+static int global_mfd = -1;
+static void *global_p = NULL;
+
+static int sealing_thread_fn(void *arg)
+{
+	int sig, r;
+
+	/*
+	 * This thread first waits 200ms so any pending operation in the parent
+	 * is correctly started. After that, it tries to seal @global_mfd as
+	 * SEAL_WRITE. This _must_ fail as the parent thread has a read() into
+	 * that memory mapped object still ongoing.
+	 * We then wait one more second and try sealing again. This time it
+	 * must succeed as there shouldn't be anyone else pinning the pages.
+	 */
+
+	/* wait 200ms for FUSE-request to be active */
+	usleep(200000);
+
+	/* unmount mapping before sealing to avoid i_mmap_writable failures */
+	munmap(global_p, mfd_def_size);
+
+	/* Try sealing the global file; expect EBUSY or success. Current
+	 * kernels will never succeed, but in the future, kernels might
+	 * implement page-replacements or other fancy ways to avoid racing
+	 * writes. */
+	r = mfd_busy_add_seals(global_mfd, F_SEAL_WRITE);
+	if (r >= 0) {
+		printf("HURRAY! This kernel fixed GUP races!\n");
+	} else {
+		/* wait 1s more so the FUSE-request is done */
+		sleep(1);
+
+		/* try sealing the global file again */
+		mfd_assert_add_seals(global_mfd, F_SEAL_WRITE);
+	}
+
+	return 0;
+}
+
+static pid_t spawn_sealing_thread(void)
+{
+	uint8_t *stack;
+	pid_t pid;
+
+	stack = malloc(STACK_SIZE);
+	if (!stack) {
+		printf("malloc(STACK_SIZE) failed: %m\n");
+		abort();
+	}
+
+	pid = clone(sealing_thread_fn,
+		    stack + STACK_SIZE,
+		    SIGCHLD | CLONE_FILES | CLONE_FS | CLONE_VM,
+		    NULL);
+	if (pid < 0) {
+		printf("clone() failed: %m\n");
+		abort();
+	}
+
+	return pid;
+}
+
+static void join_sealing_thread(pid_t pid)
+{
+	waitpid(pid, NULL, 0);
+}
+
+int main(int argc, char **argv)
+{
+	char *zero;
+	int fd, mfd, r;
+	void *p;
+	int was_sealed;
+	pid_t pid;
+
+	if (argc < 2) {
+		printf("error: please pass path to file in fuse_mnt mount-point\n");
+		abort();
+	}
+
+	if (argc >= 3) {
+		if (!strcmp(argv[2], "hugetlbfs")) {
+			unsigned long hpage_size = default_huge_page_size();
+
+			if (!hpage_size) {
+				printf("Unable to determine huge page size\n");
+				abort();
+			}
+
+			hugetlbfs_test = 1;
+			mfd_def_size = hpage_size * 2;
+		} else {
+			printf("Unknown option: %s\n", argv[2]);
+			abort();
+		}
+	}
+
+	zero = calloc(sizeof(*zero), mfd_def_size);
+
+	/* open FUSE memfd file for GUP testing */
+	printf("opening: %s\n", argv[1]);
+	fd = open(argv[1], O_RDONLY | O_CLOEXEC);
+	if (fd < 0) {
+		printf("cannot open(\"%s\"): %m\n", argv[1]);
+		abort();
+	}
+
+	/* create new memfd-object */
+	mfd = mfd_assert_new("kern_memfd_fuse",
+			     mfd_def_size,
+			     MFD_CLOEXEC | MFD_ALLOW_SEALING);
+
+	/* mmap memfd-object for writing */
+	p = mfd_assert_mmap_shared(mfd);
+
+	/* pass mfd+mapping to a separate sealing-thread which tries to seal
+	 * the memfd objects with SEAL_WRITE while we write into it */
+	global_mfd = mfd;
+	global_p = p;
+	pid = spawn_sealing_thread();
+
+	/* Use read() on the FUSE file to read into our memory-mapped memfd
+	 * object. This races the other thread which tries to seal the
+	 * memfd-object.
+	 * If @fd is on the memfd-fake-FUSE-FS, the read() is delayed by 1s.
+	 * This guarantees that the receive-buffer is pinned for 1s until the
+	 * data is written into it. The racing ADD_SEALS should thus fail as
+	 * the pages are still pinned. */
+	r = read(fd, p, mfd_def_size);
+	if (r < 0) {
+		printf("read() failed: %m\n");
+		abort();
+	} else if (!r) {
+		printf("unexpected EOF on read()\n");
+		abort();
+	}
+
+	was_sealed = mfd_assert_get_seals(mfd) & F_SEAL_WRITE;
+
+	/* Wait for sealing-thread to finish and verify that it
+	 * successfully sealed the file after the second try. */
+	join_sealing_thread(pid);
+	mfd_assert_has_seals(mfd, F_SEAL_WRITE);
+
+	/* *IF* the memfd-object was sealed at the time our read() returned,
+	 * then the kernel did a page-replacement or canceled the read() (or
+	 * whatever magic it did..). In that case, the memfd object is still
+	 * all zero.
+	 * In case the memfd-object was *not* sealed, the read() was successfull
+	 * and the memfd object must *not* be all zero.
+	 * Note that in real scenarios, there might be a mixture of both, but
+	 * in this test-cases, we have explicit 200ms delays which should be
+	 * enough to avoid any in-flight writes. */
+
+	p = mfd_assert_mmap_private(mfd);
+	if (was_sealed && memcmp(p, zero, mfd_def_size)) {
+		printf("memfd sealed during read() but data not discarded\n");
+		abort();
+	} else if (!was_sealed && !memcmp(p, zero, mfd_def_size)) {
+		printf("memfd sealed after read() but data discarded\n");
+		abort();
+	}
+
+	close(mfd);
+	close(fd);
+
+	printf("fuse: DONE\n");
+	free(zero);
+
+	return 0;
+}
diff --git a/tools/testing/selftests/memfd/memfd_test.c b/tools/testing/selftests/memfd/memfd_test.c
new file mode 100644
index 000000000..a4e520b94
--- /dev/null
+++ b/tools/testing/selftests/memfd/memfd_test.c
@@ -0,0 +1,970 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#define __EXPORTED_HEADERS__
+
+#include <errno.h>
+#include <inttypes.h>
+#include <limits.h>
+#include <linux/falloc.h>
+#include <linux/fcntl.h>
+#include <linux/memfd.h>
+#include <sched.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/syscall.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "common.h"
+
+#define MEMFD_STR	"memfd:"
+#define MEMFD_HUGE_STR	"memfd-hugetlb:"
+#define SHARED_FT_STR	"(shared file-table)"
+
+#define MFD_DEF_SIZE 8192
+#define STACK_SIZE 65536
+
+/*
+ * Default is not to test hugetlbfs
+ */
+static size_t mfd_def_size = MFD_DEF_SIZE;
+static const char *memfd_str = MEMFD_STR;
+
+static int mfd_assert_new(const char *name, loff_t sz, unsigned int flags)
+{
+	int r, fd;
+
+	fd = sys_memfd_create(name, flags);
+	if (fd < 0) {
+		printf("memfd_create(\"%s\", %u) failed: %m\n",
+		       name, flags);
+		abort();
+	}
+
+	r = ftruncate(fd, sz);
+	if (r < 0) {
+		printf("ftruncate(%llu) failed: %m\n", (unsigned long long)sz);
+		abort();
+	}
+
+	return fd;
+}
+
+static void mfd_fail_new(const char *name, unsigned int flags)
+{
+	int r;
+
+	r = sys_memfd_create(name, flags);
+	if (r >= 0) {
+		printf("memfd_create(\"%s\", %u) succeeded, but failure expected\n",
+		       name, flags);
+		close(r);
+		abort();
+	}
+}
+
+static unsigned int mfd_assert_get_seals(int fd)
+{
+	int r;
+
+	r = fcntl(fd, F_GET_SEALS);
+	if (r < 0) {
+		printf("GET_SEALS(%d) failed: %m\n", fd);
+		abort();
+	}
+
+	return (unsigned int)r;
+}
+
+static void mfd_assert_has_seals(int fd, unsigned int seals)
+{
+	unsigned int s;
+
+	s = mfd_assert_get_seals(fd);
+	if (s != seals) {
+		printf("%u != %u = GET_SEALS(%d)\n", seals, s, fd);
+		abort();
+	}
+}
+
+static void mfd_assert_add_seals(int fd, unsigned int seals)
+{
+	int r;
+	unsigned int s;
+
+	s = mfd_assert_get_seals(fd);
+	r = fcntl(fd, F_ADD_SEALS, seals);
+	if (r < 0) {
+		printf("ADD_SEALS(%d, %u -> %u) failed: %m\n", fd, s, seals);
+		abort();
+	}
+}
+
+static void mfd_fail_add_seals(int fd, unsigned int seals)
+{
+	int r;
+	unsigned int s;
+
+	r = fcntl(fd, F_GET_SEALS);
+	if (r < 0)
+		s = 0;
+	else
+		s = (unsigned int)r;
+
+	r = fcntl(fd, F_ADD_SEALS, seals);
+	if (r >= 0) {
+		printf("ADD_SEALS(%d, %u -> %u) didn't fail as expected\n",
+				fd, s, seals);
+		abort();
+	}
+}
+
+static void mfd_assert_size(int fd, size_t size)
+{
+	struct stat st;
+	int r;
+
+	r = fstat(fd, &st);
+	if (r < 0) {
+		printf("fstat(%d) failed: %m\n", fd);
+		abort();
+	} else if (st.st_size != size) {
+		printf("wrong file size %lld, but expected %lld\n",
+		       (long long)st.st_size, (long long)size);
+		abort();
+	}
+}
+
+static int mfd_assert_dup(int fd)
+{
+	int r;
+
+	r = dup(fd);
+	if (r < 0) {
+		printf("dup(%d) failed: %m\n", fd);
+		abort();
+	}
+
+	return r;
+}
+
+static void *mfd_assert_mmap_shared(int fd)
+{
+	void *p;
+
+	p = mmap(NULL,
+		 mfd_def_size,
+		 PROT_READ | PROT_WRITE,
+		 MAP_SHARED,
+		 fd,
+		 0);
+	if (p == MAP_FAILED) {
+		printf("mmap() failed: %m\n");
+		abort();
+	}
+
+	return p;
+}
+
+static void *mfd_assert_mmap_private(int fd)
+{
+	void *p;
+
+	p = mmap(NULL,
+		 mfd_def_size,
+		 PROT_READ,
+		 MAP_PRIVATE,
+		 fd,
+		 0);
+	if (p == MAP_FAILED) {
+		printf("mmap() failed: %m\n");
+		abort();
+	}
+
+	return p;
+}
+
+static int mfd_assert_open(int fd, int flags, mode_t mode)
+{
+	char buf[512];
+	int r;
+
+	sprintf(buf, "/proc/self/fd/%d", fd);
+	r = open(buf, flags, mode);
+	if (r < 0) {
+		printf("open(%s) failed: %m\n", buf);
+		abort();
+	}
+
+	return r;
+}
+
+static void mfd_fail_open(int fd, int flags, mode_t mode)
+{
+	char buf[512];
+	int r;
+
+	sprintf(buf, "/proc/self/fd/%d", fd);
+	r = open(buf, flags, mode);
+	if (r >= 0) {
+		printf("open(%s) didn't fail as expected\n", buf);
+		abort();
+	}
+}
+
+static void mfd_assert_read(int fd)
+{
+	char buf[16];
+	void *p;
+	ssize_t l;
+
+	l = read(fd, buf, sizeof(buf));
+	if (l != sizeof(buf)) {
+		printf("read() failed: %m\n");
+		abort();
+	}
+
+	/* verify PROT_READ *is* allowed */
+	p = mmap(NULL,
+		 mfd_def_size,
+		 PROT_READ,
+		 MAP_PRIVATE,
+		 fd,
+		 0);
+	if (p == MAP_FAILED) {
+		printf("mmap() failed: %m\n");
+		abort();
+	}
+	munmap(p, mfd_def_size);
+
+	/* verify MAP_PRIVATE is *always* allowed (even writable) */
+	p = mmap(NULL,
+		 mfd_def_size,
+		 PROT_READ | PROT_WRITE,
+		 MAP_PRIVATE,
+		 fd,
+		 0);
+	if (p == MAP_FAILED) {
+		printf("mmap() failed: %m\n");
+		abort();
+	}
+	munmap(p, mfd_def_size);
+}
+
+static void mfd_assert_write(int fd)
+{
+	ssize_t l;
+	void *p;
+	int r;
+
+	/*
+	 * huegtlbfs does not support write, but we want to
+	 * verify everything else here.
+	 */
+	if (!hugetlbfs_test) {
+		/* verify write() succeeds */
+		l = write(fd, "\0\0\0\0", 4);
+		if (l != 4) {
+			printf("write() failed: %m\n");
+			abort();
+		}
+	}
+
+	/* verify PROT_READ | PROT_WRITE is allowed */
+	p = mmap(NULL,
+		 mfd_def_size,
+		 PROT_READ | PROT_WRITE,
+		 MAP_SHARED,
+		 fd,
+		 0);
+	if (p == MAP_FAILED) {
+		printf("mmap() failed: %m\n");
+		abort();
+	}
+	*(char *)p = 0;
+	munmap(p, mfd_def_size);
+
+	/* verify PROT_WRITE is allowed */
+	p = mmap(NULL,
+		 mfd_def_size,
+		 PROT_WRITE,
+		 MAP_SHARED,
+		 fd,
+		 0);
+	if (p == MAP_FAILED) {
+		printf("mmap() failed: %m\n");
+		abort();
+	}
+	*(char *)p = 0;
+	munmap(p, mfd_def_size);
+
+	/* verify PROT_READ with MAP_SHARED is allowed and a following
+	 * mprotect(PROT_WRITE) allows writing */
+	p = mmap(NULL,
+		 mfd_def_size,
+		 PROT_READ,
+		 MAP_SHARED,
+		 fd,
+		 0);
+	if (p == MAP_FAILED) {
+		printf("mmap() failed: %m\n");
+		abort();
+	}
+
+	r = mprotect(p, mfd_def_size, PROT_READ | PROT_WRITE);
+	if (r < 0) {
+		printf("mprotect() failed: %m\n");
+		abort();
+	}
+
+	*(char *)p = 0;
+	munmap(p, mfd_def_size);
+
+	/* verify PUNCH_HOLE works */
+	r = fallocate(fd,
+		      FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
+		      0,
+		      mfd_def_size);
+	if (r < 0) {
+		printf("fallocate(PUNCH_HOLE) failed: %m\n");
+		abort();
+	}
+}
+
+static void mfd_fail_write(int fd)
+{
+	ssize_t l;
+	void *p;
+	int r;
+
+	/* verify write() fails */
+	l = write(fd, "data", 4);
+	if (l != -EPERM) {
+		printf("expected EPERM on write(), but got %d: %m\n", (int)l);
+		abort();
+	}
+
+	/* verify PROT_READ | PROT_WRITE is not allowed */
+	p = mmap(NULL,
+		 mfd_def_size,
+		 PROT_READ | PROT_WRITE,
+		 MAP_SHARED,
+		 fd,
+		 0);
+	if (p != MAP_FAILED) {
+		printf("mmap() didn't fail as expected\n");
+		abort();
+	}
+
+	/* verify PROT_WRITE is not allowed */
+	p = mmap(NULL,
+		 mfd_def_size,
+		 PROT_WRITE,
+		 MAP_SHARED,
+		 fd,
+		 0);
+	if (p != MAP_FAILED) {
+		printf("mmap() didn't fail as expected\n");
+		abort();
+	}
+
+	/* Verify PROT_READ with MAP_SHARED with a following mprotect is not
+	 * allowed. Note that for r/w the kernel already prevents the mmap. */
+	p = mmap(NULL,
+		 mfd_def_size,
+		 PROT_READ,
+		 MAP_SHARED,
+		 fd,
+		 0);
+	if (p != MAP_FAILED) {
+		r = mprotect(p, mfd_def_size, PROT_READ | PROT_WRITE);
+		if (r >= 0) {
+			printf("mmap()+mprotect() didn't fail as expected\n");
+			abort();
+		}
+		munmap(p, mfd_def_size);
+	}
+
+	/* verify PUNCH_HOLE fails */
+	r = fallocate(fd,
+		      FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
+		      0,
+		      mfd_def_size);
+	if (r >= 0) {
+		printf("fallocate(PUNCH_HOLE) didn't fail as expected\n");
+		abort();
+	}
+}
+
+static void mfd_assert_shrink(int fd)
+{
+	int r, fd2;
+
+	r = ftruncate(fd, mfd_def_size / 2);
+	if (r < 0) {
+		printf("ftruncate(SHRINK) failed: %m\n");
+		abort();
+	}
+
+	mfd_assert_size(fd, mfd_def_size / 2);
+
+	fd2 = mfd_assert_open(fd,
+			      O_RDWR | O_CREAT | O_TRUNC,
+			      S_IRUSR | S_IWUSR);
+	close(fd2);
+
+	mfd_assert_size(fd, 0);
+}
+
+static void mfd_fail_shrink(int fd)
+{
+	int r;
+
+	r = ftruncate(fd, mfd_def_size / 2);
+	if (r >= 0) {
+		printf("ftruncate(SHRINK) didn't fail as expected\n");
+		abort();
+	}
+
+	mfd_fail_open(fd,
+		      O_RDWR | O_CREAT | O_TRUNC,
+		      S_IRUSR | S_IWUSR);
+}
+
+static void mfd_assert_grow(int fd)
+{
+	int r;
+
+	r = ftruncate(fd, mfd_def_size * 2);
+	if (r < 0) {
+		printf("ftruncate(GROW) failed: %m\n");
+		abort();
+	}
+
+	mfd_assert_size(fd, mfd_def_size * 2);
+
+	r = fallocate(fd,
+		      0,
+		      0,
+		      mfd_def_size * 4);
+	if (r < 0) {
+		printf("fallocate(ALLOC) failed: %m\n");
+		abort();
+	}
+
+	mfd_assert_size(fd, mfd_def_size * 4);
+}
+
+static void mfd_fail_grow(int fd)
+{
+	int r;
+
+	r = ftruncate(fd, mfd_def_size * 2);
+	if (r >= 0) {
+		printf("ftruncate(GROW) didn't fail as expected\n");
+		abort();
+	}
+
+	r = fallocate(fd,
+		      0,
+		      0,
+		      mfd_def_size * 4);
+	if (r >= 0) {
+		printf("fallocate(ALLOC) didn't fail as expected\n");
+		abort();
+	}
+}
+
+static void mfd_assert_grow_write(int fd)
+{
+	static char *buf;
+	ssize_t l;
+
+	/* hugetlbfs does not support write */
+	if (hugetlbfs_test)
+		return;
+
+	buf = malloc(mfd_def_size * 8);
+	if (!buf) {
+		printf("malloc(%zu) failed: %m\n", mfd_def_size * 8);
+		abort();
+	}
+
+	l = pwrite(fd, buf, mfd_def_size * 8, 0);
+	if (l != (mfd_def_size * 8)) {
+		printf("pwrite() failed: %m\n");
+		abort();
+	}
+
+	mfd_assert_size(fd, mfd_def_size * 8);
+}
+
+static void mfd_fail_grow_write(int fd)
+{
+	static char *buf;
+	ssize_t l;
+
+	/* hugetlbfs does not support write */
+	if (hugetlbfs_test)
+		return;
+
+	buf = malloc(mfd_def_size * 8);
+	if (!buf) {
+		printf("malloc(%zu) failed: %m\n", mfd_def_size * 8);
+		abort();
+	}
+
+	l = pwrite(fd, buf, mfd_def_size * 8, 0);
+	if (l == (mfd_def_size * 8)) {
+		printf("pwrite() didn't fail as expected\n");
+		abort();
+	}
+}
+
+static int idle_thread_fn(void *arg)
+{
+	sigset_t set;
+	int sig;
+
+	/* dummy waiter; SIGTERM terminates us anyway */
+	sigemptyset(&set);
+	sigaddset(&set, SIGTERM);
+	sigwait(&set, &sig);
+
+	return 0;
+}
+
+static pid_t spawn_idle_thread(unsigned int flags)
+{
+	uint8_t *stack;
+	pid_t pid;
+
+	stack = malloc(STACK_SIZE);
+	if (!stack) {
+		printf("malloc(STACK_SIZE) failed: %m\n");
+		abort();
+	}
+
+	pid = clone(idle_thread_fn,
+		    stack + STACK_SIZE,
+		    SIGCHLD | flags,
+		    NULL);
+	if (pid < 0) {
+		printf("clone() failed: %m\n");
+		abort();
+	}
+
+	return pid;
+}
+
+static void join_idle_thread(pid_t pid)
+{
+	kill(pid, SIGTERM);
+	waitpid(pid, NULL, 0);
+}
+
+/*
+ * Test memfd_create() syscall
+ * Verify syscall-argument validation, including name checks, flag validation
+ * and more.
+ */
+static void test_create(void)
+{
+	char buf[2048];
+	int fd;
+
+	printf("%s CREATE\n", memfd_str);
+
+	/* test NULL name */
+	mfd_fail_new(NULL, 0);
+
+	/* test over-long name (not zero-terminated) */
+	memset(buf, 0xff, sizeof(buf));
+	mfd_fail_new(buf, 0);
+
+	/* test over-long zero-terminated name */
+	memset(buf, 0xff, sizeof(buf));
+	buf[sizeof(buf) - 1] = 0;
+	mfd_fail_new(buf, 0);
+
+	/* verify "" is a valid name */
+	fd = mfd_assert_new("", 0, 0);
+	close(fd);
+
+	/* verify invalid O_* open flags */
+	mfd_fail_new("", 0x0100);
+	mfd_fail_new("", ~MFD_CLOEXEC);
+	mfd_fail_new("", ~MFD_ALLOW_SEALING);
+	mfd_fail_new("", ~0);
+	mfd_fail_new("", 0x80000000U);
+
+	/* verify MFD_CLOEXEC is allowed */
+	fd = mfd_assert_new("", 0, MFD_CLOEXEC);
+	close(fd);
+
+	/* verify MFD_ALLOW_SEALING is allowed */
+	fd = mfd_assert_new("", 0, MFD_ALLOW_SEALING);
+	close(fd);
+
+	/* verify MFD_ALLOW_SEALING | MFD_CLOEXEC is allowed */
+	fd = mfd_assert_new("", 0, MFD_ALLOW_SEALING | MFD_CLOEXEC);
+	close(fd);
+}
+
+/*
+ * Test basic sealing
+ * A very basic sealing test to see whether setting/retrieving seals works.
+ */
+static void test_basic(void)
+{
+	int fd;
+
+	printf("%s BASIC\n", memfd_str);
+
+	fd = mfd_assert_new("kern_memfd_basic",
+			    mfd_def_size,
+			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
+
+	/* add basic seals */
+	mfd_assert_has_seals(fd, 0);
+	mfd_assert_add_seals(fd, F_SEAL_SHRINK |
+				 F_SEAL_WRITE);
+	mfd_assert_has_seals(fd, F_SEAL_SHRINK |
+				 F_SEAL_WRITE);
+
+	/* add them again */
+	mfd_assert_add_seals(fd, F_SEAL_SHRINK |
+				 F_SEAL_WRITE);
+	mfd_assert_has_seals(fd, F_SEAL_SHRINK |
+				 F_SEAL_WRITE);
+
+	/* add more seals and seal against sealing */
+	mfd_assert_add_seals(fd, F_SEAL_GROW | F_SEAL_SEAL);
+	mfd_assert_has_seals(fd, F_SEAL_SHRINK |
+				 F_SEAL_GROW |
+				 F_SEAL_WRITE |
+				 F_SEAL_SEAL);
+
+	/* verify that sealing no longer works */
+	mfd_fail_add_seals(fd, F_SEAL_GROW);
+	mfd_fail_add_seals(fd, 0);
+
+	close(fd);
+
+	/* verify sealing does not work without MFD_ALLOW_SEALING */
+	fd = mfd_assert_new("kern_memfd_basic",
+			    mfd_def_size,
+			    MFD_CLOEXEC);
+	mfd_assert_has_seals(fd, F_SEAL_SEAL);
+	mfd_fail_add_seals(fd, F_SEAL_SHRINK |
+			       F_SEAL_GROW |
+			       F_SEAL_WRITE);
+	mfd_assert_has_seals(fd, F_SEAL_SEAL);
+	close(fd);
+}
+
+/*
+ * Test SEAL_WRITE
+ * Test whether SEAL_WRITE actually prevents modifications.
+ */
+static void test_seal_write(void)
+{
+	int fd;
+
+	printf("%s SEAL-WRITE\n", memfd_str);
+
+	fd = mfd_assert_new("kern_memfd_seal_write",
+			    mfd_def_size,
+			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
+	mfd_assert_has_seals(fd, 0);
+	mfd_assert_add_seals(fd, F_SEAL_WRITE);
+	mfd_assert_has_seals(fd, F_SEAL_WRITE);
+
+	mfd_assert_read(fd);
+	mfd_fail_write(fd);
+	mfd_assert_shrink(fd);
+	mfd_assert_grow(fd);
+	mfd_fail_grow_write(fd);
+
+	close(fd);
+}
+
+/*
+ * Test SEAL_SHRINK
+ * Test whether SEAL_SHRINK actually prevents shrinking
+ */
+static void test_seal_shrink(void)
+{
+	int fd;
+
+	printf("%s SEAL-SHRINK\n", memfd_str);
+
+	fd = mfd_assert_new("kern_memfd_seal_shrink",
+			    mfd_def_size,
+			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
+	mfd_assert_has_seals(fd, 0);
+	mfd_assert_add_seals(fd, F_SEAL_SHRINK);
+	mfd_assert_has_seals(fd, F_SEAL_SHRINK);
+
+	mfd_assert_read(fd);
+	mfd_assert_write(fd);
+	mfd_fail_shrink(fd);
+	mfd_assert_grow(fd);
+	mfd_assert_grow_write(fd);
+
+	close(fd);
+}
+
+/*
+ * Test SEAL_GROW
+ * Test whether SEAL_GROW actually prevents growing
+ */
+static void test_seal_grow(void)
+{
+	int fd;
+
+	printf("%s SEAL-GROW\n", memfd_str);
+
+	fd = mfd_assert_new("kern_memfd_seal_grow",
+			    mfd_def_size,
+			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
+	mfd_assert_has_seals(fd, 0);
+	mfd_assert_add_seals(fd, F_SEAL_GROW);
+	mfd_assert_has_seals(fd, F_SEAL_GROW);
+
+	mfd_assert_read(fd);
+	mfd_assert_write(fd);
+	mfd_assert_shrink(fd);
+	mfd_fail_grow(fd);
+	mfd_fail_grow_write(fd);
+
+	close(fd);
+}
+
+/*
+ * Test SEAL_SHRINK | SEAL_GROW
+ * Test whether SEAL_SHRINK | SEAL_GROW actually prevents resizing
+ */
+static void test_seal_resize(void)
+{
+	int fd;
+
+	printf("%s SEAL-RESIZE\n", memfd_str);
+
+	fd = mfd_assert_new("kern_memfd_seal_resize",
+			    mfd_def_size,
+			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
+	mfd_assert_has_seals(fd, 0);
+	mfd_assert_add_seals(fd, F_SEAL_SHRINK | F_SEAL_GROW);
+	mfd_assert_has_seals(fd, F_SEAL_SHRINK | F_SEAL_GROW);
+
+	mfd_assert_read(fd);
+	mfd_assert_write(fd);
+	mfd_fail_shrink(fd);
+	mfd_fail_grow(fd);
+	mfd_fail_grow_write(fd);
+
+	close(fd);
+}
+
+/*
+ * Test sharing via dup()
+ * Test that seals are shared between dupped FDs and they're all equal.
+ */
+static void test_share_dup(char *banner, char *b_suffix)
+{
+	int fd, fd2;
+
+	printf("%s %s %s\n", memfd_str, banner, b_suffix);
+
+	fd = mfd_assert_new("kern_memfd_share_dup",
+			    mfd_def_size,
+			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
+	mfd_assert_has_seals(fd, 0);
+
+	fd2 = mfd_assert_dup(fd);
+	mfd_assert_has_seals(fd2, 0);
+
+	mfd_assert_add_seals(fd, F_SEAL_WRITE);
+	mfd_assert_has_seals(fd, F_SEAL_WRITE);
+	mfd_assert_has_seals(fd2, F_SEAL_WRITE);
+
+	mfd_assert_add_seals(fd2, F_SEAL_SHRINK);
+	mfd_assert_has_seals(fd, F_SEAL_WRITE | F_SEAL_SHRINK);
+	mfd_assert_has_seals(fd2, F_SEAL_WRITE | F_SEAL_SHRINK);
+
+	mfd_assert_add_seals(fd, F_SEAL_SEAL);
+	mfd_assert_has_seals(fd, F_SEAL_WRITE | F_SEAL_SHRINK | F_SEAL_SEAL);
+	mfd_assert_has_seals(fd2, F_SEAL_WRITE | F_SEAL_SHRINK | F_SEAL_SEAL);
+
+	mfd_fail_add_seals(fd, F_SEAL_GROW);
+	mfd_fail_add_seals(fd2, F_SEAL_GROW);
+	mfd_fail_add_seals(fd, F_SEAL_SEAL);
+	mfd_fail_add_seals(fd2, F_SEAL_SEAL);
+
+	close(fd2);
+
+	mfd_fail_add_seals(fd, F_SEAL_GROW);
+	close(fd);
+}
+
+/*
+ * Test sealing with active mmap()s
+ * Modifying seals is only allowed if no other mmap() refs exist.
+ */
+static void test_share_mmap(char *banner, char *b_suffix)
+{
+	int fd;
+	void *p;
+
+	printf("%s %s %s\n", memfd_str,  banner, b_suffix);
+
+	fd = mfd_assert_new("kern_memfd_share_mmap",
+			    mfd_def_size,
+			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
+	mfd_assert_has_seals(fd, 0);
+
+	/* shared/writable ref prevents sealing WRITE, but allows others */
+	p = mfd_assert_mmap_shared(fd);
+	mfd_fail_add_seals(fd, F_SEAL_WRITE);
+	mfd_assert_has_seals(fd, 0);
+	mfd_assert_add_seals(fd, F_SEAL_SHRINK);
+	mfd_assert_has_seals(fd, F_SEAL_SHRINK);
+	munmap(p, mfd_def_size);
+
+	/* readable ref allows sealing */
+	p = mfd_assert_mmap_private(fd);
+	mfd_assert_add_seals(fd, F_SEAL_WRITE);
+	mfd_assert_has_seals(fd, F_SEAL_WRITE | F_SEAL_SHRINK);
+	munmap(p, mfd_def_size);
+
+	close(fd);
+}
+
+/*
+ * Test sealing with open(/proc/self/fd/%d)
+ * Via /proc we can get access to a separate file-context for the same memfd.
+ * This is *not* like dup(), but like a real separate open(). Make sure the
+ * semantics are as expected and we correctly check for RDONLY / WRONLY / RDWR.
+ */
+static void test_share_open(char *banner, char *b_suffix)
+{
+	int fd, fd2;
+
+	printf("%s %s %s\n", memfd_str, banner, b_suffix);
+
+	fd = mfd_assert_new("kern_memfd_share_open",
+			    mfd_def_size,
+			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
+	mfd_assert_has_seals(fd, 0);
+
+	fd2 = mfd_assert_open(fd, O_RDWR, 0);
+	mfd_assert_add_seals(fd, F_SEAL_WRITE);
+	mfd_assert_has_seals(fd, F_SEAL_WRITE);
+	mfd_assert_has_seals(fd2, F_SEAL_WRITE);
+
+	mfd_assert_add_seals(fd2, F_SEAL_SHRINK);
+	mfd_assert_has_seals(fd, F_SEAL_WRITE | F_SEAL_SHRINK);
+	mfd_assert_has_seals(fd2, F_SEAL_WRITE | F_SEAL_SHRINK);
+
+	close(fd);
+	fd = mfd_assert_open(fd2, O_RDONLY, 0);
+
+	mfd_fail_add_seals(fd, F_SEAL_SEAL);
+	mfd_assert_has_seals(fd, F_SEAL_WRITE | F_SEAL_SHRINK);
+	mfd_assert_has_seals(fd2, F_SEAL_WRITE | F_SEAL_SHRINK);
+
+	close(fd2);
+	fd2 = mfd_assert_open(fd, O_RDWR, 0);
+
+	mfd_assert_add_seals(fd2, F_SEAL_SEAL);
+	mfd_assert_has_seals(fd, F_SEAL_WRITE | F_SEAL_SHRINK | F_SEAL_SEAL);
+	mfd_assert_has_seals(fd2, F_SEAL_WRITE | F_SEAL_SHRINK | F_SEAL_SEAL);
+
+	close(fd2);
+	close(fd);
+}
+
+/*
+ * Test sharing via fork()
+ * Test whether seal-modifications work as expected with forked childs.
+ */
+static void test_share_fork(char *banner, char *b_suffix)
+{
+	int fd;
+	pid_t pid;
+
+	printf("%s %s %s\n", memfd_str, banner, b_suffix);
+
+	fd = mfd_assert_new("kern_memfd_share_fork",
+			    mfd_def_size,
+			    MFD_CLOEXEC | MFD_ALLOW_SEALING);
+	mfd_assert_has_seals(fd, 0);
+
+	pid = spawn_idle_thread(0);
+	mfd_assert_add_seals(fd, F_SEAL_SEAL);
+	mfd_assert_has_seals(fd, F_SEAL_SEAL);
+
+	mfd_fail_add_seals(fd, F_SEAL_WRITE);
+	mfd_assert_has_seals(fd, F_SEAL_SEAL);
+
+	join_idle_thread(pid);
+
+	mfd_fail_add_seals(fd, F_SEAL_WRITE);
+	mfd_assert_has_seals(fd, F_SEAL_SEAL);
+
+	close(fd);
+}
+
+int main(int argc, char **argv)
+{
+	pid_t pid;
+
+	if (argc == 2) {
+		if (!strcmp(argv[1], "hugetlbfs")) {
+			unsigned long hpage_size = default_huge_page_size();
+
+			if (!hpage_size) {
+				printf("Unable to determine huge page size\n");
+				abort();
+			}
+
+			hugetlbfs_test = 1;
+			memfd_str = MEMFD_HUGE_STR;
+			mfd_def_size = hpage_size * 2;
+		} else {
+			printf("Unknown option: %s\n", argv[1]);
+			abort();
+		}
+	}
+
+	test_create();
+	test_basic();
+
+	test_seal_write();
+	test_seal_shrink();
+	test_seal_grow();
+	test_seal_resize();
+
+	test_share_dup("SHARE-DUP", "");
+	test_share_mmap("SHARE-MMAP", "");
+	test_share_open("SHARE-OPEN", "");
+	test_share_fork("SHARE-FORK", "");
+
+	/* Run test-suite in a multi-threaded environment with a shared
+	 * file-table. */
+	pid = spawn_idle_thread(CLONE_FILES | CLONE_FS | CLONE_VM);
+	test_share_dup("SHARE-DUP", SHARED_FT_STR);
+	test_share_mmap("SHARE-MMAP", SHARED_FT_STR);
+	test_share_open("SHARE-OPEN", SHARED_FT_STR);
+	test_share_fork("SHARE-FORK", SHARED_FT_STR);
+	join_idle_thread(pid);
+
+	printf("memfd: DONE\n");
+
+	return 0;
+}
diff --git a/tools/testing/selftests/memfd/run_fuse_test.sh b/tools/testing/selftests/memfd/run_fuse_test.sh
new file mode 100755
index 000000000..22e572e2d
--- /dev/null
+++ b/tools/testing/selftests/memfd/run_fuse_test.sh
@@ -0,0 +1,15 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+if test -d "./mnt" ; then
+	fusermount -u ./mnt
+	rmdir ./mnt
+fi
+
+set -e
+
+mkdir mnt
+./fuse_mnt ./mnt
+./fuse_test ./mnt/memfd $@
+fusermount -u ./mnt
+rmdir ./mnt
diff --git a/tools/testing/selftests/memfd/run_hugetlbfs_test.sh b/tools/testing/selftests/memfd/run_hugetlbfs_test.sh
new file mode 100755
index 000000000..fb633eeb0
--- /dev/null
+++ b/tools/testing/selftests/memfd/run_hugetlbfs_test.sh
@@ -0,0 +1,68 @@
+#!/bin/bash
+# please run as root
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+#
+# To test memfd_create with hugetlbfs, there needs to be hpages_test
+# huge pages free.  Attempt to allocate enough pages to test.
+#
+hpages_test=8
+
+#
+# Get count of free huge pages from /proc/meminfo
+#
+while read name size unit; do
+        if [ "$name" = "HugePages_Free:" ]; then
+                freepgs=$size
+        fi
+done < /proc/meminfo
+
+#
+# If not enough free huge pages for test, attempt to increase
+#
+if [ -n "$freepgs" ] && [ $freepgs -lt $hpages_test ]; then
+	nr_hugepgs=`cat /proc/sys/vm/nr_hugepages`
+	hpages_needed=`expr $hpages_test - $freepgs`
+
+	if [ $UID != 0 ]; then
+		echo "Please run memfd with hugetlbfs test as root"
+		exit $ksft_skip
+	fi
+
+	echo 3 > /proc/sys/vm/drop_caches
+	echo $(( $hpages_needed + $nr_hugepgs )) > /proc/sys/vm/nr_hugepages
+	while read name size unit; do
+		if [ "$name" = "HugePages_Free:" ]; then
+			freepgs=$size
+		fi
+	done < /proc/meminfo
+fi
+
+#
+# If still not enough huge pages available, exit.  But, give back any huge
+# pages potentially allocated above.
+#
+if [ $freepgs -lt $hpages_test ]; then
+	# nr_hugepgs non-zero only if we attempted to increase
+	if [ -n "$nr_hugepgs" ]; then
+		echo $nr_hugepgs > /proc/sys/vm/nr_hugepages
+	fi
+	printf "Not enough huge pages available (%d < %d)\n" \
+		$freepgs $needpgs
+	exit $ksft_skip
+fi
+
+#
+# Run the hugetlbfs test
+#
+./memfd_test hugetlbfs
+./run_fuse_test.sh hugetlbfs
+
+#
+# Give back any huge pages allocated for the test
+#
+if [ -n "$nr_hugepgs" ]; then
+	echo $nr_hugepgs > /proc/sys/vm/nr_hugepages
+fi
diff --git a/tools/testing/selftests/memory-hotplug/Makefile b/tools/testing/selftests/memory-hotplug/Makefile
new file mode 100644
index 000000000..e0a625e34
--- /dev/null
+++ b/tools/testing/selftests/memory-hotplug/Makefile
@@ -0,0 +1,11 @@
+# SPDX-License-Identifier: GPL-2.0
+all:
+
+include ../lib.mk
+
+TEST_PROGS := mem-on-off-test.sh
+
+run_full_test:
+	@/bin/bash ./mem-on-off-test.sh -r 10 && echo "memory-hotplug selftests: [PASS]" || echo "memory-hotplug selftests: [FAIL]"
+
+clean:
diff --git a/tools/testing/selftests/memory-hotplug/config b/tools/testing/selftests/memory-hotplug/config
new file mode 100644
index 000000000..a7e8cd5bb
--- /dev/null
+++ b/tools/testing/selftests/memory-hotplug/config
@@ -0,0 +1,5 @@
+CONFIG_MEMORY_HOTPLUG=y
+CONFIG_MEMORY_HOTPLUG_SPARSE=y
+CONFIG_NOTIFIER_ERROR_INJECTION=y
+CONFIG_MEMORY_NOTIFIER_ERROR_INJECT=m
+CONFIG_MEMORY_HOTREMOVE=y
diff --git a/tools/testing/selftests/memory-hotplug/mem-on-off-test.sh b/tools/testing/selftests/memory-hotplug/mem-on-off-test.sh
new file mode 100755
index 000000000..b37585e6a
--- /dev/null
+++ b/tools/testing/selftests/memory-hotplug/mem-on-off-test.sh
@@ -0,0 +1,291 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+SYSFS=
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+prerequisite()
+{
+	msg="skip all tests:"
+
+	if [ $UID != 0 ]; then
+		echo $msg must be run as root >&2
+		exit $ksft_skip
+	fi
+
+	SYSFS=`mount -t sysfs | head -1 | awk '{ print $3 }'`
+
+	if [ ! -d "$SYSFS" ]; then
+		echo $msg sysfs is not mounted >&2
+		exit $ksft_skip
+	fi
+
+	if ! ls $SYSFS/devices/system/memory/memory* > /dev/null 2>&1; then
+		echo $msg memory hotplug is not supported >&2
+		exit $ksft_skip
+	fi
+
+	if ! grep -q 1 $SYSFS/devices/system/memory/memory*/removable; then
+		echo $msg no hot-pluggable memory >&2
+		exit $ksft_skip
+	fi
+}
+
+#
+# list all hot-pluggable memory
+#
+hotpluggable_memory()
+{
+	local state=${1:-.\*}
+
+	for memory in $SYSFS/devices/system/memory/memory*; do
+		if grep -q 1 $memory/removable &&
+		   grep -q $state $memory/state; then
+			echo ${memory##/*/memory}
+		fi
+	done
+}
+
+hotpluggable_offline_memory()
+{
+	hotpluggable_memory offline
+}
+
+hotpluggable_online_memory()
+{
+	hotpluggable_memory online
+}
+
+memory_is_online()
+{
+	grep -q online $SYSFS/devices/system/memory/memory$1/state
+}
+
+memory_is_offline()
+{
+	grep -q offline $SYSFS/devices/system/memory/memory$1/state
+}
+
+online_memory()
+{
+	echo online > $SYSFS/devices/system/memory/memory$1/state
+}
+
+offline_memory()
+{
+	echo offline > $SYSFS/devices/system/memory/memory$1/state
+}
+
+online_memory_expect_success()
+{
+	local memory=$1
+
+	if ! online_memory $memory; then
+		echo $FUNCNAME $memory: unexpected fail >&2
+		return 1
+	elif ! memory_is_online $memory; then
+		echo $FUNCNAME $memory: unexpected offline >&2
+		return 1
+	fi
+	return 0
+}
+
+online_memory_expect_fail()
+{
+	local memory=$1
+
+	if online_memory $memory 2> /dev/null; then
+		echo $FUNCNAME $memory: unexpected success >&2
+		return 1
+	elif ! memory_is_offline $memory; then
+		echo $FUNCNAME $memory: unexpected online >&2
+		return 1
+	fi
+	return 0
+}
+
+offline_memory_expect_success()
+{
+	local memory=$1
+
+	if ! offline_memory $memory; then
+		echo $FUNCNAME $memory: unexpected fail >&2
+		return 1
+	elif ! memory_is_offline $memory; then
+		echo $FUNCNAME $memory: unexpected offline >&2
+		return 1
+	fi
+	return 0
+}
+
+offline_memory_expect_fail()
+{
+	local memory=$1
+
+	if offline_memory $memory 2> /dev/null; then
+		echo $FUNCNAME $memory: unexpected success >&2
+		return 1
+	elif ! memory_is_online $memory; then
+		echo $FUNCNAME $memory: unexpected offline >&2
+		return 1
+	fi
+	return 0
+}
+
+error=-12
+priority=0
+# Run with default of ratio=2 for Kselftest run
+ratio=2
+retval=0
+
+while getopts e:hp:r: opt; do
+	case $opt in
+	e)
+		error=$OPTARG
+		;;
+	h)
+		echo "Usage $0 [ -e errno ] [ -p notifier-priority ] [ -r percent-of-memory-to-offline ]"
+		exit
+		;;
+	p)
+		priority=$OPTARG
+		;;
+	r)
+		ratio=$OPTARG
+		if [ "$ratio" -gt 100 ] || [ "$ratio" -lt 0 ]; then
+			echo "The percentage should be an integer within 0~100 range"
+			exit 1
+		fi
+		;;
+	esac
+done
+
+if ! [ "$error" -ge -4095 -a "$error" -lt 0 ]; then
+	echo "error code must be -4095 <= errno < 0" >&2
+	exit 1
+fi
+
+prerequisite
+
+echo "Test scope: $ratio% hotplug memory"
+
+#
+# Online all hot-pluggable memory
+#
+hotpluggable_num=`hotpluggable_offline_memory | wc -l`
+echo -e "\t online all hot-pluggable memory in offline state:"
+if [ "$hotpluggable_num" -gt 0 ]; then
+	for memory in `hotpluggable_offline_memory`; do
+		echo "offline->online memory$memory"
+		if ! online_memory_expect_success $memory; then
+			retval=1
+		fi
+	done
+else
+	echo -e "\t\t SKIPPED - no hot-pluggable memory in offline state"
+fi
+
+#
+# Offline $ratio percent of hot-pluggable memory
+#
+hotpluggable_num=`hotpluggable_online_memory | wc -l`
+target=`echo "a=$hotpluggable_num*$ratio; if ( a%100 ) a/100+1 else a/100" | bc`
+echo -e "\t offline $ratio% hot-pluggable memory in online state"
+echo -e "\t trying to offline $target out of $hotpluggable_num memory block(s):"
+for memory in `hotpluggable_online_memory`; do
+	if [ "$target" -gt 0 ]; then
+		echo "online->offline memory$memory"
+		if offline_memory_expect_success $memory; then
+			target=$(($target - 1))
+		fi
+	fi
+done
+if [ "$target" -gt 0 ]; then
+	retval=1
+	echo -e "\t\t FAILED - unable to offline some memory blocks, device busy?"
+fi
+
+#
+# Online all hot-pluggable memory again
+#
+hotpluggable_num=`hotpluggable_offline_memory | wc -l`
+echo -e "\t online all hot-pluggable memory in offline state:"
+if [ "$hotpluggable_num" -gt 0 ]; then
+	for memory in `hotpluggable_offline_memory`; do
+		echo "offline->online memory$memory"
+		if ! online_memory_expect_success $memory; then
+			retval=1
+		fi
+	done
+else
+	echo -e "\t\t SKIPPED - no hot-pluggable memory in offline state"
+fi
+
+#
+# Test with memory notifier error injection
+#
+
+DEBUGFS=`mount -t debugfs | head -1 | awk '{ print $3 }'`
+NOTIFIER_ERR_INJECT_DIR=$DEBUGFS/notifier-error-inject/memory
+
+prerequisite_extra()
+{
+	msg="skip extra tests:"
+
+	/sbin/modprobe -q -r memory-notifier-error-inject
+	/sbin/modprobe -q memory-notifier-error-inject priority=$priority
+
+	if [ ! -d "$DEBUGFS" ]; then
+		echo $msg debugfs is not mounted >&2
+		exit $retval
+	fi
+
+	if [ ! -d $NOTIFIER_ERR_INJECT_DIR ]; then
+		echo $msg memory-notifier-error-inject module is not available >&2
+		exit $retval
+	fi
+}
+
+echo -e "\t Test with memory notifier error injection"
+prerequisite_extra
+
+#
+# Offline $ratio percent of hot-pluggable memory
+#
+echo 0 > $NOTIFIER_ERR_INJECT_DIR/actions/MEM_GOING_OFFLINE/error
+for memory in `hotpluggable_online_memory`; do
+	if [ $((RANDOM % 100)) -lt $ratio ]; then
+		offline_memory_expect_success $memory
+	fi
+done
+
+#
+# Test memory hot-add error handling (offline => online)
+#
+echo $error > $NOTIFIER_ERR_INJECT_DIR/actions/MEM_GOING_ONLINE/error
+for memory in `hotpluggable_offline_memory`; do
+	online_memory_expect_fail $memory
+done
+
+#
+# Online all hot-pluggable memory
+#
+echo 0 > $NOTIFIER_ERR_INJECT_DIR/actions/MEM_GOING_ONLINE/error
+for memory in `hotpluggable_offline_memory`; do
+	online_memory_expect_success $memory
+done
+
+#
+# Test memory hot-remove error handling (online => offline)
+#
+echo $error > $NOTIFIER_ERR_INJECT_DIR/actions/MEM_GOING_OFFLINE/error
+for memory in `hotpluggable_online_memory`; do
+	offline_memory_expect_fail $memory
+done
+
+echo 0 > $NOTIFIER_ERR_INJECT_DIR/actions/MEM_GOING_OFFLINE/error
+/sbin/modprobe -q -r memory-notifier-error-inject
+
+exit $retval
diff --git a/tools/testing/selftests/mount/.gitignore b/tools/testing/selftests/mount/.gitignore
new file mode 100644
index 000000000..856ad4107
--- /dev/null
+++ b/tools/testing/selftests/mount/.gitignore
@@ -0,0 +1 @@
+unprivileged-remount-test
diff --git a/tools/testing/selftests/mount/Makefile b/tools/testing/selftests/mount/Makefile
new file mode 100644
index 000000000..026890744
--- /dev/null
+++ b/tools/testing/selftests/mount/Makefile
@@ -0,0 +1,9 @@
+# SPDX-License-Identifier: GPL-2.0
+# Makefile for mount selftests.
+CFLAGS = -Wall \
+         -O2
+
+TEST_PROGS := run_tests.sh
+TEST_GEN_FILES := unprivileged-remount-test
+
+include ../lib.mk
diff --git a/tools/testing/selftests/mount/config b/tools/testing/selftests/mount/config
new file mode 100644
index 000000000..416bd53ce
--- /dev/null
+++ b/tools/testing/selftests/mount/config
@@ -0,0 +1 @@
+CONFIG_USER_NS=y
diff --git a/tools/testing/selftests/mount/run_tests.sh b/tools/testing/selftests/mount/run_tests.sh
new file mode 100755
index 000000000..4ab8f507d
--- /dev/null
+++ b/tools/testing/selftests/mount/run_tests.sh
@@ -0,0 +1,12 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+# Run mount selftests
+if [ -f /proc/self/uid_map ] ; then
+	./unprivileged-remount-test ;
+else
+	echo "WARN: No /proc/self/uid_map exist, test skipped." ;
+	exit $ksft_skip
+fi
diff --git a/tools/testing/selftests/mount/unprivileged-remount-test.c b/tools/testing/selftests/mount/unprivileged-remount-test.c
new file mode 100644
index 000000000..584dc6bc3
--- /dev/null
+++ b/tools/testing/selftests/mount/unprivileged-remount-test.c
@@ -0,0 +1,371 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <sched.h>
+#include <stdio.h>
+#include <errno.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/mount.h>
+#include <sys/wait.h>
+#include <sys/vfs.h>
+#include <sys/statvfs.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <grp.h>
+#include <stdbool.h>
+#include <stdarg.h>
+
+#ifndef CLONE_NEWNS
+# define CLONE_NEWNS 0x00020000
+#endif
+#ifndef CLONE_NEWUTS
+# define CLONE_NEWUTS 0x04000000
+#endif
+#ifndef CLONE_NEWIPC
+# define CLONE_NEWIPC 0x08000000
+#endif
+#ifndef CLONE_NEWNET
+# define CLONE_NEWNET 0x40000000
+#endif
+#ifndef CLONE_NEWUSER
+# define CLONE_NEWUSER 0x10000000
+#endif
+#ifndef CLONE_NEWPID
+# define CLONE_NEWPID 0x20000000
+#endif
+
+#ifndef MS_REC
+# define MS_REC 16384
+#endif
+#ifndef MS_RELATIME
+# define MS_RELATIME (1 << 21)
+#endif
+#ifndef MS_STRICTATIME
+# define MS_STRICTATIME (1 << 24)
+#endif
+
+static void die(char *fmt, ...)
+{
+	va_list ap;
+	va_start(ap, fmt);
+	vfprintf(stderr, fmt, ap);
+	va_end(ap);
+	exit(EXIT_FAILURE);
+}
+
+static void vmaybe_write_file(bool enoent_ok, char *filename, char *fmt, va_list ap)
+{
+	char buf[4096];
+	int fd;
+	ssize_t written;
+	int buf_len;
+
+	buf_len = vsnprintf(buf, sizeof(buf), fmt, ap);
+	if (buf_len < 0) {
+		die("vsnprintf failed: %s\n",
+		    strerror(errno));
+	}
+	if (buf_len >= sizeof(buf)) {
+		die("vsnprintf output truncated\n");
+	}
+
+	fd = open(filename, O_WRONLY);
+	if (fd < 0) {
+		if ((errno == ENOENT) && enoent_ok)
+			return;
+		die("open of %s failed: %s\n",
+		    filename, strerror(errno));
+	}
+	written = write(fd, buf, buf_len);
+	if (written != buf_len) {
+		if (written >= 0) {
+			die("short write to %s\n", filename);
+		} else {
+			die("write to %s failed: %s\n",
+				filename, strerror(errno));
+		}
+	}
+	if (close(fd) != 0) {
+		die("close of %s failed: %s\n",
+			filename, strerror(errno));
+	}
+}
+
+static void maybe_write_file(char *filename, char *fmt, ...)
+{
+	va_list ap;
+
+	va_start(ap, fmt);
+	vmaybe_write_file(true, filename, fmt, ap);
+	va_end(ap);
+
+}
+
+static void write_file(char *filename, char *fmt, ...)
+{
+	va_list ap;
+
+	va_start(ap, fmt);
+	vmaybe_write_file(false, filename, fmt, ap);
+	va_end(ap);
+
+}
+
+static int read_mnt_flags(const char *path)
+{
+	int ret;
+	struct statvfs stat;
+	int mnt_flags;
+
+	ret = statvfs(path, &stat);
+	if (ret != 0) {
+		die("statvfs of %s failed: %s\n",
+			path, strerror(errno));
+	}
+	if (stat.f_flag & ~(ST_RDONLY | ST_NOSUID | ST_NODEV | \
+			ST_NOEXEC | ST_NOATIME | ST_NODIRATIME | ST_RELATIME | \
+			ST_SYNCHRONOUS | ST_MANDLOCK)) {
+		die("Unrecognized mount flags\n");
+	}
+	mnt_flags = 0;
+	if (stat.f_flag & ST_RDONLY)
+		mnt_flags |= MS_RDONLY;
+	if (stat.f_flag & ST_NOSUID)
+		mnt_flags |= MS_NOSUID;
+	if (stat.f_flag & ST_NODEV)
+		mnt_flags |= MS_NODEV;
+	if (stat.f_flag & ST_NOEXEC)
+		mnt_flags |= MS_NOEXEC;
+	if (stat.f_flag & ST_NOATIME)
+		mnt_flags |= MS_NOATIME;
+	if (stat.f_flag & ST_NODIRATIME)
+		mnt_flags |= MS_NODIRATIME;
+	if (stat.f_flag & ST_RELATIME)
+		mnt_flags |= MS_RELATIME;
+	if (stat.f_flag & ST_SYNCHRONOUS)
+		mnt_flags |= MS_SYNCHRONOUS;
+	if (stat.f_flag & ST_MANDLOCK)
+		mnt_flags |= ST_MANDLOCK;
+
+	return mnt_flags;
+}
+
+static void create_and_enter_userns(void)
+{
+	uid_t uid;
+	gid_t gid;
+
+	uid = getuid();
+	gid = getgid();
+
+	if (unshare(CLONE_NEWUSER) !=0) {
+		die("unshare(CLONE_NEWUSER) failed: %s\n",
+			strerror(errno));
+	}
+
+	maybe_write_file("/proc/self/setgroups", "deny");
+	write_file("/proc/self/uid_map", "0 %d 1", uid);
+	write_file("/proc/self/gid_map", "0 %d 1", gid);
+
+	if (setgid(0) != 0) {
+		die ("setgid(0) failed %s\n",
+			strerror(errno));
+	}
+	if (setuid(0) != 0) {
+		die("setuid(0) failed %s\n",
+			strerror(errno));
+	}
+}
+
+static
+bool test_unpriv_remount(const char *fstype, const char *mount_options,
+			 int mount_flags, int remount_flags, int invalid_flags)
+{
+	pid_t child;
+
+	child = fork();
+	if (child == -1) {
+		die("fork failed: %s\n",
+			strerror(errno));
+	}
+	if (child != 0) { /* parent */
+		pid_t pid;
+		int status;
+		pid = waitpid(child, &status, 0);
+		if (pid == -1) {
+			die("waitpid failed: %s\n",
+				strerror(errno));
+		}
+		if (pid != child) {
+			die("waited for %d got %d\n",
+				child, pid);
+		}
+		if (!WIFEXITED(status)) {
+			die("child did not terminate cleanly\n");
+		}
+		return WEXITSTATUS(status) == EXIT_SUCCESS ? true : false;
+	}
+
+	create_and_enter_userns();
+	if (unshare(CLONE_NEWNS) != 0) {
+		die("unshare(CLONE_NEWNS) failed: %s\n",
+			strerror(errno));
+	}
+
+	if (mount("testing", "/tmp", fstype, mount_flags, mount_options) != 0) {
+		die("mount of %s with options '%s' on /tmp failed: %s\n",
+		    fstype,
+		    mount_options? mount_options : "",
+		    strerror(errno));
+	}
+
+	create_and_enter_userns();
+
+	if (unshare(CLONE_NEWNS) != 0) {
+		die("unshare(CLONE_NEWNS) failed: %s\n",
+			strerror(errno));
+	}
+
+	if (mount("/tmp", "/tmp", "none",
+		  MS_REMOUNT | MS_BIND | remount_flags, NULL) != 0) {
+		/* system("cat /proc/self/mounts"); */
+		die("remount of /tmp failed: %s\n",
+		    strerror(errno));
+	}
+
+	if (mount("/tmp", "/tmp", "none",
+		  MS_REMOUNT | MS_BIND | invalid_flags, NULL) == 0) {
+		/* system("cat /proc/self/mounts"); */
+		die("remount of /tmp with invalid flags "
+		    "succeeded unexpectedly\n");
+	}
+	exit(EXIT_SUCCESS);
+}
+
+static bool test_unpriv_remount_simple(int mount_flags)
+{
+	return test_unpriv_remount("ramfs", NULL, mount_flags, mount_flags, 0);
+}
+
+static bool test_unpriv_remount_atime(int mount_flags, int invalid_flags)
+{
+	return test_unpriv_remount("ramfs", NULL, mount_flags, mount_flags,
+				   invalid_flags);
+}
+
+static bool test_priv_mount_unpriv_remount(void)
+{
+	pid_t child;
+	int ret;
+	const char *orig_path = "/dev";
+	const char *dest_path = "/tmp";
+	int orig_mnt_flags, remount_mnt_flags;
+
+	child = fork();
+	if (child == -1) {
+		die("fork failed: %s\n",
+			strerror(errno));
+	}
+	if (child != 0) { /* parent */
+		pid_t pid;
+		int status;
+		pid = waitpid(child, &status, 0);
+		if (pid == -1) {
+			die("waitpid failed: %s\n",
+				strerror(errno));
+		}
+		if (pid != child) {
+			die("waited for %d got %d\n",
+				child, pid);
+		}
+		if (!WIFEXITED(status)) {
+			die("child did not terminate cleanly\n");
+		}
+		return WEXITSTATUS(status) == EXIT_SUCCESS ? true : false;
+	}
+
+	orig_mnt_flags = read_mnt_flags(orig_path);
+
+	create_and_enter_userns();
+	ret = unshare(CLONE_NEWNS);
+	if (ret != 0) {
+		die("unshare(CLONE_NEWNS) failed: %s\n",
+			strerror(errno));
+	}
+
+	ret = mount(orig_path, dest_path, "bind", MS_BIND | MS_REC, NULL);
+	if (ret != 0) {
+		die("recursive bind mount of %s onto %s failed: %s\n",
+			orig_path, dest_path, strerror(errno));
+	}
+
+	ret = mount(dest_path, dest_path, "none",
+		    MS_REMOUNT | MS_BIND | orig_mnt_flags , NULL);
+	if (ret != 0) {
+		/* system("cat /proc/self/mounts"); */
+		die("remount of /tmp failed: %s\n",
+		    strerror(errno));
+	}
+
+	remount_mnt_flags = read_mnt_flags(dest_path);
+	if (orig_mnt_flags != remount_mnt_flags) {
+		die("Mount flags unexpectedly changed during remount of %s originally mounted on %s\n",
+			dest_path, orig_path);
+	}
+	exit(EXIT_SUCCESS);
+}
+
+int main(int argc, char **argv)
+{
+	if (!test_unpriv_remount_simple(MS_RDONLY)) {
+		die("MS_RDONLY malfunctions\n");
+	}
+	if (!test_unpriv_remount("devpts", "newinstance", MS_NODEV, MS_NODEV, 0)) {
+		die("MS_NODEV malfunctions\n");
+	}
+	if (!test_unpriv_remount_simple(MS_NOSUID)) {
+		die("MS_NOSUID malfunctions\n");
+	}
+	if (!test_unpriv_remount_simple(MS_NOEXEC)) {
+		die("MS_NOEXEC malfunctions\n");
+	}
+	if (!test_unpriv_remount_atime(MS_RELATIME,
+				       MS_NOATIME))
+	{
+		die("MS_RELATIME malfunctions\n");
+	}
+	if (!test_unpriv_remount_atime(MS_STRICTATIME,
+				       MS_NOATIME))
+	{
+		die("MS_STRICTATIME malfunctions\n");
+	}
+	if (!test_unpriv_remount_atime(MS_NOATIME,
+				       MS_STRICTATIME))
+	{
+		die("MS_NOATIME malfunctions\n");
+	}
+	if (!test_unpriv_remount_atime(MS_RELATIME|MS_NODIRATIME,
+				       MS_NOATIME))
+	{
+		die("MS_RELATIME|MS_NODIRATIME malfunctions\n");
+	}
+	if (!test_unpriv_remount_atime(MS_STRICTATIME|MS_NODIRATIME,
+				       MS_NOATIME))
+	{
+		die("MS_STRICTATIME|MS_NODIRATIME malfunctions\n");
+	}
+	if (!test_unpriv_remount_atime(MS_NOATIME|MS_NODIRATIME,
+				       MS_STRICTATIME))
+	{
+		die("MS_NOATIME|MS_DIRATIME malfunctions\n");
+	}
+	if (!test_unpriv_remount("ramfs", NULL, MS_STRICTATIME, 0, MS_NOATIME))
+	{
+		die("Default atime malfunctions\n");
+	}
+	if (!test_priv_mount_unpriv_remount()) {
+		die("Mount flags unexpectedly changed after remount\n");
+	}
+	return EXIT_SUCCESS;
+}
diff --git a/tools/testing/selftests/mqueue/.gitignore b/tools/testing/selftests/mqueue/.gitignore
new file mode 100644
index 000000000..d8d423772
--- /dev/null
+++ b/tools/testing/selftests/mqueue/.gitignore
@@ -0,0 +1,2 @@
+mq_open_tests
+mq_perf_tests
diff --git a/tools/testing/selftests/mqueue/Makefile b/tools/testing/selftests/mqueue/Makefile
new file mode 100644
index 000000000..8a58055fc
--- /dev/null
+++ b/tools/testing/selftests/mqueue/Makefile
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: GPL-2.0
+CFLAGS += -O2
+LDLIBS = -lrt -lpthread -lpopt
+
+TEST_GEN_PROGS := mq_open_tests mq_perf_tests
+
+include ../lib.mk
diff --git a/tools/testing/selftests/mqueue/mq_open_tests.c b/tools/testing/selftests/mqueue/mq_open_tests.c
new file mode 100644
index 000000000..9403ac01b
--- /dev/null
+++ b/tools/testing/selftests/mqueue/mq_open_tests.c
@@ -0,0 +1,502 @@
+/*
+ * This application is Copyright 2012 Red Hat, Inc.
+ *	Doug Ledford <dledford@redhat.com>
+ *
+ * mq_open_tests is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, version 3.
+ *
+ * mq_open_tests is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * For the full text of the license, see <http://www.gnu.org/licenses/>.
+ *
+ * mq_open_tests.c
+ *   Tests the various situations that should either succeed or fail to
+ *   open a posix message queue and then reports whether or not they
+ *   did as they were supposed to.
+ *
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <string.h>
+#include <limits.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+#include <sys/stat.h>
+#include <mqueue.h>
+#include <error.h>
+
+#include "../kselftest.h"
+
+static char *usage =
+"Usage:\n"
+"  %s path\n"
+"\n"
+"	path	Path name of the message queue to create\n"
+"\n"
+"	Note: this program must be run as root in order to enable all tests\n"
+"\n";
+
+char *DEF_MSGS = "/proc/sys/fs/mqueue/msg_default";
+char *DEF_MSGSIZE = "/proc/sys/fs/mqueue/msgsize_default";
+char *MAX_MSGS = "/proc/sys/fs/mqueue/msg_max";
+char *MAX_MSGSIZE = "/proc/sys/fs/mqueue/msgsize_max";
+
+int default_settings;
+struct rlimit saved_limits, cur_limits;
+int saved_def_msgs, saved_def_msgsize, saved_max_msgs, saved_max_msgsize;
+int cur_def_msgs, cur_def_msgsize, cur_max_msgs, cur_max_msgsize;
+FILE *def_msgs, *def_msgsize, *max_msgs, *max_msgsize;
+char *queue_path;
+char *default_queue_path = "/test1";
+mqd_t queue = -1;
+
+static inline void __set(FILE *stream, int value, char *err_msg);
+void shutdown(int exit_val, char *err_cause, int line_no);
+static inline int get(FILE *stream);
+static inline void set(FILE *stream, int value);
+static inline void getr(int type, struct rlimit *rlim);
+static inline void setr(int type, struct rlimit *rlim);
+void validate_current_settings();
+static inline void test_queue(struct mq_attr *attr, struct mq_attr *result);
+static inline int test_queue_fail(struct mq_attr *attr, struct mq_attr *result);
+
+static inline void __set(FILE *stream, int value, char *err_msg)
+{
+	rewind(stream);
+	if (fprintf(stream, "%d", value) < 0)
+		perror(err_msg);
+}
+
+
+void shutdown(int exit_val, char *err_cause, int line_no)
+{
+	static int in_shutdown = 0;
+
+	/* In case we get called recursively by a set() call below */
+	if (in_shutdown++)
+		return;
+
+	if (seteuid(0) == -1)
+		perror("seteuid() failed");
+
+	if (queue != -1)
+		if (mq_close(queue))
+			perror("mq_close() during shutdown");
+	if (queue_path)
+		/*
+		 * Be silent if this fails, if we cleaned up already it's
+		 * expected to fail
+		 */
+		mq_unlink(queue_path);
+	if (default_settings) {
+		if (saved_def_msgs)
+			__set(def_msgs, saved_def_msgs,
+			      "failed to restore saved_def_msgs");
+		if (saved_def_msgsize)
+			__set(def_msgsize, saved_def_msgsize,
+			      "failed to restore saved_def_msgsize");
+	}
+	if (saved_max_msgs)
+		__set(max_msgs, saved_max_msgs,
+		      "failed to restore saved_max_msgs");
+	if (saved_max_msgsize)
+		__set(max_msgsize, saved_max_msgsize,
+		      "failed to restore saved_max_msgsize");
+	if (exit_val)
+		error(exit_val, errno, "%s at %d", err_cause, line_no);
+	exit(0);
+}
+
+static inline int get(FILE *stream)
+{
+	int value;
+	rewind(stream);
+	if (fscanf(stream, "%d", &value) != 1)
+		shutdown(4, "Error reading /proc entry", __LINE__ - 1);
+	return value;
+}
+
+static inline void set(FILE *stream, int value)
+{
+	int new_value;
+
+	rewind(stream);
+	if (fprintf(stream, "%d", value) < 0)
+		return shutdown(5, "Failed writing to /proc file",
+				__LINE__ - 1);
+	new_value = get(stream);
+	if (new_value != value)
+		return shutdown(5, "We didn't get what we wrote to /proc back",
+				__LINE__ - 1);
+}
+
+static inline void getr(int type, struct rlimit *rlim)
+{
+	if (getrlimit(type, rlim))
+		shutdown(6, "getrlimit()", __LINE__ - 1);
+}
+
+static inline void setr(int type, struct rlimit *rlim)
+{
+	if (setrlimit(type, rlim))
+		shutdown(7, "setrlimit()", __LINE__ - 1);
+}
+
+void validate_current_settings()
+{
+	int rlim_needed;
+
+	if (cur_limits.rlim_cur < 4096) {
+		printf("Current rlimit value for POSIX message queue bytes is "
+		       "unreasonably low,\nincreasing.\n\n");
+		cur_limits.rlim_cur = 8192;
+		cur_limits.rlim_max = 16384;
+		setr(RLIMIT_MSGQUEUE, &cur_limits);
+	}
+
+	if (default_settings) {
+		rlim_needed = (cur_def_msgs + 1) * (cur_def_msgsize + 1 +
+						    2 * sizeof(void *));
+		if (rlim_needed > cur_limits.rlim_cur) {
+			printf("Temporarily lowering default queue parameters "
+			       "to something that will work\n"
+			       "with the current rlimit values.\n\n");
+			set(def_msgs, 10);
+			cur_def_msgs = 10;
+			set(def_msgsize, 128);
+			cur_def_msgsize = 128;
+		}
+	} else {
+		rlim_needed = (cur_max_msgs + 1) * (cur_max_msgsize + 1 +
+						    2 * sizeof(void *));
+		if (rlim_needed > cur_limits.rlim_cur) {
+			printf("Temporarily lowering maximum queue parameters "
+			       "to something that will work\n"
+			       "with the current rlimit values in case this is "
+			       "a kernel that ties the default\n"
+			       "queue parameters to the maximum queue "
+			       "parameters.\n\n");
+			set(max_msgs, 10);
+			cur_max_msgs = 10;
+			set(max_msgsize, 128);
+			cur_max_msgsize = 128;
+		}
+	}
+}
+
+/*
+ * test_queue - Test opening a queue, shutdown if we fail.  This should
+ * only be called in situations that should never fail.  We clean up
+ * after ourselves and return the queue attributes in *result.
+ */
+static inline void test_queue(struct mq_attr *attr, struct mq_attr *result)
+{
+	int flags = O_RDWR | O_EXCL | O_CREAT;
+	int perms = DEFFILEMODE;
+
+	if ((queue = mq_open(queue_path, flags, perms, attr)) == -1)
+		shutdown(1, "mq_open()", __LINE__);
+	if (mq_getattr(queue, result))
+		shutdown(1, "mq_getattr()", __LINE__);
+	if (mq_close(queue))
+		shutdown(1, "mq_close()", __LINE__);
+	queue = -1;
+	if (mq_unlink(queue_path))
+		shutdown(1, "mq_unlink()", __LINE__);
+}
+
+/*
+ * Same as test_queue above, but failure is not fatal.
+ * Returns:
+ * 0 - Failed to create a queue
+ * 1 - Created a queue, attributes in *result
+ */
+static inline int test_queue_fail(struct mq_attr *attr, struct mq_attr *result)
+{
+	int flags = O_RDWR | O_EXCL | O_CREAT;
+	int perms = DEFFILEMODE;
+
+	if ((queue = mq_open(queue_path, flags, perms, attr)) == -1)
+		return 0;
+	if (mq_getattr(queue, result))
+		shutdown(1, "mq_getattr()", __LINE__);
+	if (mq_close(queue))
+		shutdown(1, "mq_close()", __LINE__);
+	queue = -1;
+	if (mq_unlink(queue_path))
+		shutdown(1, "mq_unlink()", __LINE__);
+	return 1;
+}
+
+int main(int argc, char *argv[])
+{
+	struct mq_attr attr, result;
+
+	if (argc != 2) {
+		printf("Using Default queue path - %s\n", default_queue_path);
+		queue_path = default_queue_path;
+	} else {
+
+	/*
+	 * Although we can create a msg queue with a non-absolute path name,
+	 * unlink will fail.  So, if the name doesn't start with a /, add one
+	 * when we save it.
+	 */
+		if (*argv[1] == '/')
+			queue_path = strdup(argv[1]);
+		else {
+			queue_path = malloc(strlen(argv[1]) + 2);
+			if (!queue_path) {
+				perror("malloc()");
+				exit(1);
+			}
+			queue_path[0] = '/';
+			queue_path[1] = 0;
+			strcat(queue_path, argv[1]);
+		}
+	}
+
+	if (getuid() != 0)
+		ksft_exit_skip("Not running as root, but almost all tests "
+			"require root in order to modify\nsystem settings.  "
+			"Exiting.\n");
+
+	/* Find out what files there are for us to make tweaks in */
+	def_msgs = fopen(DEF_MSGS, "r+");
+	def_msgsize = fopen(DEF_MSGSIZE, "r+");
+	max_msgs = fopen(MAX_MSGS, "r+");
+	max_msgsize = fopen(MAX_MSGSIZE, "r+");
+
+	if (!max_msgs)
+		shutdown(2, "Failed to open msg_max", __LINE__);
+	if (!max_msgsize)
+		shutdown(2, "Failed to open msgsize_max", __LINE__);
+	if (def_msgs || def_msgsize)
+		default_settings = 1;
+
+	/* Load up the current system values for everything we can */
+	getr(RLIMIT_MSGQUEUE, &saved_limits);
+	cur_limits = saved_limits;
+	if (default_settings) {
+		saved_def_msgs = cur_def_msgs = get(def_msgs);
+		saved_def_msgsize = cur_def_msgsize = get(def_msgsize);
+	}
+	saved_max_msgs = cur_max_msgs = get(max_msgs);
+	saved_max_msgsize = cur_max_msgsize = get(max_msgsize);
+
+	/* Tell the user our initial state */
+	printf("\nInitial system state:\n");
+	printf("\tUsing queue path:\t\t%s\n", queue_path);
+	printf("\tRLIMIT_MSGQUEUE(soft):\t\t%ld\n",
+		(long) saved_limits.rlim_cur);
+	printf("\tRLIMIT_MSGQUEUE(hard):\t\t%ld\n",
+		(long) saved_limits.rlim_max);
+	printf("\tMaximum Message Size:\t\t%d\n", saved_max_msgsize);
+	printf("\tMaximum Queue Size:\t\t%d\n", saved_max_msgs);
+	if (default_settings) {
+		printf("\tDefault Message Size:\t\t%d\n", saved_def_msgsize);
+		printf("\tDefault Queue Size:\t\t%d\n", saved_def_msgs);
+	} else {
+		printf("\tDefault Message Size:\t\tNot Supported\n");
+		printf("\tDefault Queue Size:\t\tNot Supported\n");
+	}
+	printf("\n");
+
+	validate_current_settings();
+
+	printf("Adjusted system state for testing:\n");
+	printf("\tRLIMIT_MSGQUEUE(soft):\t\t%ld\n", (long) cur_limits.rlim_cur);
+	printf("\tRLIMIT_MSGQUEUE(hard):\t\t%ld\n", (long) cur_limits.rlim_max);
+	printf("\tMaximum Message Size:\t\t%d\n", cur_max_msgsize);
+	printf("\tMaximum Queue Size:\t\t%d\n", cur_max_msgs);
+	if (default_settings) {
+		printf("\tDefault Message Size:\t\t%d\n", cur_def_msgsize);
+		printf("\tDefault Queue Size:\t\t%d\n", cur_def_msgs);
+	}
+
+	printf("\n\nTest series 1, behavior when no attr struct "
+	       "passed to mq_open:\n");
+	if (!default_settings) {
+		test_queue(NULL, &result);
+		printf("Given sane system settings, mq_open without an attr "
+		       "struct succeeds:\tPASS\n");
+		if (result.mq_maxmsg != cur_max_msgs ||
+		    result.mq_msgsize != cur_max_msgsize) {
+			printf("Kernel does not support setting the default "
+			       "mq attributes,\nbut also doesn't tie the "
+			       "defaults to the maximums:\t\t\tPASS\n");
+		} else {
+			set(max_msgs, ++cur_max_msgs);
+			set(max_msgsize, ++cur_max_msgsize);
+			test_queue(NULL, &result);
+			if (result.mq_maxmsg == cur_max_msgs &&
+			    result.mq_msgsize == cur_max_msgsize)
+				printf("Kernel does not support setting the "
+				       "default mq attributes and\n"
+				       "also ties system wide defaults to "
+				       "the system wide maximums:\t\t"
+				       "FAIL\n");
+			else
+				printf("Kernel does not support setting the "
+				       "default mq attributes,\n"
+				       "but also doesn't tie the defaults to "
+				       "the maximums:\t\t\tPASS\n");
+		}
+	} else {
+		printf("Kernel supports setting defaults separately from "
+		       "maximums:\t\tPASS\n");
+		/*
+		 * While we are here, go ahead and test that the kernel
+		 * properly follows the default settings
+		 */
+		test_queue(NULL, &result);
+		printf("Given sane values, mq_open without an attr struct "
+		       "succeeds:\t\tPASS\n");
+		if (result.mq_maxmsg != cur_def_msgs ||
+		    result.mq_msgsize != cur_def_msgsize)
+			printf("Kernel supports setting defaults, but does "
+			       "not actually honor them:\tFAIL\n\n");
+		else {
+			set(def_msgs, ++cur_def_msgs);
+			set(def_msgsize, ++cur_def_msgsize);
+			/* In case max was the same as the default */
+			set(max_msgs, ++cur_max_msgs);
+			set(max_msgsize, ++cur_max_msgsize);
+			test_queue(NULL, &result);
+			if (result.mq_maxmsg != cur_def_msgs ||
+			    result.mq_msgsize != cur_def_msgsize)
+				printf("Kernel supports setting defaults, but "
+				       "does not actually honor them:\t"
+				       "FAIL\n");
+			else
+				printf("Kernel properly honors default setting "
+				       "knobs:\t\t\t\tPASS\n");
+		}
+		set(def_msgs, cur_max_msgs + 1);
+		cur_def_msgs = cur_max_msgs + 1;
+		set(def_msgsize, cur_max_msgsize + 1);
+		cur_def_msgsize = cur_max_msgsize + 1;
+		if (cur_def_msgs * (cur_def_msgsize + 2 * sizeof(void *)) >=
+		    cur_limits.rlim_cur) {
+			cur_limits.rlim_cur = (cur_def_msgs + 2) *
+				(cur_def_msgsize + 2 * sizeof(void *));
+			cur_limits.rlim_max = 2 * cur_limits.rlim_cur;
+			setr(RLIMIT_MSGQUEUE, &cur_limits);
+		}
+		if (test_queue_fail(NULL, &result)) {
+			if (result.mq_maxmsg == cur_max_msgs &&
+			    result.mq_msgsize == cur_max_msgsize)
+				printf("Kernel properly limits default values "
+				       "to lesser of default/max:\t\tPASS\n");
+			else
+				printf("Kernel does not properly set default "
+				       "queue parameters when\ndefaults > "
+				       "max:\t\t\t\t\t\t\t\tFAIL\n");
+		} else
+			printf("Kernel fails to open mq because defaults are "
+			       "greater than maximums:\tFAIL\n");
+		set(def_msgs, --cur_def_msgs);
+		set(def_msgsize, --cur_def_msgsize);
+		cur_limits.rlim_cur = cur_limits.rlim_max = cur_def_msgs *
+			cur_def_msgsize;
+		setr(RLIMIT_MSGQUEUE, &cur_limits);
+		if (test_queue_fail(NULL, &result))
+			printf("Kernel creates queue even though defaults "
+			       "would exceed\nrlimit setting:"
+			       "\t\t\t\t\t\t\t\tFAIL\n");
+		else
+			printf("Kernel properly fails to create queue when "
+			       "defaults would\nexceed rlimit:"
+			       "\t\t\t\t\t\t\t\tPASS\n");
+	}
+
+	/*
+	 * Test #2 - open with an attr struct that exceeds rlimit
+	 */
+	printf("\n\nTest series 2, behavior when attr struct is "
+	       "passed to mq_open:\n");
+	cur_max_msgs = 32;
+	cur_max_msgsize = cur_limits.rlim_max >> 4;
+	set(max_msgs, cur_max_msgs);
+	set(max_msgsize, cur_max_msgsize);
+	attr.mq_maxmsg = cur_max_msgs;
+	attr.mq_msgsize = cur_max_msgsize;
+	if (test_queue_fail(&attr, &result))
+		printf("Queue open in excess of rlimit max when euid = 0 "
+		       "succeeded:\t\tFAIL\n");
+	else
+		printf("Queue open in excess of rlimit max when euid = 0 "
+		       "failed:\t\tPASS\n");
+	attr.mq_maxmsg = cur_max_msgs + 1;
+	attr.mq_msgsize = 10;
+	if (test_queue_fail(&attr, &result))
+		printf("Queue open with mq_maxmsg > limit when euid = 0 "
+		       "succeeded:\t\tPASS\n");
+	else
+		printf("Queue open with mq_maxmsg > limit when euid = 0 "
+		       "failed:\t\tFAIL\n");
+	attr.mq_maxmsg = 1;
+	attr.mq_msgsize = cur_max_msgsize + 1;
+	if (test_queue_fail(&attr, &result))
+		printf("Queue open with mq_msgsize > limit when euid = 0 "
+		       "succeeded:\t\tPASS\n");
+	else
+		printf("Queue open with mq_msgsize > limit when euid = 0 "
+		       "failed:\t\tFAIL\n");
+	attr.mq_maxmsg = 65536;
+	attr.mq_msgsize = 65536;
+	if (test_queue_fail(&attr, &result))
+		printf("Queue open with total size > 2GB when euid = 0 "
+		       "succeeded:\t\tFAIL\n");
+	else
+		printf("Queue open with total size > 2GB when euid = 0 "
+		       "failed:\t\t\tPASS\n");
+
+	if (seteuid(99) == -1) {
+		perror("seteuid() failed");
+		exit(1);
+	}
+
+	attr.mq_maxmsg = cur_max_msgs;
+	attr.mq_msgsize = cur_max_msgsize;
+	if (test_queue_fail(&attr, &result))
+		printf("Queue open in excess of rlimit max when euid = 99 "
+		       "succeeded:\t\tFAIL\n");
+	else
+		printf("Queue open in excess of rlimit max when euid = 99 "
+		       "failed:\t\tPASS\n");
+	attr.mq_maxmsg = cur_max_msgs + 1;
+	attr.mq_msgsize = 10;
+	if (test_queue_fail(&attr, &result))
+		printf("Queue open with mq_maxmsg > limit when euid = 99 "
+		       "succeeded:\t\tFAIL\n");
+	else
+		printf("Queue open with mq_maxmsg > limit when euid = 99 "
+		       "failed:\t\tPASS\n");
+	attr.mq_maxmsg = 1;
+	attr.mq_msgsize = cur_max_msgsize + 1;
+	if (test_queue_fail(&attr, &result))
+		printf("Queue open with mq_msgsize > limit when euid = 99 "
+		       "succeeded:\t\tFAIL\n");
+	else
+		printf("Queue open with mq_msgsize > limit when euid = 99 "
+		       "failed:\t\tPASS\n");
+	attr.mq_maxmsg = 65536;
+	attr.mq_msgsize = 65536;
+	if (test_queue_fail(&attr, &result))
+		printf("Queue open with total size > 2GB when euid = 99 "
+		       "succeeded:\t\tFAIL\n");
+	else
+		printf("Queue open with total size > 2GB when euid = 99 "
+		       "failed:\t\t\tPASS\n");
+
+	shutdown(0,"",0);
+}
diff --git a/tools/testing/selftests/mqueue/mq_perf_tests.c b/tools/testing/selftests/mqueue/mq_perf_tests.c
new file mode 100644
index 000000000..84fda3b49
--- /dev/null
+++ b/tools/testing/selftests/mqueue/mq_perf_tests.c
@@ -0,0 +1,752 @@
+/*
+ * This application is Copyright 2012 Red Hat, Inc.
+ *	Doug Ledford <dledford@redhat.com>
+ *
+ * mq_perf_tests is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, version 3.
+ *
+ * mq_perf_tests is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * For the full text of the license, see <http://www.gnu.org/licenses/>.
+ *
+ * mq_perf_tests.c
+ *   Tests various types of message queue workloads, concentrating on those
+ *   situations that invole large message sizes, large message queue depths,
+ *   or both, and reports back useful metrics about kernel message queue
+ *   performance.
+ *
+ */
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <string.h>
+#include <limits.h>
+#include <errno.h>
+#include <signal.h>
+#include <pthread.h>
+#include <sched.h>
+#include <sys/types.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+#include <sys/stat.h>
+#include <mqueue.h>
+#include <popt.h>
+#include <error.h>
+
+#include "../kselftest.h"
+
+static char *usage =
+"Usage:\n"
+"  %s [-c #[,#..] -f] path\n"
+"\n"
+"	-c #	Skip most tests and go straight to a high queue depth test\n"
+"		and then run that test continuously (useful for running at\n"
+"		the same time as some other workload to see how much the\n"
+"		cache thrashing caused by adding messages to a very deep\n"
+"		queue impacts the performance of other programs).  The number\n"
+"		indicates which CPU core we should bind the process to during\n"
+"		the run.  If you have more than one physical CPU, then you\n"
+"		will need one copy per physical CPU package, and you should\n"
+"		specify the CPU cores to pin ourself to via a comma separated\n"
+"		list of CPU values.\n"
+"	-f	Only usable with continuous mode.  Pin ourself to the CPUs\n"
+"		as requested, then instead of looping doing a high mq\n"
+"		workload, just busy loop.  This will allow us to lock up a\n"
+"		single CPU just like we normally would, but without actually\n"
+"		thrashing the CPU cache.  This is to make it easier to get\n"
+"		comparable numbers from some other workload running on the\n"
+"		other CPUs.  One set of numbers with # CPUs locked up running\n"
+"		an mq workload, and another set of numbers with those same\n"
+"		CPUs locked away from the test workload, but not doing\n"
+"		anything to trash the cache like the mq workload might.\n"
+"	path	Path name of the message queue to create\n"
+"\n"
+"	Note: this program must be run as root in order to enable all tests\n"
+"\n";
+
+char *MAX_MSGS = "/proc/sys/fs/mqueue/msg_max";
+char *MAX_MSGSIZE = "/proc/sys/fs/mqueue/msgsize_max";
+
+#define min(a, b) ((a) < (b) ? (a) : (b))
+#define MAX_CPUS 64
+char *cpu_option_string;
+int cpus_to_pin[MAX_CPUS];
+int num_cpus_to_pin;
+pthread_t cpu_threads[MAX_CPUS];
+pthread_t main_thread;
+cpu_set_t *cpu_set;
+int cpu_set_size;
+int cpus_online;
+
+#define MSG_SIZE 16
+#define TEST1_LOOPS 10000000
+#define TEST2_LOOPS 100000
+int continuous_mode;
+int continuous_mode_fake;
+
+struct rlimit saved_limits, cur_limits;
+int saved_max_msgs, saved_max_msgsize;
+int cur_max_msgs, cur_max_msgsize;
+FILE *max_msgs, *max_msgsize;
+int cur_nice;
+char *queue_path = "/mq_perf_tests";
+mqd_t queue = -1;
+struct mq_attr result;
+int mq_prio_max;
+
+const struct poptOption options[] = {
+	{
+		.longName = "continuous",
+		.shortName = 'c',
+		.argInfo = POPT_ARG_STRING,
+		.arg = &cpu_option_string,
+		.val = 'c',
+		.descrip = "Run continuous tests at a high queue depth in "
+			"order to test the effects of cache thrashing on "
+			"other tasks on the system.  This test is intended "
+			"to be run on one core of each physical CPU while "
+			"some other CPU intensive task is run on all the other "
+			"cores of that same physical CPU and the other task "
+			"is timed.  It is assumed that the process of adding "
+			"messages to the message queue in a tight loop will "
+			"impact that other task to some degree.  Once the "
+			"tests are performed in this way, you should then "
+			"re-run the tests using fake mode in order to check "
+			"the difference in time required to perform the CPU "
+			"intensive task",
+		.argDescrip = "cpu[,cpu]",
+	},
+	{
+		.longName = "fake",
+		.shortName = 'f',
+		.argInfo = POPT_ARG_NONE,
+		.arg = &continuous_mode_fake,
+		.val = 0,
+		.descrip = "Tie up the CPUs that we would normally tie up in"
+			"continuous mode, but don't actually do any mq stuff, "
+			"just keep the CPU busy so it can't be used to process "
+			"system level tasks as this would free up resources on "
+			"the other CPU cores and skew the comparison between "
+			"the no-mqueue work and mqueue work tests",
+		.argDescrip = NULL,
+	},
+	{
+		.longName = "path",
+		.shortName = 'p',
+		.argInfo = POPT_ARG_STRING | POPT_ARGFLAG_SHOW_DEFAULT,
+		.arg = &queue_path,
+		.val = 'p',
+		.descrip = "The name of the path to use in the mqueue "
+			"filesystem for our tests",
+		.argDescrip = "pathname",
+	},
+	POPT_AUTOHELP
+	POPT_TABLEEND
+};
+
+static inline void __set(FILE *stream, int value, char *err_msg);
+void shutdown(int exit_val, char *err_cause, int line_no);
+void sig_action_SIGUSR1(int signum, siginfo_t *info, void *context);
+void sig_action(int signum, siginfo_t *info, void *context);
+static inline int get(FILE *stream);
+static inline void set(FILE *stream, int value);
+static inline int try_set(FILE *stream, int value);
+static inline void getr(int type, struct rlimit *rlim);
+static inline void setr(int type, struct rlimit *rlim);
+static inline void open_queue(struct mq_attr *attr);
+void increase_limits(void);
+
+static inline void __set(FILE *stream, int value, char *err_msg)
+{
+	rewind(stream);
+	if (fprintf(stream, "%d", value) < 0)
+		perror(err_msg);
+}
+
+
+void shutdown(int exit_val, char *err_cause, int line_no)
+{
+	static int in_shutdown = 0;
+	int errno_at_shutdown = errno;
+	int i;
+
+	/* In case we get called by multiple threads or from an sighandler */
+	if (in_shutdown++)
+		return;
+
+	/* Free the cpu_set allocated using CPU_ALLOC in main function */
+	CPU_FREE(cpu_set);
+
+	for (i = 0; i < num_cpus_to_pin; i++)
+		if (cpu_threads[i]) {
+			pthread_kill(cpu_threads[i], SIGUSR1);
+			pthread_join(cpu_threads[i], NULL);
+		}
+
+	if (queue != -1)
+		if (mq_close(queue))
+			perror("mq_close() during shutdown");
+	if (queue_path)
+		/*
+		 * Be silent if this fails, if we cleaned up already it's
+		 * expected to fail
+		 */
+		mq_unlink(queue_path);
+	if (saved_max_msgs)
+		__set(max_msgs, saved_max_msgs,
+		      "failed to restore saved_max_msgs");
+	if (saved_max_msgsize)
+		__set(max_msgsize, saved_max_msgsize,
+		      "failed to restore saved_max_msgsize");
+	if (exit_val)
+		error(exit_val, errno_at_shutdown, "%s at %d",
+		      err_cause, line_no);
+	exit(0);
+}
+
+void sig_action_SIGUSR1(int signum, siginfo_t *info, void *context)
+{
+	if (pthread_self() != main_thread)
+		pthread_exit(0);
+	else {
+		fprintf(stderr, "Caught signal %d in SIGUSR1 handler, "
+				"exiting\n", signum);
+		shutdown(0, "", 0);
+		fprintf(stderr, "\n\nReturned from shutdown?!?!\n\n");
+		exit(0);
+	}
+}
+
+void sig_action(int signum, siginfo_t *info, void *context)
+{
+	if (pthread_self() != main_thread)
+		pthread_kill(main_thread, signum);
+	else {
+		fprintf(stderr, "Caught signal %d, exiting\n", signum);
+		shutdown(0, "", 0);
+		fprintf(stderr, "\n\nReturned from shutdown?!?!\n\n");
+		exit(0);
+	}
+}
+
+static inline int get(FILE *stream)
+{
+	int value;
+	rewind(stream);
+	if (fscanf(stream, "%d", &value) != 1)
+		shutdown(4, "Error reading /proc entry", __LINE__);
+	return value;
+}
+
+static inline void set(FILE *stream, int value)
+{
+	int new_value;
+
+	rewind(stream);
+	if (fprintf(stream, "%d", value) < 0)
+		return shutdown(5, "Failed writing to /proc file", __LINE__);
+	new_value = get(stream);
+	if (new_value != value)
+		return shutdown(5, "We didn't get what we wrote to /proc back",
+				__LINE__);
+}
+
+static inline int try_set(FILE *stream, int value)
+{
+	int new_value;
+
+	rewind(stream);
+	fprintf(stream, "%d", value);
+	new_value = get(stream);
+	return new_value == value;
+}
+
+static inline void getr(int type, struct rlimit *rlim)
+{
+	if (getrlimit(type, rlim))
+		shutdown(6, "getrlimit()", __LINE__);
+}
+
+static inline void setr(int type, struct rlimit *rlim)
+{
+	if (setrlimit(type, rlim))
+		shutdown(7, "setrlimit()", __LINE__);
+}
+
+/**
+ * open_queue - open the global queue for testing
+ * @attr - An attr struct specifying the desired queue traits
+ * @result - An attr struct that lists the actual traits the queue has
+ *
+ * This open is not allowed to fail, failure will result in an orderly
+ * shutdown of the program.  The global queue_path is used to set what
+ * queue to open, the queue descriptor is saved in the global queue
+ * variable.
+ */
+static inline void open_queue(struct mq_attr *attr)
+{
+	int flags = O_RDWR | O_EXCL | O_CREAT | O_NONBLOCK;
+	int perms = DEFFILEMODE;
+
+	queue = mq_open(queue_path, flags, perms, attr);
+	if (queue == -1)
+		shutdown(1, "mq_open()", __LINE__);
+	if (mq_getattr(queue, &result))
+		shutdown(1, "mq_getattr()", __LINE__);
+	printf("\n\tQueue %s created:\n", queue_path);
+	printf("\t\tmq_flags:\t\t\t%s\n", result.mq_flags & O_NONBLOCK ?
+	       "O_NONBLOCK" : "(null)");
+	printf("\t\tmq_maxmsg:\t\t\t%lu\n", result.mq_maxmsg);
+	printf("\t\tmq_msgsize:\t\t\t%lu\n", result.mq_msgsize);
+	printf("\t\tmq_curmsgs:\t\t\t%lu\n", result.mq_curmsgs);
+}
+
+void *fake_cont_thread(void *arg)
+{
+	int i;
+
+	for (i = 0; i < num_cpus_to_pin; i++)
+		if (cpu_threads[i] == pthread_self())
+			break;
+	printf("\tStarted fake continuous mode thread %d on CPU %d\n", i,
+	       cpus_to_pin[i]);
+	while (1)
+		;
+}
+
+void *cont_thread(void *arg)
+{
+	char buff[MSG_SIZE];
+	int i, priority;
+
+	for (i = 0; i < num_cpus_to_pin; i++)
+		if (cpu_threads[i] == pthread_self())
+			break;
+	printf("\tStarted continuous mode thread %d on CPU %d\n", i,
+	       cpus_to_pin[i]);
+	while (1) {
+		while (mq_send(queue, buff, sizeof(buff), 0) == 0)
+			;
+		mq_receive(queue, buff, sizeof(buff), &priority);
+	}
+}
+
+#define drain_queue() \
+	while (mq_receive(queue, buff, MSG_SIZE, &prio_in) == MSG_SIZE)
+
+#define do_untimed_send() \
+	do { \
+		if (mq_send(queue, buff, MSG_SIZE, prio_out)) \
+			shutdown(3, "Test send failure", __LINE__); \
+	} while (0)
+
+#define do_send_recv() \
+	do { \
+		clock_gettime(clock, &start); \
+		if (mq_send(queue, buff, MSG_SIZE, prio_out)) \
+			shutdown(3, "Test send failure", __LINE__); \
+		clock_gettime(clock, &middle); \
+		if (mq_receive(queue, buff, MSG_SIZE, &prio_in) != MSG_SIZE) \
+			shutdown(3, "Test receive failure", __LINE__); \
+		clock_gettime(clock, &end); \
+		nsec = ((middle.tv_sec - start.tv_sec) * 1000000000) + \
+			(middle.tv_nsec - start.tv_nsec); \
+		send_total.tv_nsec += nsec; \
+		if (send_total.tv_nsec >= 1000000000) { \
+			send_total.tv_sec++; \
+			send_total.tv_nsec -= 1000000000; \
+		} \
+		nsec = ((end.tv_sec - middle.tv_sec) * 1000000000) + \
+			(end.tv_nsec - middle.tv_nsec); \
+		recv_total.tv_nsec += nsec; \
+		if (recv_total.tv_nsec >= 1000000000) { \
+			recv_total.tv_sec++; \
+			recv_total.tv_nsec -= 1000000000; \
+		} \
+	} while (0)
+
+struct test {
+	char *desc;
+	void (*func)(int *);
+};
+
+void const_prio(int *prio)
+{
+	return;
+}
+
+void inc_prio(int *prio)
+{
+	if (++*prio == mq_prio_max)
+		*prio = 0;
+}
+
+void dec_prio(int *prio)
+{
+	if (--*prio < 0)
+		*prio = mq_prio_max - 1;
+}
+
+void random_prio(int *prio)
+{
+	*prio = random() % mq_prio_max;
+}
+
+struct test test2[] = {
+	{"\n\tTest #2a: Time send/recv message, queue full, constant prio\n",
+		const_prio},
+	{"\n\tTest #2b: Time send/recv message, queue full, increasing prio\n",
+		inc_prio},
+	{"\n\tTest #2c: Time send/recv message, queue full, decreasing prio\n",
+		dec_prio},
+	{"\n\tTest #2d: Time send/recv message, queue full, random prio\n",
+		random_prio},
+	{NULL, NULL}
+};
+
+/**
+ * Tests to perform (all done with MSG_SIZE messages):
+ *
+ * 1) Time to add/remove message with 0 messages on queue
+ * 1a) with constant prio
+ * 2) Time to add/remove message when queue close to capacity:
+ * 2a) with constant prio
+ * 2b) with increasing prio
+ * 2c) with decreasing prio
+ * 2d) with random prio
+ * 3) Test limits of priorities honored (double check _SC_MQ_PRIO_MAX)
+ */
+void *perf_test_thread(void *arg)
+{
+	char buff[MSG_SIZE];
+	int prio_out, prio_in;
+	int i;
+	clockid_t clock;
+	pthread_t *t;
+	struct timespec res, start, middle, end, send_total, recv_total;
+	unsigned long long nsec;
+	struct test *cur_test;
+
+	t = &cpu_threads[0];
+	printf("\n\tStarted mqueue performance test thread on CPU %d\n",
+	       cpus_to_pin[0]);
+	mq_prio_max = sysconf(_SC_MQ_PRIO_MAX);
+	if (mq_prio_max == -1)
+		shutdown(2, "sysconf(_SC_MQ_PRIO_MAX)", __LINE__);
+	if (pthread_getcpuclockid(cpu_threads[0], &clock) != 0)
+		shutdown(2, "pthread_getcpuclockid", __LINE__);
+
+	if (clock_getres(clock, &res))
+		shutdown(2, "clock_getres()", __LINE__);
+
+	printf("\t\tMax priorities:\t\t\t%d\n", mq_prio_max);
+	printf("\t\tClock resolution:\t\t%lu nsec%s\n", res.tv_nsec,
+	       res.tv_nsec > 1 ? "s" : "");
+
+
+
+	printf("\n\tTest #1: Time send/recv message, queue empty\n");
+	printf("\t\t(%d iterations)\n", TEST1_LOOPS);
+	prio_out = 0;
+	send_total.tv_sec = 0;
+	send_total.tv_nsec = 0;
+	recv_total.tv_sec = 0;
+	recv_total.tv_nsec = 0;
+	for (i = 0; i < TEST1_LOOPS; i++)
+		do_send_recv();
+	printf("\t\tSend msg:\t\t\t%ld.%lus total time\n",
+	       send_total.tv_sec, send_total.tv_nsec);
+	nsec = ((unsigned long long)send_total.tv_sec * 1000000000 +
+		 send_total.tv_nsec) / TEST1_LOOPS;
+	printf("\t\t\t\t\t\t%lld nsec/msg\n", nsec);
+	printf("\t\tRecv msg:\t\t\t%ld.%lus total time\n",
+	       recv_total.tv_sec, recv_total.tv_nsec);
+	nsec = ((unsigned long long)recv_total.tv_sec * 1000000000 +
+		recv_total.tv_nsec) / TEST1_LOOPS;
+	printf("\t\t\t\t\t\t%lld nsec/msg\n", nsec);
+
+
+	for (cur_test = test2; cur_test->desc != NULL; cur_test++) {
+		printf("%s:\n", cur_test->desc);
+		printf("\t\t(%d iterations)\n", TEST2_LOOPS);
+		prio_out = 0;
+		send_total.tv_sec = 0;
+		send_total.tv_nsec = 0;
+		recv_total.tv_sec = 0;
+		recv_total.tv_nsec = 0;
+		printf("\t\tFilling queue...");
+		fflush(stdout);
+		clock_gettime(clock, &start);
+		for (i = 0; i < result.mq_maxmsg - 1; i++) {
+			do_untimed_send();
+			cur_test->func(&prio_out);
+		}
+		clock_gettime(clock, &end);
+		nsec = ((unsigned long long)(end.tv_sec - start.tv_sec) *
+			1000000000) + (end.tv_nsec - start.tv_nsec);
+		printf("done.\t\t%lld.%llds\n", nsec / 1000000000,
+		       nsec % 1000000000);
+		printf("\t\tTesting...");
+		fflush(stdout);
+		for (i = 0; i < TEST2_LOOPS; i++) {
+			do_send_recv();
+			cur_test->func(&prio_out);
+		}
+		printf("done.\n");
+		printf("\t\tSend msg:\t\t\t%ld.%lus total time\n",
+		       send_total.tv_sec, send_total.tv_nsec);
+		nsec = ((unsigned long long)send_total.tv_sec * 1000000000 +
+			 send_total.tv_nsec) / TEST2_LOOPS;
+		printf("\t\t\t\t\t\t%lld nsec/msg\n", nsec);
+		printf("\t\tRecv msg:\t\t\t%ld.%lus total time\n",
+		       recv_total.tv_sec, recv_total.tv_nsec);
+		nsec = ((unsigned long long)recv_total.tv_sec * 1000000000 +
+			recv_total.tv_nsec) / TEST2_LOOPS;
+		printf("\t\t\t\t\t\t%lld nsec/msg\n", nsec);
+		printf("\t\tDraining queue...");
+		fflush(stdout);
+		clock_gettime(clock, &start);
+		drain_queue();
+		clock_gettime(clock, &end);
+		nsec = ((unsigned long long)(end.tv_sec - start.tv_sec) *
+			1000000000) + (end.tv_nsec - start.tv_nsec);
+		printf("done.\t\t%lld.%llds\n", nsec / 1000000000,
+		       nsec % 1000000000);
+	}
+	return 0;
+}
+
+void increase_limits(void)
+{
+	cur_limits.rlim_cur = RLIM_INFINITY;
+	cur_limits.rlim_max = RLIM_INFINITY;
+	setr(RLIMIT_MSGQUEUE, &cur_limits);
+	while (try_set(max_msgs, cur_max_msgs += 10))
+		;
+	cur_max_msgs = get(max_msgs);
+	while (try_set(max_msgsize, cur_max_msgsize += 1024))
+		;
+	cur_max_msgsize = get(max_msgsize);
+	if (setpriority(PRIO_PROCESS, 0, -20) != 0)
+		shutdown(2, "setpriority()", __LINE__);
+	cur_nice = -20;
+}
+
+int main(int argc, char *argv[])
+{
+	struct mq_attr attr;
+	char *option, *next_option;
+	int i, cpu, rc;
+	struct sigaction sa;
+	poptContext popt_context;
+	void *retval;
+
+	main_thread = pthread_self();
+	num_cpus_to_pin = 0;
+
+	if (sysconf(_SC_NPROCESSORS_ONLN) == -1) {
+		perror("sysconf(_SC_NPROCESSORS_ONLN)");
+		exit(1);
+	}
+
+	if (getuid() != 0)
+		ksft_exit_skip("Not running as root, but almost all tests "
+			"require root in order to modify\nsystem settings.  "
+			"Exiting.\n");
+
+	cpus_online = min(MAX_CPUS, sysconf(_SC_NPROCESSORS_ONLN));
+	cpu_set = CPU_ALLOC(cpus_online);
+	if (cpu_set == NULL) {
+		perror("CPU_ALLOC()");
+		exit(1);
+	}
+	cpu_set_size = CPU_ALLOC_SIZE(cpus_online);
+	CPU_ZERO_S(cpu_set_size, cpu_set);
+
+	popt_context = poptGetContext(NULL, argc, (const char **)argv,
+				      options, 0);
+
+	while ((rc = poptGetNextOpt(popt_context)) > 0) {
+		switch (rc) {
+		case 'c':
+			continuous_mode = 1;
+			option = cpu_option_string;
+			do {
+				next_option = strchr(option, ',');
+				if (next_option)
+					*next_option = '\0';
+				cpu = atoi(option);
+				if (cpu >= cpus_online)
+					fprintf(stderr, "CPU %d exceeds "
+						"cpus online, ignoring.\n",
+						cpu);
+				else
+					cpus_to_pin[num_cpus_to_pin++] = cpu;
+				if (next_option)
+					option = ++next_option;
+			} while (next_option && num_cpus_to_pin < MAX_CPUS);
+			/* Double check that they didn't give us the same CPU
+			 * more than once */
+			for (cpu = 0; cpu < num_cpus_to_pin; cpu++) {
+				if (CPU_ISSET_S(cpus_to_pin[cpu], cpu_set_size,
+						cpu_set)) {
+					fprintf(stderr, "Any given CPU may "
+						"only be given once.\n");
+					goto err_code;
+				} else
+					CPU_SET_S(cpus_to_pin[cpu],
+						  cpu_set_size, cpu_set);
+			}
+			break;
+		case 'p':
+			/*
+			 * Although we can create a msg queue with a
+			 * non-absolute path name, unlink will fail.  So,
+			 * if the name doesn't start with a /, add one
+			 * when we save it.
+			 */
+			option = queue_path;
+			if (*option != '/') {
+				queue_path = malloc(strlen(option) + 2);
+				if (!queue_path) {
+					perror("malloc()");
+					goto err_code;
+				}
+				queue_path[0] = '/';
+				queue_path[1] = 0;
+				strcat(queue_path, option);
+				free(option);
+			}
+			break;
+		}
+	}
+
+	if (continuous_mode && num_cpus_to_pin == 0) {
+		fprintf(stderr, "Must pass at least one CPU to continuous "
+			"mode.\n");
+		poptPrintUsage(popt_context, stderr, 0);
+		goto err_code;
+	} else if (!continuous_mode) {
+		num_cpus_to_pin = 1;
+		cpus_to_pin[0] = cpus_online - 1;
+	}
+
+	max_msgs = fopen(MAX_MSGS, "r+");
+	max_msgsize = fopen(MAX_MSGSIZE, "r+");
+	if (!max_msgs)
+		shutdown(2, "Failed to open msg_max", __LINE__);
+	if (!max_msgsize)
+		shutdown(2, "Failed to open msgsize_max", __LINE__);
+
+	/* Load up the current system values for everything we can */
+	getr(RLIMIT_MSGQUEUE, &saved_limits);
+	cur_limits = saved_limits;
+	saved_max_msgs = cur_max_msgs = get(max_msgs);
+	saved_max_msgsize = cur_max_msgsize = get(max_msgsize);
+	errno = 0;
+	cur_nice = getpriority(PRIO_PROCESS, 0);
+	if (errno)
+		shutdown(2, "getpriority()", __LINE__);
+
+	/* Tell the user our initial state */
+	printf("\nInitial system state:\n");
+	printf("\tUsing queue path:\t\t\t%s\n", queue_path);
+	printf("\tRLIMIT_MSGQUEUE(soft):\t\t\t%ld\n",
+		(long) saved_limits.rlim_cur);
+	printf("\tRLIMIT_MSGQUEUE(hard):\t\t\t%ld\n",
+		(long) saved_limits.rlim_max);
+	printf("\tMaximum Message Size:\t\t\t%d\n", saved_max_msgsize);
+	printf("\tMaximum Queue Size:\t\t\t%d\n", saved_max_msgs);
+	printf("\tNice value:\t\t\t\t%d\n", cur_nice);
+	printf("\n");
+
+	increase_limits();
+
+	printf("Adjusted system state for testing:\n");
+	if (cur_limits.rlim_cur == RLIM_INFINITY) {
+		printf("\tRLIMIT_MSGQUEUE(soft):\t\t\t(unlimited)\n");
+		printf("\tRLIMIT_MSGQUEUE(hard):\t\t\t(unlimited)\n");
+	} else {
+		printf("\tRLIMIT_MSGQUEUE(soft):\t\t\t%ld\n",
+		       (long) cur_limits.rlim_cur);
+		printf("\tRLIMIT_MSGQUEUE(hard):\t\t\t%ld\n",
+		       (long) cur_limits.rlim_max);
+	}
+	printf("\tMaximum Message Size:\t\t\t%d\n", cur_max_msgsize);
+	printf("\tMaximum Queue Size:\t\t\t%d\n", cur_max_msgs);
+	printf("\tNice value:\t\t\t\t%d\n", cur_nice);
+	printf("\tContinuous mode:\t\t\t(%s)\n", continuous_mode ?
+	       (continuous_mode_fake ? "fake mode" : "enabled") :
+	       "disabled");
+	printf("\tCPUs to pin:\t\t\t\t%d", cpus_to_pin[0]);
+	for (cpu = 1; cpu < num_cpus_to_pin; cpu++)
+			printf(",%d", cpus_to_pin[cpu]);
+	printf("\n");
+
+	sa.sa_sigaction = sig_action_SIGUSR1;
+	sigemptyset(&sa.sa_mask);
+	sigaddset(&sa.sa_mask, SIGHUP);
+	sigaddset(&sa.sa_mask, SIGINT);
+	sigaddset(&sa.sa_mask, SIGQUIT);
+	sigaddset(&sa.sa_mask, SIGTERM);
+	sa.sa_flags = SA_SIGINFO;
+	if (sigaction(SIGUSR1, &sa, NULL) == -1)
+		shutdown(1, "sigaction(SIGUSR1)", __LINE__);
+	sa.sa_sigaction = sig_action;
+	if (sigaction(SIGHUP, &sa, NULL) == -1)
+		shutdown(1, "sigaction(SIGHUP)", __LINE__);
+	if (sigaction(SIGINT, &sa, NULL) == -1)
+		shutdown(1, "sigaction(SIGINT)", __LINE__);
+	if (sigaction(SIGQUIT, &sa, NULL) == -1)
+		shutdown(1, "sigaction(SIGQUIT)", __LINE__);
+	if (sigaction(SIGTERM, &sa, NULL) == -1)
+		shutdown(1, "sigaction(SIGTERM)", __LINE__);
+
+	if (!continuous_mode_fake) {
+		attr.mq_flags = O_NONBLOCK;
+		attr.mq_maxmsg = cur_max_msgs;
+		attr.mq_msgsize = MSG_SIZE;
+		open_queue(&attr);
+	}
+	for (i = 0; i < num_cpus_to_pin; i++) {
+		pthread_attr_t thread_attr;
+		void *thread_func;
+
+		if (continuous_mode_fake)
+			thread_func = &fake_cont_thread;
+		else if (continuous_mode)
+			thread_func = &cont_thread;
+		else
+			thread_func = &perf_test_thread;
+
+		CPU_ZERO_S(cpu_set_size, cpu_set);
+		CPU_SET_S(cpus_to_pin[i], cpu_set_size, cpu_set);
+		pthread_attr_init(&thread_attr);
+		pthread_attr_setaffinity_np(&thread_attr, cpu_set_size,
+					    cpu_set);
+		if (pthread_create(&cpu_threads[i], &thread_attr, thread_func,
+				   NULL))
+			shutdown(1, "pthread_create()", __LINE__);
+		pthread_attr_destroy(&thread_attr);
+	}
+
+	if (!continuous_mode) {
+		pthread_join(cpu_threads[0], &retval);
+		shutdown((long)retval, "perf_test_thread()", __LINE__);
+	} else {
+		while (1)
+			sleep(1);
+	}
+	shutdown(0, "", 0);
+
+err_code:
+	CPU_FREE(cpu_set);
+	exit(1);
+
+}
diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore
new file mode 100644
index 000000000..78b24cf76
--- /dev/null
+++ b/tools/testing/selftests/net/.gitignore
@@ -0,0 +1,16 @@
+msg_zerocopy
+socket
+psock_fanout
+psock_snd
+psock_tpacket
+reuseport_bpf
+reuseport_bpf_cpu
+reuseport_bpf_numa
+reuseport_dualstack
+reuseaddr_conflict
+tcp_mmap
+udpgso
+udpgso_bench_rx
+udpgso_bench_tx
+tcp_inq
+tls
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
new file mode 100644
index 000000000..9a3764a10
--- /dev/null
+++ b/tools/testing/selftests/net/Makefile
@@ -0,0 +1,23 @@
+# SPDX-License-Identifier: GPL-2.0
+# Makefile for net selftests
+
+CFLAGS =  -Wall -Wl,--no-as-needed -O2 -g
+CFLAGS += -I../../../../usr/include/
+
+TEST_PROGS := run_netsocktests run_afpackettests test_bpf.sh netdevice.sh rtnetlink.sh
+TEST_PROGS += fib_tests.sh fib-onlink-tests.sh pmtu.sh udpgso.sh
+TEST_PROGS += udpgso_bench.sh fib_rule_tests.sh msg_zerocopy.sh psock_snd.sh
+TEST_PROGS_EXTENDED := in_netns.sh
+TEST_GEN_FILES =  socket
+TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy
+TEST_GEN_FILES += tcp_mmap tcp_inq psock_snd
+TEST_GEN_FILES += udpgso udpgso_bench_tx udpgso_bench_rx
+TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa
+TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict tls
+
+KSFT_KHDR_INSTALL := 1
+include ../lib.mk
+
+$(OUTPUT)/reuseport_bpf_numa: LDLIBS += -lnuma
+$(OUTPUT)/tcp_mmap: LDFLAGS += -lpthread
+$(OUTPUT)/tcp_inq: LDFLAGS += -lpthread
diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config
new file mode 100644
index 000000000..cd3a2f154
--- /dev/null
+++ b/tools/testing/selftests/net/config
@@ -0,0 +1,16 @@
+CONFIG_USER_NS=y
+CONFIG_BPF_SYSCALL=y
+CONFIG_TEST_BPF=m
+CONFIG_NUMA=y
+CONFIG_NET_VRF=y
+CONFIG_NET_L3_MASTER_DEV=y
+CONFIG_IPV6=y
+CONFIG_IPV6_MULTIPLE_TABLES=y
+CONFIG_VETH=y
+CONFIG_INET_XFRM_MODE_TUNNEL=y
+CONFIG_NET_IPVTI=y
+CONFIG_INET6_XFRM_MODE_TUNNEL=y
+CONFIG_IPV6_VTI=y
+CONFIG_DUMMY=y
+CONFIG_BRIDGE=y
+CONFIG_VLAN_8021Q=y
diff --git a/tools/testing/selftests/net/fib-onlink-tests.sh b/tools/testing/selftests/net/fib-onlink-tests.sh
new file mode 100755
index 000000000..864f865ee
--- /dev/null
+++ b/tools/testing/selftests/net/fib-onlink-tests.sh
@@ -0,0 +1,467 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# IPv4 and IPv6 onlink tests
+
+PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no}
+
+# Network interfaces
+# - odd in current namespace; even in peer ns
+declare -A NETIFS
+# default VRF
+NETIFS[p1]=veth1
+NETIFS[p2]=veth2
+NETIFS[p3]=veth3
+NETIFS[p4]=veth4
+# VRF
+NETIFS[p5]=veth5
+NETIFS[p6]=veth6
+NETIFS[p7]=veth7
+NETIFS[p8]=veth8
+
+# /24 network
+declare -A V4ADDRS
+V4ADDRS[p1]=169.254.1.1
+V4ADDRS[p2]=169.254.1.2
+V4ADDRS[p3]=169.254.3.1
+V4ADDRS[p4]=169.254.3.2
+V4ADDRS[p5]=169.254.5.1
+V4ADDRS[p6]=169.254.5.2
+V4ADDRS[p7]=169.254.7.1
+V4ADDRS[p8]=169.254.7.2
+
+# /64 network
+declare -A V6ADDRS
+V6ADDRS[p1]=2001:db8:101::1
+V6ADDRS[p2]=2001:db8:101::2
+V6ADDRS[p3]=2001:db8:301::1
+V6ADDRS[p4]=2001:db8:301::2
+V6ADDRS[p5]=2001:db8:501::1
+V6ADDRS[p6]=2001:db8:501::2
+V6ADDRS[p7]=2001:db8:701::1
+V6ADDRS[p8]=2001:db8:701::2
+
+# Test networks:
+# [1] = default table
+# [2] = VRF
+#
+# /32 host routes
+declare -A TEST_NET4
+TEST_NET4[1]=169.254.101
+TEST_NET4[2]=169.254.102
+# /128 host routes
+declare -A TEST_NET6
+TEST_NET6[1]=2001:db8:101
+TEST_NET6[2]=2001:db8:102
+
+# connected gateway
+CONGW[1]=169.254.1.254
+CONGW[2]=169.254.3.254
+CONGW[3]=169.254.5.254
+
+# recursive gateway
+RECGW4[1]=169.254.11.254
+RECGW4[2]=169.254.12.254
+RECGW6[1]=2001:db8:11::64
+RECGW6[2]=2001:db8:12::64
+
+# for v4 mapped to v6
+declare -A TEST_NET4IN6IN6
+TEST_NET4IN6[1]=10.1.1.254
+TEST_NET4IN6[2]=10.2.1.254
+
+# mcast address
+MCAST6=ff02::1
+
+
+PEER_NS=bart
+PEER_CMD="ip netns exec ${PEER_NS}"
+VRF=lisa
+VRF_TABLE=1101
+PBR_TABLE=101
+
+################################################################################
+# utilities
+
+log_test()
+{
+	local rc=$1
+	local expected=$2
+	local msg="$3"
+
+	if [ ${rc} -eq ${expected} ]; then
+		nsuccess=$((nsuccess+1))
+		printf "\n    TEST: %-50s  [ OK ]\n" "${msg}"
+	else
+		nfail=$((nfail+1))
+		printf "\n    TEST: %-50s  [FAIL]\n" "${msg}"
+		if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+			echo
+			echo "hit enter to continue, 'q' to quit"
+			read a
+			[ "$a" = "q" ] && exit 1
+		fi
+	fi
+}
+
+log_section()
+{
+	echo
+	echo "######################################################################"
+	echo "TEST SECTION: $*"
+	echo "######################################################################"
+}
+
+log_subsection()
+{
+	echo
+	echo "#########################################"
+	echo "TEST SUBSECTION: $*"
+}
+
+run_cmd()
+{
+	echo
+	echo "COMMAND: $*"
+	eval $*
+}
+
+get_linklocal()
+{
+	local dev=$1
+	local pfx
+	local addr
+
+	addr=$(${pfx} ip -6 -br addr show dev ${dev} | \
+	awk '{
+		for (i = 3; i <= NF; ++i) {
+			if ($i ~ /^fe80/)
+				print $i
+		}
+	}'
+	)
+	addr=${addr/\/*}
+
+	[ -z "$addr" ] && return 1
+
+	echo $addr
+
+	return 0
+}
+
+################################################################################
+#
+
+setup()
+{
+	echo
+	echo "########################################"
+	echo "Configuring interfaces"
+
+	set -e
+
+	# create namespace
+	ip netns add ${PEER_NS}
+	ip -netns ${PEER_NS} li set lo up
+
+	# add vrf table
+	ip li add ${VRF} type vrf table ${VRF_TABLE}
+	ip li set ${VRF} up
+	ip ro add table ${VRF_TABLE} unreachable default metric 8192
+	ip -6 ro add table ${VRF_TABLE} unreachable default metric 8192
+
+	# create test interfaces
+	ip li add ${NETIFS[p1]} type veth peer name ${NETIFS[p2]}
+	ip li add ${NETIFS[p3]} type veth peer name ${NETIFS[p4]}
+	ip li add ${NETIFS[p5]} type veth peer name ${NETIFS[p6]}
+	ip li add ${NETIFS[p7]} type veth peer name ${NETIFS[p8]}
+
+	# enslave vrf interfaces
+	for n in 5 7; do
+		ip li set ${NETIFS[p${n}]} vrf ${VRF}
+	done
+
+	# add addresses
+	for n in 1 3 5 7; do
+		ip li set ${NETIFS[p${n}]} up
+		ip addr add ${V4ADDRS[p${n}]}/24 dev ${NETIFS[p${n}]}
+		ip addr add ${V6ADDRS[p${n}]}/64 dev ${NETIFS[p${n}]} nodad
+	done
+
+	# move peer interfaces to namespace and add addresses
+	for n in 2 4 6 8; do
+		ip li set ${NETIFS[p${n}]} netns ${PEER_NS} up
+		ip -netns ${PEER_NS} addr add ${V4ADDRS[p${n}]}/24 dev ${NETIFS[p${n}]}
+		ip -netns ${PEER_NS} addr add ${V6ADDRS[p${n}]}/64 dev ${NETIFS[p${n}]} nodad
+	done
+
+	ip -6 ro add default via ${V6ADDRS[p3]/::[0-9]/::64}
+	ip -6 ro add table ${VRF_TABLE} default via ${V6ADDRS[p7]/::[0-9]/::64}
+
+	set +e
+}
+
+cleanup()
+{
+	# make sure we start from a clean slate
+	ip netns del ${PEER_NS} 2>/dev/null
+	for n in 1 3 5 7; do
+		ip link del ${NETIFS[p${n}]} 2>/dev/null
+	done
+	ip link del ${VRF} 2>/dev/null
+	ip ro flush table ${VRF_TABLE}
+	ip -6 ro flush table ${VRF_TABLE}
+}
+
+################################################################################
+# IPv4 tests
+#
+
+run_ip()
+{
+	local table="$1"
+	local prefix="$2"
+	local gw="$3"
+	local dev="$4"
+	local exp_rc="$5"
+	local desc="$6"
+
+	# dev arg may be empty
+	[ -n "${dev}" ] && dev="dev ${dev}"
+
+	run_cmd ip ro add table "${table}" "${prefix}"/32 via "${gw}" "${dev}" onlink
+	log_test $? ${exp_rc} "${desc}"
+}
+
+run_ip_mpath()
+{
+	local table="$1"
+	local prefix="$2"
+	local nh1="$3"
+	local nh2="$4"
+	local exp_rc="$5"
+	local desc="$6"
+
+	# dev arg may be empty
+	[ -n "${dev}" ] && dev="dev ${dev}"
+
+	run_cmd ip ro add table "${table}" "${prefix}"/32 \
+		nexthop via ${nh1} nexthop via ${nh2}
+	log_test $? ${exp_rc} "${desc}"
+}
+
+valid_onlink_ipv4()
+{
+	# - unicast connected, unicast recursive
+	#
+	log_subsection "default VRF - main table"
+
+	run_ip 254 ${TEST_NET4[1]}.1 ${CONGW[1]} ${NETIFS[p1]} 0 "unicast connected"
+	run_ip 254 ${TEST_NET4[1]}.2 ${RECGW4[1]} ${NETIFS[p1]} 0 "unicast recursive"
+
+	log_subsection "VRF ${VRF}"
+
+	run_ip ${VRF_TABLE} ${TEST_NET4[2]}.1 ${CONGW[3]} ${NETIFS[p5]} 0 "unicast connected"
+	run_ip ${VRF_TABLE} ${TEST_NET4[2]}.2 ${RECGW4[2]} ${NETIFS[p5]} 0 "unicast recursive"
+
+	log_subsection "VRF device, PBR table"
+
+	run_ip ${PBR_TABLE} ${TEST_NET4[2]}.3 ${CONGW[3]} ${NETIFS[p5]} 0 "unicast connected"
+	run_ip ${PBR_TABLE} ${TEST_NET4[2]}.4 ${RECGW4[2]} ${NETIFS[p5]} 0 "unicast recursive"
+
+	# multipath version
+	#
+	log_subsection "default VRF - main table - multipath"
+
+	run_ip_mpath 254 ${TEST_NET4[1]}.5 \
+		"${CONGW[1]} dev ${NETIFS[p1]} onlink" \
+		"${CONGW[2]} dev ${NETIFS[p3]} onlink" \
+		0 "unicast connected - multipath"
+
+	run_ip_mpath 254 ${TEST_NET4[1]}.6 \
+		"${RECGW4[1]} dev ${NETIFS[p1]} onlink" \
+		"${RECGW4[2]} dev ${NETIFS[p3]} onlink" \
+		0 "unicast recursive - multipath"
+
+	run_ip_mpath 254 ${TEST_NET4[1]}.7 \
+		"${CONGW[1]} dev ${NETIFS[p1]}"        \
+		"${CONGW[2]} dev ${NETIFS[p3]} onlink" \
+		0 "unicast connected - multipath onlink first only"
+
+	run_ip_mpath 254 ${TEST_NET4[1]}.8 \
+		"${CONGW[1]} dev ${NETIFS[p1]} onlink" \
+		"${CONGW[2]} dev ${NETIFS[p3]}"        \
+		0 "unicast connected - multipath onlink second only"
+}
+
+invalid_onlink_ipv4()
+{
+	run_ip 254 ${TEST_NET4[1]}.11 ${V4ADDRS[p1]} ${NETIFS[p1]} 2 \
+		"Invalid gw - local unicast address"
+
+	run_ip ${VRF_TABLE} ${TEST_NET4[2]}.11 ${V4ADDRS[p5]} ${NETIFS[p5]} 2 \
+		"Invalid gw - local unicast address, VRF"
+
+	run_ip 254 ${TEST_NET4[1]}.101 ${V4ADDRS[p1]} "" 2 "No nexthop device given"
+
+	run_ip 254 ${TEST_NET4[1]}.102 ${V4ADDRS[p3]} ${NETIFS[p1]} 2 \
+		"Gateway resolves to wrong nexthop device"
+
+	run_ip ${VRF_TABLE} ${TEST_NET4[2]}.103 ${V4ADDRS[p7]} ${NETIFS[p5]} 2 \
+		"Gateway resolves to wrong nexthop device - VRF"
+}
+
+################################################################################
+# IPv6 tests
+#
+
+run_ip6()
+{
+	local table="$1"
+	local prefix="$2"
+	local gw="$3"
+	local dev="$4"
+	local exp_rc="$5"
+	local desc="$6"
+
+	# dev arg may be empty
+	[ -n "${dev}" ] && dev="dev ${dev}"
+
+	run_cmd ip -6 ro add table "${table}" "${prefix}"/128 via "${gw}" "${dev}" onlink
+	log_test $? ${exp_rc} "${desc}"
+}
+
+run_ip6_mpath()
+{
+	local table="$1"
+	local prefix="$2"
+	local opts="$3"
+	local nh1="$4"
+	local nh2="$5"
+	local exp_rc="$6"
+	local desc="$7"
+
+	run_cmd ip -6 ro add table "${table}" "${prefix}"/128 "${opts}" \
+		nexthop via ${nh1} nexthop via ${nh2}
+	log_test $? ${exp_rc} "${desc}"
+}
+
+valid_onlink_ipv6()
+{
+	# - unicast connected, unicast recursive, v4-mapped
+	#
+	log_subsection "default VRF - main table"
+
+	run_ip6 254 ${TEST_NET6[1]}::1 ${V6ADDRS[p1]/::*}::64 ${NETIFS[p1]} 0 "unicast connected"
+	run_ip6 254 ${TEST_NET6[1]}::2 ${RECGW6[1]} ${NETIFS[p1]} 0 "unicast recursive"
+	run_ip6 254 ${TEST_NET6[1]}::3 ::ffff:${TEST_NET4IN6[1]} ${NETIFS[p1]} 0 "v4-mapped"
+
+	log_subsection "VRF ${VRF}"
+
+	run_ip6 ${VRF_TABLE} ${TEST_NET6[2]}::1 ${V6ADDRS[p5]/::*}::64 ${NETIFS[p5]} 0 "unicast connected"
+	run_ip6 ${VRF_TABLE} ${TEST_NET6[2]}::2 ${RECGW6[2]} ${NETIFS[p5]} 0 "unicast recursive"
+	run_ip6 ${VRF_TABLE} ${TEST_NET6[2]}::3 ::ffff:${TEST_NET4IN6[2]} ${NETIFS[p5]} 0 "v4-mapped"
+
+	log_subsection "VRF device, PBR table"
+
+	run_ip6 ${PBR_TABLE} ${TEST_NET6[2]}::4 ${V6ADDRS[p5]/::*}::64 ${NETIFS[p5]} 0 "unicast connected"
+	run_ip6 ${PBR_TABLE} ${TEST_NET6[2]}::5 ${RECGW6[2]} ${NETIFS[p5]} 0 "unicast recursive"
+	run_ip6 ${PBR_TABLE} ${TEST_NET6[2]}::6 ::ffff:${TEST_NET4IN6[2]} ${NETIFS[p5]} 0 "v4-mapped"
+
+	# multipath version
+	#
+	log_subsection "default VRF - main table - multipath"
+
+	run_ip6_mpath 254 ${TEST_NET6[1]}::4 "onlink" \
+		"${V6ADDRS[p1]/::*}::64 dev ${NETIFS[p1]}" \
+		"${V6ADDRS[p3]/::*}::64 dev ${NETIFS[p3]}" \
+		0 "unicast connected - multipath onlink"
+
+	run_ip6_mpath 254 ${TEST_NET6[1]}::5 "onlink" \
+		"${RECGW6[1]} dev ${NETIFS[p1]}" \
+		"${RECGW6[2]} dev ${NETIFS[p3]}" \
+		0 "unicast recursive - multipath onlink"
+
+	run_ip6_mpath 254 ${TEST_NET6[1]}::6 "onlink" \
+		"::ffff:${TEST_NET4IN6[1]} dev ${NETIFS[p1]}" \
+		"::ffff:${TEST_NET4IN6[2]} dev ${NETIFS[p3]}" \
+		0 "v4-mapped - multipath onlink"
+
+	run_ip6_mpath 254 ${TEST_NET6[1]}::7 "" \
+		"${V6ADDRS[p1]/::*}::64 dev ${NETIFS[p1]} onlink" \
+		"${V6ADDRS[p3]/::*}::64 dev ${NETIFS[p3]} onlink" \
+		0 "unicast connected - multipath onlink both nexthops"
+
+	run_ip6_mpath 254 ${TEST_NET6[1]}::8 "" \
+		"${V6ADDRS[p1]/::*}::64 dev ${NETIFS[p1]} onlink" \
+		"${V6ADDRS[p3]/::*}::64 dev ${NETIFS[p3]}" \
+		0 "unicast connected - multipath onlink first only"
+
+	run_ip6_mpath 254 ${TEST_NET6[1]}::9 "" \
+		"${V6ADDRS[p1]/::*}::64 dev ${NETIFS[p1]}"        \
+		"${V6ADDRS[p3]/::*}::64 dev ${NETIFS[p3]} onlink" \
+		0 "unicast connected - multipath onlink second only"
+}
+
+invalid_onlink_ipv6()
+{
+	local lladdr
+
+	lladdr=$(get_linklocal ${NETIFS[p1]}) || return 1
+
+	run_ip6 254 ${TEST_NET6[1]}::11 ${V6ADDRS[p1]} ${NETIFS[p1]} 2 \
+		"Invalid gw - local unicast address"
+	run_ip6 254 ${TEST_NET6[1]}::12 ${lladdr} ${NETIFS[p1]} 2 \
+		"Invalid gw - local linklocal address"
+	run_ip6 254 ${TEST_NET6[1]}::12 ${MCAST6} ${NETIFS[p1]} 2 \
+		"Invalid gw - multicast address"
+
+	lladdr=$(get_linklocal ${NETIFS[p5]}) || return 1
+	run_ip6 ${VRF_TABLE} ${TEST_NET6[2]}::11 ${V6ADDRS[p5]} ${NETIFS[p5]} 2 \
+		"Invalid gw - local unicast address, VRF"
+	run_ip6 ${VRF_TABLE} ${TEST_NET6[2]}::12 ${lladdr} ${NETIFS[p5]} 2 \
+		"Invalid gw - local linklocal address, VRF"
+	run_ip6 ${VRF_TABLE} ${TEST_NET6[2]}::12 ${MCAST6} ${NETIFS[p5]} 2 \
+		"Invalid gw - multicast address, VRF"
+
+	run_ip6 254 ${TEST_NET6[1]}::101 ${V6ADDRS[p1]} "" 2 \
+		"No nexthop device given"
+
+	# default VRF validation is done against LOCAL table
+	# run_ip6 254 ${TEST_NET6[1]}::102 ${V6ADDRS[p3]/::[0-9]/::64} ${NETIFS[p1]} 2 \
+	#	"Gateway resolves to wrong nexthop device"
+
+	run_ip6 ${VRF_TABLE} ${TEST_NET6[2]}::103 ${V6ADDRS[p7]/::[0-9]/::64} ${NETIFS[p5]} 2 \
+		"Gateway resolves to wrong nexthop device - VRF"
+}
+
+run_onlink_tests()
+{
+	log_section "IPv4 onlink"
+	log_subsection "Valid onlink commands"
+	valid_onlink_ipv4
+	log_subsection "Invalid onlink commands"
+	invalid_onlink_ipv4
+
+	log_section "IPv6 onlink"
+	log_subsection "Valid onlink commands"
+	valid_onlink_ipv6
+	log_subsection "Invalid onlink commands"
+	invalid_onlink_ipv6
+}
+
+################################################################################
+# main
+
+nsuccess=0
+nfail=0
+
+cleanup
+setup
+run_onlink_tests
+cleanup
+
+if [ "$TESTS" != "none" ]; then
+	printf "\nTests passed: %3d\n" ${nsuccess}
+	printf "Tests failed: %3d\n"   ${nfail}
+fi
diff --git a/tools/testing/selftests/net/fib_rule_tests.sh b/tools/testing/selftests/net/fib_rule_tests.sh
new file mode 100755
index 000000000..ba2d9fab2
--- /dev/null
+++ b/tools/testing/selftests/net/fib_rule_tests.sh
@@ -0,0 +1,255 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test is for checking IPv4 and IPv6 FIB rules API
+
+ret=0
+
+PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no}
+IP="ip -netns testns"
+
+RTABLE=100
+GW_IP4=192.51.100.2
+SRC_IP=192.51.100.3
+GW_IP6=2001:db8:1::2
+SRC_IP6=2001:db8:1::3
+
+DEV_ADDR=192.51.100.1
+DEV_ADDR6=2001:db8:1::1
+DEV=dummy0
+
+log_test()
+{
+	local rc=$1
+	local expected=$2
+	local msg="$3"
+
+	if [ ${rc} -eq ${expected} ]; then
+		nsuccess=$((nsuccess+1))
+		printf "\n    TEST: %-50s  [ OK ]\n" "${msg}"
+	else
+		ret=1
+		nfail=$((nfail+1))
+		printf "\n    TEST: %-50s  [FAIL]\n" "${msg}"
+		if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+			echo
+			echo "hit enter to continue, 'q' to quit"
+			read a
+			[ "$a" = "q" ] && exit 1
+		fi
+	fi
+}
+
+log_section()
+{
+	echo
+	echo "######################################################################"
+	echo "TEST SECTION: $*"
+	echo "######################################################################"
+}
+
+setup()
+{
+	set -e
+	ip netns add testns
+	$IP link set dev lo up
+
+	$IP link add dummy0 type dummy
+	$IP link set dev dummy0 up
+	$IP address add $DEV_ADDR/24 dev dummy0
+	$IP -6 address add $DEV_ADDR6/64 dev dummy0
+
+	set +e
+}
+
+cleanup()
+{
+	$IP link del dev dummy0 &> /dev/null
+	ip netns del testns
+}
+
+fib_check_iproute_support()
+{
+	ip rule help 2>&1 | grep -q $1
+	if [ $? -ne 0 ]; then
+		echo "SKIP: iproute2 iprule too old, missing $1 match"
+		return 1
+	fi
+
+	ip route get help 2>&1 | grep -q $2
+	if [ $? -ne 0 ]; then
+		echo "SKIP: iproute2 get route too old, missing $2 match"
+		return 1
+	fi
+
+	return 0
+}
+
+fib_rule6_del()
+{
+	$IP -6 rule del $1
+	log_test $? 0 "rule6 del $1"
+}
+
+fib_rule6_del_by_pref()
+{
+	pref=$($IP -6 rule show | grep "$1 lookup $TABLE" | cut -d ":" -f 1)
+	$IP -6 rule del pref $pref
+}
+
+fib_rule6_test_match_n_redirect()
+{
+	local match="$1"
+	local getmatch="$2"
+
+	$IP -6 rule add $match table $RTABLE
+	$IP -6 route get $GW_IP6 $getmatch | grep -q "table $RTABLE"
+	log_test $? 0 "rule6 check: $1"
+
+	fib_rule6_del_by_pref "$match"
+	log_test $? 0 "rule6 del by pref: $match"
+}
+
+fib_rule6_test()
+{
+	# setup the fib rule redirect route
+	$IP -6 route add table $RTABLE default via $GW_IP6 dev $DEV onlink
+
+	match="oif $DEV"
+	fib_rule6_test_match_n_redirect "$match" "$match" "oif redirect to table"
+
+	match="from $SRC_IP6 iif $DEV"
+	fib_rule6_test_match_n_redirect "$match" "$match" "iif redirect to table"
+
+	match="tos 0x10"
+	fib_rule6_test_match_n_redirect "$match" "$match" "tos redirect to table"
+
+	match="fwmark 0x64"
+	getmatch="mark 0x64"
+	fib_rule6_test_match_n_redirect "$match" "$getmatch" "fwmark redirect to table"
+
+	fib_check_iproute_support "uidrange" "uid"
+	if [ $? -eq 0 ]; then
+		match="uidrange 100-100"
+		getmatch="uid 100"
+		fib_rule6_test_match_n_redirect "$match" "$getmatch" "uid redirect to table"
+	fi
+
+	fib_check_iproute_support "sport" "sport"
+	if [ $? -eq 0 ]; then
+		match="sport 666 dport 777"
+		fib_rule6_test_match_n_redirect "$match" "$match" "sport and dport redirect to table"
+	fi
+
+	fib_check_iproute_support "ipproto" "ipproto"
+	if [ $? -eq 0 ]; then
+		match="ipproto tcp"
+		fib_rule6_test_match_n_redirect "$match" "$match" "ipproto match"
+	fi
+
+	fib_check_iproute_support "ipproto" "ipproto"
+	if [ $? -eq 0 ]; then
+		match="ipproto ipv6-icmp"
+		fib_rule6_test_match_n_redirect "$match" "$match" "ipproto ipv6-icmp match"
+	fi
+}
+
+fib_rule4_del()
+{
+	$IP rule del $1
+	log_test $? 0 "del $1"
+}
+
+fib_rule4_del_by_pref()
+{
+	pref=$($IP rule show | grep "$1 lookup $TABLE" | cut -d ":" -f 1)
+	$IP rule del pref $pref
+}
+
+fib_rule4_test_match_n_redirect()
+{
+	local match="$1"
+	local getmatch="$2"
+
+	$IP rule add $match table $RTABLE
+	$IP route get $GW_IP4 $getmatch | grep -q "table $RTABLE"
+	log_test $? 0 "rule4 check: $1"
+
+	fib_rule4_del_by_pref "$match"
+	log_test $? 0 "rule4 del by pref: $match"
+}
+
+fib_rule4_test()
+{
+	# setup the fib rule redirect route
+	$IP route add table $RTABLE default via $GW_IP4 dev $DEV onlink
+
+	match="oif $DEV"
+	fib_rule4_test_match_n_redirect "$match" "$match" "oif redirect to table"
+
+	match="from $SRC_IP iif $DEV"
+	fib_rule4_test_match_n_redirect "$match" "$match" "iif redirect to table"
+
+	match="tos 0x10"
+	fib_rule4_test_match_n_redirect "$match" "$match" "tos redirect to table"
+
+	match="fwmark 0x64"
+	getmatch="mark 0x64"
+	fib_rule4_test_match_n_redirect "$match" "$getmatch" "fwmark redirect to table"
+
+	fib_check_iproute_support "uidrange" "uid"
+	if [ $? -eq 0 ]; then
+		match="uidrange 100-100"
+		getmatch="uid 100"
+		fib_rule4_test_match_n_redirect "$match" "$getmatch" "uid redirect to table"
+	fi
+
+	fib_check_iproute_support "sport" "sport"
+	if [ $? -eq 0 ]; then
+		match="sport 666 dport 777"
+		fib_rule4_test_match_n_redirect "$match" "$match" "sport and dport redirect to table"
+	fi
+
+	fib_check_iproute_support "ipproto" "ipproto"
+	if [ $? -eq 0 ]; then
+		match="ipproto tcp"
+		fib_rule4_test_match_n_redirect "$match" "$match" "ipproto tcp match"
+	fi
+
+	fib_check_iproute_support "ipproto" "ipproto"
+	if [ $? -eq 0 ]; then
+		match="ipproto icmp"
+		fib_rule4_test_match_n_redirect "$match" "$match" "ipproto icmp match"
+	fi
+}
+
+run_fibrule_tests()
+{
+	log_section "IPv4 fib rule"
+	fib_rule4_test
+	log_section "IPv6 fib rule"
+	fib_rule6_test
+}
+
+if [ "$(id -u)" -ne 0 ];then
+	echo "SKIP: Need root privileges"
+	exit 0
+fi
+
+if [ ! -x "$(command -v ip)" ]; then
+	echo "SKIP: Could not run test without ip tool"
+	exit 0
+fi
+
+# start clean
+cleanup &> /dev/null
+setup
+run_fibrule_tests
+cleanup
+
+if [ "$TESTS" != "none" ]; then
+	printf "\nTests passed: %3d\n" ${nsuccess}
+	printf "Tests failed: %3d\n"   ${nfail}
+fi
+
+exit $ret
diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh
new file mode 100755
index 000000000..a5ba14976
--- /dev/null
+++ b/tools/testing/selftests/net/fib_tests.sh
@@ -0,0 +1,1439 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test is for checking IPv4 and IPv6 FIB behavior in response to
+# different events.
+
+ret=0
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+# all tests in this script. Can be overridden with -t option
+TESTS="unregister down carrier nexthop ipv6_rt ipv4_rt ipv6_addr_metric ipv4_addr_metric"
+VERBOSE=0
+PAUSE_ON_FAIL=no
+PAUSE=no
+IP="ip -netns testns"
+
+log_test()
+{
+	local rc=$1
+	local expected=$2
+	local msg="$3"
+
+	if [ ${rc} -eq ${expected} ]; then
+		printf "    TEST: %-60s  [ OK ]\n" "${msg}"
+		nsuccess=$((nsuccess+1))
+	else
+		ret=1
+		nfail=$((nfail+1))
+		printf "    TEST: %-60s  [FAIL]\n" "${msg}"
+		if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+		echo
+			echo "hit enter to continue, 'q' to quit"
+			read a
+			[ "$a" = "q" ] && exit 1
+		fi
+	fi
+
+	if [ "${PAUSE}" = "yes" ]; then
+		echo
+		echo "hit enter to continue, 'q' to quit"
+		read a
+		[ "$a" = "q" ] && exit 1
+	fi
+}
+
+setup()
+{
+	set -e
+	ip netns add testns
+	$IP link set dev lo up
+
+	$IP link add dummy0 type dummy
+	$IP link set dev dummy0 up
+	$IP address add 198.51.100.1/24 dev dummy0
+	$IP -6 address add 2001:db8:1::1/64 dev dummy0
+	set +e
+
+}
+
+cleanup()
+{
+	$IP link del dev dummy0 &> /dev/null
+	ip netns del testns
+}
+
+get_linklocal()
+{
+	local dev=$1
+	local addr
+
+	addr=$($IP -6 -br addr show dev ${dev} | \
+	awk '{
+		for (i = 3; i <= NF; ++i) {
+			if ($i ~ /^fe80/)
+				print $i
+		}
+	}'
+	)
+	addr=${addr/\/*}
+
+	[ -z "$addr" ] && return 1
+
+	echo $addr
+
+	return 0
+}
+
+fib_unreg_unicast_test()
+{
+	echo
+	echo "Single path route test"
+
+	setup
+
+	echo "    Start point"
+	$IP route get fibmatch 198.51.100.2 &> /dev/null
+	log_test $? 0 "IPv4 fibmatch"
+	$IP -6 route get fibmatch 2001:db8:1::2 &> /dev/null
+	log_test $? 0 "IPv6 fibmatch"
+
+	set -e
+	$IP link del dev dummy0
+	set +e
+
+	echo "    Nexthop device deleted"
+	$IP route get fibmatch 198.51.100.2 &> /dev/null
+	log_test $? 2 "IPv4 fibmatch - no route"
+	$IP -6 route get fibmatch 2001:db8:1::2 &> /dev/null
+	log_test $? 2 "IPv6 fibmatch - no route"
+
+	cleanup
+}
+
+fib_unreg_multipath_test()
+{
+
+	echo
+	echo "Multipath route test"
+
+	setup
+
+	set -e
+	$IP link add dummy1 type dummy
+	$IP link set dev dummy1 up
+	$IP address add 192.0.2.1/24 dev dummy1
+	$IP -6 address add 2001:db8:2::1/64 dev dummy1
+
+	$IP route add 203.0.113.0/24 \
+		nexthop via 198.51.100.2 dev dummy0 \
+		nexthop via 192.0.2.2 dev dummy1
+	$IP -6 route add 2001:db8:3::/64 \
+		nexthop via 2001:db8:1::2 dev dummy0 \
+		nexthop via 2001:db8:2::2 dev dummy1
+	set +e
+
+	echo "    Start point"
+	$IP route get fibmatch 203.0.113.1 &> /dev/null
+	log_test $? 0 "IPv4 fibmatch"
+	$IP -6 route get fibmatch 2001:db8:3::1 &> /dev/null
+	log_test $? 0 "IPv6 fibmatch"
+
+	set -e
+	$IP link del dev dummy0
+	set +e
+
+	echo "    One nexthop device deleted"
+	$IP route get fibmatch 203.0.113.1 &> /dev/null
+	log_test $? 2 "IPv4 - multipath route removed on delete"
+
+	$IP -6 route get fibmatch 2001:db8:3::1 &> /dev/null
+	# In IPv6 we do not flush the entire multipath route.
+	log_test $? 0 "IPv6 - multipath down to single path"
+
+	set -e
+	$IP link del dev dummy1
+	set +e
+
+	echo "    Second nexthop device deleted"
+	$IP -6 route get fibmatch 2001:db8:3::1 &> /dev/null
+	log_test $? 2 "IPv6 - no route"
+
+	cleanup
+}
+
+fib_unreg_test()
+{
+	fib_unreg_unicast_test
+	fib_unreg_multipath_test
+}
+
+fib_down_unicast_test()
+{
+	echo
+	echo "Single path, admin down"
+
+	setup
+
+	echo "    Start point"
+	$IP route get fibmatch 198.51.100.2 &> /dev/null
+	log_test $? 0 "IPv4 fibmatch"
+	$IP -6 route get fibmatch 2001:db8:1::2 &> /dev/null
+	log_test $? 0 "IPv6 fibmatch"
+
+	set -e
+	$IP link set dev dummy0 down
+	set +e
+
+	echo "    Route deleted on down"
+	$IP route get fibmatch 198.51.100.2 &> /dev/null
+	log_test $? 2 "IPv4 fibmatch"
+	$IP -6 route get fibmatch 2001:db8:1::2 &> /dev/null
+	log_test $? 2 "IPv6 fibmatch"
+
+	cleanup
+}
+
+fib_down_multipath_test_do()
+{
+	local down_dev=$1
+	local up_dev=$2
+
+	$IP route get fibmatch 203.0.113.1 \
+		oif $down_dev &> /dev/null
+	log_test $? 2 "IPv4 fibmatch on down device"
+	$IP -6 route get fibmatch 2001:db8:3::1 \
+		oif $down_dev &> /dev/null
+	log_test $? 2 "IPv6 fibmatch on down device"
+
+	$IP route get fibmatch 203.0.113.1 \
+		oif $up_dev &> /dev/null
+	log_test $? 0 "IPv4 fibmatch on up device"
+	$IP -6 route get fibmatch 2001:db8:3::1 \
+		oif $up_dev &> /dev/null
+	log_test $? 0 "IPv6 fibmatch on up device"
+
+	$IP route get fibmatch 203.0.113.1 | \
+		grep $down_dev | grep -q "dead linkdown"
+	log_test $? 0 "IPv4 flags on down device"
+	$IP -6 route get fibmatch 2001:db8:3::1 | \
+		grep $down_dev | grep -q "dead linkdown"
+	log_test $? 0 "IPv6 flags on down device"
+
+	$IP route get fibmatch 203.0.113.1 | \
+		grep $up_dev | grep -q "dead linkdown"
+	log_test $? 1 "IPv4 flags on up device"
+	$IP -6 route get fibmatch 2001:db8:3::1 | \
+		grep $up_dev | grep -q "dead linkdown"
+	log_test $? 1 "IPv6 flags on up device"
+}
+
+fib_down_multipath_test()
+{
+	echo
+	echo "Admin down multipath"
+
+	setup
+
+	set -e
+	$IP link add dummy1 type dummy
+	$IP link set dev dummy1 up
+
+	$IP address add 192.0.2.1/24 dev dummy1
+	$IP -6 address add 2001:db8:2::1/64 dev dummy1
+
+	$IP route add 203.0.113.0/24 \
+		nexthop via 198.51.100.2 dev dummy0 \
+		nexthop via 192.0.2.2 dev dummy1
+	$IP -6 route add 2001:db8:3::/64 \
+		nexthop via 2001:db8:1::2 dev dummy0 \
+		nexthop via 2001:db8:2::2 dev dummy1
+	set +e
+
+	echo "    Verify start point"
+	$IP route get fibmatch 203.0.113.1 &> /dev/null
+	log_test $? 0 "IPv4 fibmatch"
+
+	$IP -6 route get fibmatch 2001:db8:3::1 &> /dev/null
+	log_test $? 0 "IPv6 fibmatch"
+
+	set -e
+	$IP link set dev dummy0 down
+	set +e
+
+	echo "    One device down, one up"
+	fib_down_multipath_test_do "dummy0" "dummy1"
+
+	set -e
+	$IP link set dev dummy0 up
+	$IP link set dev dummy1 down
+	set +e
+
+	echo "    Other device down and up"
+	fib_down_multipath_test_do "dummy1" "dummy0"
+
+	set -e
+	$IP link set dev dummy0 down
+	set +e
+
+	echo "    Both devices down"
+	$IP route get fibmatch 203.0.113.1 &> /dev/null
+	log_test $? 2 "IPv4 fibmatch"
+	$IP -6 route get fibmatch 2001:db8:3::1 &> /dev/null
+	log_test $? 2 "IPv6 fibmatch"
+
+	$IP link del dev dummy1
+	cleanup
+}
+
+fib_down_test()
+{
+	fib_down_unicast_test
+	fib_down_multipath_test
+}
+
+# Local routes should not be affected when carrier changes.
+fib_carrier_local_test()
+{
+	echo
+	echo "Local carrier tests - single path"
+
+	setup
+
+	set -e
+	$IP link set dev dummy0 carrier on
+	set +e
+
+	echo "    Start point"
+	$IP route get fibmatch 198.51.100.1 &> /dev/null
+	log_test $? 0 "IPv4 fibmatch"
+	$IP -6 route get fibmatch 2001:db8:1::1 &> /dev/null
+	log_test $? 0 "IPv6 fibmatch"
+
+	$IP route get fibmatch 198.51.100.1 | \
+		grep -q "linkdown"
+	log_test $? 1 "IPv4 - no linkdown flag"
+	$IP -6 route get fibmatch 2001:db8:1::1 | \
+		grep -q "linkdown"
+	log_test $? 1 "IPv6 - no linkdown flag"
+
+	set -e
+	$IP link set dev dummy0 carrier off
+	sleep 1
+	set +e
+
+	echo "    Carrier off on nexthop"
+	$IP route get fibmatch 198.51.100.1 &> /dev/null
+	log_test $? 0 "IPv4 fibmatch"
+	$IP -6 route get fibmatch 2001:db8:1::1 &> /dev/null
+	log_test $? 0 "IPv6 fibmatch"
+
+	$IP route get fibmatch 198.51.100.1 | \
+		grep -q "linkdown"
+	log_test $? 1 "IPv4 - linkdown flag set"
+	$IP -6 route get fibmatch 2001:db8:1::1 | \
+		grep -q "linkdown"
+	log_test $? 1 "IPv6 - linkdown flag set"
+
+	set -e
+	$IP address add 192.0.2.1/24 dev dummy0
+	$IP -6 address add 2001:db8:2::1/64 dev dummy0
+	set +e
+
+	echo "    Route to local address with carrier down"
+	$IP route get fibmatch 192.0.2.1 &> /dev/null
+	log_test $? 0 "IPv4 fibmatch"
+	$IP -6 route get fibmatch 2001:db8:2::1 &> /dev/null
+	log_test $? 0 "IPv6 fibmatch"
+
+	$IP route get fibmatch 192.0.2.1 | \
+		grep -q "linkdown"
+	log_test $? 1 "IPv4 linkdown flag set"
+	$IP -6 route get fibmatch 2001:db8:2::1 | \
+		grep -q "linkdown"
+	log_test $? 1 "IPv6 linkdown flag set"
+
+	cleanup
+}
+
+fib_carrier_unicast_test()
+{
+	ret=0
+
+	echo
+	echo "Single path route carrier test"
+
+	setup
+
+	set -e
+	$IP link set dev dummy0 carrier on
+	set +e
+
+	echo "    Start point"
+	$IP route get fibmatch 198.51.100.2 &> /dev/null
+	log_test $? 0 "IPv4 fibmatch"
+	$IP -6 route get fibmatch 2001:db8:1::2 &> /dev/null
+	log_test $? 0 "IPv6 fibmatch"
+
+	$IP route get fibmatch 198.51.100.2 | \
+		grep -q "linkdown"
+	log_test $? 1 "IPv4 no linkdown flag"
+	$IP -6 route get fibmatch 2001:db8:1::2 | \
+		grep -q "linkdown"
+	log_test $? 1 "IPv6 no linkdown flag"
+
+	set -e
+	$IP link set dev dummy0 carrier off
+	sleep 1
+	set +e
+
+	echo "    Carrier down"
+	$IP route get fibmatch 198.51.100.2 &> /dev/null
+	log_test $? 0 "IPv4 fibmatch"
+	$IP -6 route get fibmatch 2001:db8:1::2 &> /dev/null
+	log_test $? 0 "IPv6 fibmatch"
+
+	$IP route get fibmatch 198.51.100.2 | \
+		grep -q "linkdown"
+	log_test $? 0 "IPv4 linkdown flag set"
+	$IP -6 route get fibmatch 2001:db8:1::2 | \
+		grep -q "linkdown"
+	log_test $? 0 "IPv6 linkdown flag set"
+
+	set -e
+	$IP address add 192.0.2.1/24 dev dummy0
+	$IP -6 address add 2001:db8:2::1/64 dev dummy0
+	set +e
+
+	echo "    Second address added with carrier down"
+	$IP route get fibmatch 192.0.2.2 &> /dev/null
+	log_test $? 0 "IPv4 fibmatch"
+	$IP -6 route get fibmatch 2001:db8:2::2 &> /dev/null
+	log_test $? 0 "IPv6 fibmatch"
+
+	$IP route get fibmatch 192.0.2.2 | \
+		grep -q "linkdown"
+	log_test $? 0 "IPv4 linkdown flag set"
+	$IP -6 route get fibmatch 2001:db8:2::2 | \
+		grep -q "linkdown"
+	log_test $? 0 "IPv6 linkdown flag set"
+
+	cleanup
+}
+
+fib_carrier_test()
+{
+	fib_carrier_local_test
+	fib_carrier_unicast_test
+}
+
+################################################################################
+# Tests on nexthop spec
+
+# run 'ip route add' with given spec
+add_rt()
+{
+	local desc="$1"
+	local erc=$2
+	local vrf=$3
+	local pfx=$4
+	local gw=$5
+	local dev=$6
+	local cmd out rc
+
+	[ "$vrf" = "-" ] && vrf="default"
+	[ -n "$gw" ] && gw="via $gw"
+	[ -n "$dev" ] && dev="dev $dev"
+
+	cmd="$IP route add vrf $vrf $pfx $gw $dev"
+	if [ "$VERBOSE" = "1" ]; then
+		printf "\n    COMMAND: $cmd\n"
+	fi
+
+	out=$(eval $cmd 2>&1)
+	rc=$?
+	if [ "$VERBOSE" = "1" -a -n "$out" ]; then
+		echo "    $out"
+	fi
+	log_test $rc $erc "$desc"
+}
+
+fib4_nexthop()
+{
+	echo
+	echo "IPv4 nexthop tests"
+
+	echo "<<< write me >>>"
+}
+
+fib6_nexthop()
+{
+	local lldummy=$(get_linklocal dummy0)
+	local llv1=$(get_linklocal dummy0)
+
+	if [ -z "$lldummy" ]; then
+		echo "Failed to get linklocal address for dummy0"
+		return 1
+	fi
+	if [ -z "$llv1" ]; then
+		echo "Failed to get linklocal address for veth1"
+		return 1
+	fi
+
+	echo
+	echo "IPv6 nexthop tests"
+
+	add_rt "Directly connected nexthop, unicast address" 0 \
+		- 2001:db8:101::/64 2001:db8:1::2
+	add_rt "Directly connected nexthop, unicast address with device" 0 \
+		- 2001:db8:102::/64 2001:db8:1::2 "dummy0"
+	add_rt "Gateway is linklocal address" 0 \
+		- 2001:db8:103::1/64 $llv1 "veth0"
+
+	# fails because LL address requires a device
+	add_rt "Gateway is linklocal address, no device" 2 \
+		- 2001:db8:104::1/64 $llv1
+
+	# local address can not be a gateway
+	add_rt "Gateway can not be local unicast address" 2 \
+		- 2001:db8:105::/64 2001:db8:1::1
+	add_rt "Gateway can not be local unicast address, with device" 2 \
+		- 2001:db8:106::/64 2001:db8:1::1 "dummy0"
+	add_rt "Gateway can not be a local linklocal address" 2 \
+		- 2001:db8:107::1/64 $lldummy "dummy0"
+
+	# VRF tests
+	add_rt "Gateway can be local address in a VRF" 0 \
+		- 2001:db8:108::/64 2001:db8:51::2
+	add_rt "Gateway can be local address in a VRF, with device" 0 \
+		- 2001:db8:109::/64 2001:db8:51::2 "veth0"
+	add_rt "Gateway can be local linklocal address in a VRF" 0 \
+		- 2001:db8:110::1/64 $llv1 "veth0"
+
+	add_rt "Redirect to VRF lookup" 0 \
+		- 2001:db8:111::/64 "" "red"
+
+	add_rt "VRF route, gateway can be local address in default VRF" 0 \
+		red 2001:db8:112::/64 2001:db8:51::1
+
+	# local address in same VRF fails
+	add_rt "VRF route, gateway can not be a local address" 2 \
+		red 2001:db8:113::1/64 2001:db8:2::1
+	add_rt "VRF route, gateway can not be a local addr with device" 2 \
+		red 2001:db8:114::1/64 2001:db8:2::1 "dummy1"
+}
+
+# Default VRF:
+#   dummy0 - 198.51.100.1/24 2001:db8:1::1/64
+#   veth0  - 192.0.2.1/24    2001:db8:51::1/64
+#
+# VRF red:
+#   dummy1 - 192.168.2.1/24 2001:db8:2::1/64
+#   veth1  - 192.0.2.2/24   2001:db8:51::2/64
+#
+#  [ dummy0   veth0 ]--[ veth1   dummy1 ]
+
+fib_nexthop_test()
+{
+	setup
+
+	set -e
+
+	$IP -4 rule add pref 32765 table local
+	$IP -4 rule del pref 0
+	$IP -6 rule add pref 32765 table local
+	$IP -6 rule del pref 0
+
+	$IP link add red type vrf table 1
+	$IP link set red up
+	$IP -4 route add vrf red unreachable default metric 4278198272
+	$IP -6 route add vrf red unreachable default metric 4278198272
+
+	$IP link add veth0 type veth peer name veth1
+	$IP link set dev veth0 up
+	$IP address add 192.0.2.1/24 dev veth0
+	$IP -6 address add 2001:db8:51::1/64 dev veth0
+
+	$IP link set dev veth1 vrf red up
+	$IP address add 192.0.2.2/24 dev veth1
+	$IP -6 address add 2001:db8:51::2/64 dev veth1
+
+	$IP link add dummy1 type dummy
+	$IP link set dev dummy1 vrf red up
+	$IP address add 192.168.2.1/24 dev dummy1
+	$IP -6 address add 2001:db8:2::1/64 dev dummy1
+	set +e
+
+	sleep 1
+	fib4_nexthop
+	fib6_nexthop
+
+	(
+	$IP link del dev dummy1
+	$IP link del veth0
+	$IP link del red
+	) 2>/dev/null
+	cleanup
+}
+
+################################################################################
+# Tests on route add and replace
+
+run_cmd()
+{
+	local cmd="$1"
+	local out
+	local stderr="2>/dev/null"
+
+	if [ "$VERBOSE" = "1" ]; then
+		printf "    COMMAND: $cmd\n"
+		stderr=
+	fi
+
+	out=$(eval $cmd $stderr)
+	rc=$?
+	if [ "$VERBOSE" = "1" -a -n "$out" ]; then
+		echo "    $out"
+	fi
+
+	[ "$VERBOSE" = "1" ] && echo
+
+	return $rc
+}
+
+check_expected()
+{
+	local out="$1"
+	local expected="$2"
+	local rc=0
+
+	[ "${out}" = "${expected}" ] && return 0
+
+	if [ -z "${out}" ]; then
+		if [ "$VERBOSE" = "1" ]; then
+			printf "\nNo route entry found\n"
+			printf "Expected:\n"
+			printf "    ${expected}\n"
+		fi
+		return 1
+	fi
+
+	# tricky way to convert output to 1-line without ip's
+	# messy '\'; this drops all extra white space
+	out=$(echo ${out})
+	if [ "${out}" != "${expected}" ]; then
+		rc=1
+		if [ "${VERBOSE}" = "1" ]; then
+			printf "    Unexpected route entry. Have:\n"
+			printf "        ${out}\n"
+			printf "    Expected:\n"
+			printf "        ${expected}\n\n"
+		fi
+	fi
+
+	return $rc
+}
+
+# add route for a prefix, flushing any existing routes first
+# expected to be the first step of a test
+add_route6()
+{
+	local pfx="$1"
+	local nh="$2"
+	local out
+
+	if [ "$VERBOSE" = "1" ]; then
+		echo
+		echo "    ##################################################"
+		echo
+	fi
+
+	run_cmd "$IP -6 ro flush ${pfx}"
+	[ $? -ne 0 ] && exit 1
+
+	out=$($IP -6 ro ls match ${pfx})
+	if [ -n "$out" ]; then
+		echo "Failed to flush routes for prefix used for tests."
+		exit 1
+	fi
+
+	run_cmd "$IP -6 ro add ${pfx} ${nh}"
+	if [ $? -ne 0 ]; then
+		echo "Failed to add initial route for test."
+		exit 1
+	fi
+}
+
+# add initial route - used in replace route tests
+add_initial_route6()
+{
+	add_route6 "2001:db8:104::/64" "$1"
+}
+
+check_route6()
+{
+	local pfx="2001:db8:104::/64"
+	local expected="$1"
+	local out
+	local rc=0
+
+	out=$($IP -6 ro ls match ${pfx} | sed -e 's/ pref medium//')
+	check_expected "${out}" "${expected}"
+}
+
+route_cleanup()
+{
+	$IP li del red 2>/dev/null
+	$IP li del dummy1 2>/dev/null
+	$IP li del veth1 2>/dev/null
+	$IP li del veth3 2>/dev/null
+
+	cleanup &> /dev/null
+}
+
+route_setup()
+{
+	route_cleanup
+	setup
+
+	[ "${VERBOSE}" = "1" ] && set -x
+	set -e
+
+	$IP li add red up type vrf table 101
+	$IP li add veth1 type veth peer name veth2
+	$IP li add veth3 type veth peer name veth4
+
+	$IP li set veth1 up
+	$IP li set veth3 up
+	$IP li set veth2 vrf red up
+	$IP li set veth4 vrf red up
+	$IP li add dummy1 type dummy
+	$IP li set dummy1 vrf red up
+
+	$IP -6 addr add 2001:db8:101::1/64 dev veth1
+	$IP -6 addr add 2001:db8:101::2/64 dev veth2
+	$IP -6 addr add 2001:db8:103::1/64 dev veth3
+	$IP -6 addr add 2001:db8:103::2/64 dev veth4
+	$IP -6 addr add 2001:db8:104::1/64 dev dummy1
+
+	$IP addr add 172.16.101.1/24 dev veth1
+	$IP addr add 172.16.101.2/24 dev veth2
+	$IP addr add 172.16.103.1/24 dev veth3
+	$IP addr add 172.16.103.2/24 dev veth4
+	$IP addr add 172.16.104.1/24 dev dummy1
+
+	set +e
+}
+
+# assumption is that basic add of a single path route works
+# otherwise just adding an address on an interface is broken
+ipv6_rt_add()
+{
+	local rc
+
+	echo
+	echo "IPv6 route add / append tests"
+
+	# route add same prefix - fails with EEXISTS b/c ip adds NLM_F_EXCL
+	add_route6 "2001:db8:104::/64" "via 2001:db8:101::2"
+	run_cmd "$IP -6 ro add 2001:db8:104::/64 via 2001:db8:103::2"
+	log_test $? 2 "Attempt to add duplicate route - gw"
+
+	# route add same prefix - fails with EEXISTS b/c ip adds NLM_F_EXCL
+	add_route6 "2001:db8:104::/64" "via 2001:db8:101::2"
+	run_cmd "$IP -6 ro add 2001:db8:104::/64 dev veth3"
+	log_test $? 2 "Attempt to add duplicate route - dev only"
+
+	# route add same prefix - fails with EEXISTS b/c ip adds NLM_F_EXCL
+	add_route6 "2001:db8:104::/64" "via 2001:db8:101::2"
+	run_cmd "$IP -6 ro add unreachable 2001:db8:104::/64"
+	log_test $? 2 "Attempt to add duplicate route - reject route"
+
+	# route append with same prefix adds a new route
+	# - iproute2 sets NLM_F_CREATE | NLM_F_APPEND
+	add_route6 "2001:db8:104::/64" "via 2001:db8:101::2"
+	run_cmd "$IP -6 ro append 2001:db8:104::/64 via 2001:db8:103::2"
+	check_route6 "2001:db8:104::/64 metric 1024 nexthop via 2001:db8:101::2 dev veth1 weight 1 nexthop via 2001:db8:103::2 dev veth3 weight 1"
+	log_test $? 0 "Append nexthop to existing route - gw"
+
+	# insert mpath directly
+	add_route6 "2001:db8:104::/64" "nexthop via 2001:db8:101::2 nexthop via 2001:db8:103::2"
+	check_route6  "2001:db8:104::/64 metric 1024 nexthop via 2001:db8:101::2 dev veth1 weight 1 nexthop via 2001:db8:103::2 dev veth3 weight 1"
+	log_test $? 0 "Add multipath route"
+
+	add_route6 "2001:db8:104::/64" "nexthop via 2001:db8:101::2 nexthop via 2001:db8:103::2"
+	run_cmd "$IP -6 ro add 2001:db8:104::/64 nexthop via 2001:db8:101::2 nexthop via 2001:db8:103::2"
+	log_test $? 2 "Attempt to add duplicate multipath route"
+
+	# insert of a second route without append but different metric
+	add_route6 "2001:db8:104::/64" "via 2001:db8:101::2"
+	run_cmd "$IP -6 ro add 2001:db8:104::/64 via 2001:db8:103::2 metric 512"
+	rc=$?
+	if [ $rc -eq 0 ]; then
+		run_cmd "$IP -6 ro add 2001:db8:104::/64 via 2001:db8:103::3 metric 256"
+		rc=$?
+	fi
+	log_test $rc 0 "Route add with different metrics"
+
+	run_cmd "$IP -6 ro del 2001:db8:104::/64 metric 512"
+	rc=$?
+	if [ $rc -eq 0 ]; then
+		check_route6 "2001:db8:104::/64 via 2001:db8:103::3 dev veth3 metric 256 2001:db8:104::/64 via 2001:db8:101::2 dev veth1 metric 1024"
+		rc=$?
+	fi
+	log_test $rc 0 "Route delete with metric"
+}
+
+ipv6_rt_replace_single()
+{
+	# single path with single path
+	#
+	add_initial_route6 "via 2001:db8:101::2"
+	run_cmd "$IP -6 ro replace 2001:db8:104::/64 via 2001:db8:103::2"
+	check_route6 "2001:db8:104::/64 via 2001:db8:103::2 dev veth3 metric 1024"
+	log_test $? 0 "Single path with single path"
+
+	# single path with multipath
+	#
+	add_initial_route6 "nexthop via 2001:db8:101::2"
+	run_cmd "$IP -6 ro replace 2001:db8:104::/64 nexthop via 2001:db8:101::3 nexthop via 2001:db8:103::2"
+	check_route6 "2001:db8:104::/64 metric 1024 nexthop via 2001:db8:101::3 dev veth1 weight 1 nexthop via 2001:db8:103::2 dev veth3 weight 1"
+	log_test $? 0 "Single path with multipath"
+
+	# single path with single path using MULTIPATH attribute
+	#
+	add_initial_route6 "via 2001:db8:101::2"
+	run_cmd "$IP -6 ro replace 2001:db8:104::/64 nexthop via 2001:db8:103::2"
+	check_route6 "2001:db8:104::/64 via 2001:db8:103::2 dev veth3 metric 1024"
+	log_test $? 0 "Single path with single path via multipath attribute"
+
+	# route replace fails - invalid nexthop
+	add_initial_route6 "via 2001:db8:101::2"
+	run_cmd "$IP -6 ro replace 2001:db8:104::/64 via 2001:db8:104::2"
+	if [ $? -eq 0 ]; then
+		# previous command is expected to fail so if it returns 0
+		# that means the test failed.
+		log_test 0 1 "Invalid nexthop"
+	else
+		check_route6 "2001:db8:104::/64 via 2001:db8:101::2 dev veth1 metric 1024"
+		log_test $? 0 "Invalid nexthop"
+	fi
+
+	# replace non-existent route
+	# - note use of change versus replace since ip adds NLM_F_CREATE
+	#   for replace
+	add_initial_route6 "via 2001:db8:101::2"
+	run_cmd "$IP -6 ro change 2001:db8:105::/64 via 2001:db8:101::2"
+	log_test $? 2 "Single path - replace of non-existent route"
+}
+
+ipv6_rt_replace_mpath()
+{
+	# multipath with multipath
+	add_initial_route6 "nexthop via 2001:db8:101::2 nexthop via 2001:db8:103::2"
+	run_cmd "$IP -6 ro replace 2001:db8:104::/64 nexthop via 2001:db8:101::3 nexthop via 2001:db8:103::3"
+	check_route6  "2001:db8:104::/64 metric 1024 nexthop via 2001:db8:101::3 dev veth1 weight 1 nexthop via 2001:db8:103::3 dev veth3 weight 1"
+	log_test $? 0 "Multipath with multipath"
+
+	# multipath with single
+	add_initial_route6 "nexthop via 2001:db8:101::2 nexthop via 2001:db8:103::2"
+	run_cmd "$IP -6 ro replace 2001:db8:104::/64 via 2001:db8:101::3"
+	check_route6  "2001:db8:104::/64 via 2001:db8:101::3 dev veth1 metric 1024"
+	log_test $? 0 "Multipath with single path"
+
+	# multipath with single
+	add_initial_route6 "nexthop via 2001:db8:101::2 nexthop via 2001:db8:103::2"
+	run_cmd "$IP -6 ro replace 2001:db8:104::/64 nexthop via 2001:db8:101::3"
+	check_route6 "2001:db8:104::/64 via 2001:db8:101::3 dev veth1 metric 1024"
+	log_test $? 0 "Multipath with single path via multipath attribute"
+
+	# multipath with dev-only
+	add_initial_route6 "nexthop via 2001:db8:101::2 nexthop via 2001:db8:103::2"
+	run_cmd "$IP -6 ro replace 2001:db8:104::/64 dev veth1"
+	check_route6 "2001:db8:104::/64 dev veth1 metric 1024"
+	log_test $? 0 "Multipath with dev-only"
+
+	# route replace fails - invalid nexthop 1
+	add_initial_route6 "nexthop via 2001:db8:101::2 nexthop via 2001:db8:103::2"
+	run_cmd "$IP -6 ro replace 2001:db8:104::/64 nexthop via 2001:db8:111::3 nexthop via 2001:db8:103::3"
+	check_route6  "2001:db8:104::/64 metric 1024 nexthop via 2001:db8:101::2 dev veth1 weight 1 nexthop via 2001:db8:103::2 dev veth3 weight 1"
+	log_test $? 0 "Multipath - invalid first nexthop"
+
+	# route replace fails - invalid nexthop 2
+	add_initial_route6 "nexthop via 2001:db8:101::2 nexthop via 2001:db8:103::2"
+	run_cmd "$IP -6 ro replace 2001:db8:104::/64 nexthop via 2001:db8:101::3 nexthop via 2001:db8:113::3"
+	check_route6  "2001:db8:104::/64 metric 1024 nexthop via 2001:db8:101::2 dev veth1 weight 1 nexthop via 2001:db8:103::2 dev veth3 weight 1"
+	log_test $? 0 "Multipath - invalid second nexthop"
+
+	# multipath non-existent route
+	add_initial_route6 "nexthop via 2001:db8:101::2 nexthop via 2001:db8:103::2"
+	run_cmd "$IP -6 ro change 2001:db8:105::/64 nexthop via 2001:db8:101::3 nexthop via 2001:db8:103::3"
+	log_test $? 2 "Multipath - replace of non-existent route"
+}
+
+ipv6_rt_replace()
+{
+	echo
+	echo "IPv6 route replace tests"
+
+	ipv6_rt_replace_single
+	ipv6_rt_replace_mpath
+}
+
+ipv6_route_test()
+{
+	route_setup
+
+	ipv6_rt_add
+	ipv6_rt_replace
+
+	route_cleanup
+}
+
+ip_addr_metric_check()
+{
+	ip addr help 2>&1 | grep -q metric
+	if [ $? -ne 0 ]; then
+		echo "iproute2 command does not support metric for addresses. Skipping test"
+		return 1
+	fi
+
+	return 0
+}
+
+ipv6_addr_metric_test()
+{
+	local rc
+
+	echo
+	echo "IPv6 prefix route tests"
+
+	ip_addr_metric_check || return 1
+
+	setup
+
+	set -e
+	$IP li add dummy1 type dummy
+	$IP li add dummy2 type dummy
+	$IP li set dummy1 up
+	$IP li set dummy2 up
+
+	# default entry is metric 256
+	run_cmd "$IP -6 addr add dev dummy1 2001:db8:104::1/64"
+	run_cmd "$IP -6 addr add dev dummy2 2001:db8:104::2/64"
+	set +e
+
+	check_route6 "2001:db8:104::/64 dev dummy1 proto kernel metric 256 2001:db8:104::/64 dev dummy2 proto kernel metric 256"
+	log_test $? 0 "Default metric"
+
+	set -e
+	run_cmd "$IP -6 addr flush dev dummy1"
+	run_cmd "$IP -6 addr add dev dummy1 2001:db8:104::1/64 metric 257"
+	set +e
+
+	check_route6 "2001:db8:104::/64 dev dummy2 proto kernel metric 256 2001:db8:104::/64 dev dummy1 proto kernel metric 257"
+	log_test $? 0 "User specified metric on first device"
+
+	set -e
+	run_cmd "$IP -6 addr flush dev dummy2"
+	run_cmd "$IP -6 addr add dev dummy2 2001:db8:104::2/64 metric 258"
+	set +e
+
+	check_route6 "2001:db8:104::/64 dev dummy1 proto kernel metric 257 2001:db8:104::/64 dev dummy2 proto kernel metric 258"
+	log_test $? 0 "User specified metric on second device"
+
+	run_cmd "$IP -6 addr del dev dummy1 2001:db8:104::1/64 metric 257"
+	rc=$?
+	if [ $rc -eq 0 ]; then
+		check_route6 "2001:db8:104::/64 dev dummy2 proto kernel metric 258"
+		rc=$?
+	fi
+	log_test $rc 0 "Delete of address on first device"
+
+	run_cmd "$IP -6 addr change dev dummy2 2001:db8:104::2/64 metric 259"
+	rc=$?
+	if [ $rc -eq 0 ]; then
+		check_route6 "2001:db8:104::/64 dev dummy2 proto kernel metric 259"
+		rc=$?
+	fi
+	log_test $rc 0 "Modify metric of address"
+
+	# verify prefix route removed on down
+	run_cmd "ip netns exec testns sysctl -qw net.ipv6.conf.all.keep_addr_on_down=1"
+	run_cmd "$IP li set dev dummy2 down"
+	rc=$?
+	if [ $rc -eq 0 ]; then
+		out=$($IP -6 ro ls match 2001:db8:104::/64)
+		check_expected "${out}" ""
+		rc=$?
+	fi
+	log_test $rc 0 "Prefix route removed on link down"
+
+	# verify prefix route re-inserted with assigned metric
+	run_cmd "$IP li set dev dummy2 up"
+	rc=$?
+	if [ $rc -eq 0 ]; then
+		check_route6 "2001:db8:104::/64 dev dummy2 proto kernel metric 259"
+		rc=$?
+	fi
+	log_test $rc 0 "Prefix route with metric on link up"
+
+	# verify peer metric added correctly
+	set -e
+	run_cmd "$IP -6 addr flush dev dummy2"
+	run_cmd "$IP -6 addr add dev dummy2 2001:db8:104::1 peer 2001:db8:104::2 metric 260"
+	set +e
+
+	check_route6 "2001:db8:104::1 dev dummy2 proto kernel metric 260"
+	log_test $? 0 "Set metric with peer route on local side"
+	check_route6 "2001:db8:104::2 dev dummy2 proto kernel metric 260"
+	log_test $? 0 "Set metric with peer route on peer side"
+
+	set -e
+	run_cmd "$IP -6 addr change dev dummy2 2001:db8:104::1 peer 2001:db8:104::3 metric 261"
+	set +e
+
+	check_route6 "2001:db8:104::1 dev dummy2 proto kernel metric 261"
+	log_test $? 0 "Modify metric and peer address on local side"
+	check_route6 "2001:db8:104::3 dev dummy2 proto kernel metric 261"
+	log_test $? 0 "Modify metric and peer address on peer side"
+
+	$IP li del dummy1
+	$IP li del dummy2
+	cleanup
+}
+
+# add route for a prefix, flushing any existing routes first
+# expected to be the first step of a test
+add_route()
+{
+	local pfx="$1"
+	local nh="$2"
+	local out
+
+	if [ "$VERBOSE" = "1" ]; then
+		echo
+		echo "    ##################################################"
+		echo
+	fi
+
+	run_cmd "$IP ro flush ${pfx}"
+	[ $? -ne 0 ] && exit 1
+
+	out=$($IP ro ls match ${pfx})
+	if [ -n "$out" ]; then
+		echo "Failed to flush routes for prefix used for tests."
+		exit 1
+	fi
+
+	run_cmd "$IP ro add ${pfx} ${nh}"
+	if [ $? -ne 0 ]; then
+		echo "Failed to add initial route for test."
+		exit 1
+	fi
+}
+
+# add initial route - used in replace route tests
+add_initial_route()
+{
+	add_route "172.16.104.0/24" "$1"
+}
+
+check_route()
+{
+	local pfx="172.16.104.0/24"
+	local expected="$1"
+	local out
+
+	out=$($IP ro ls match ${pfx})
+	check_expected "${out}" "${expected}"
+}
+
+# assumption is that basic add of a single path route works
+# otherwise just adding an address on an interface is broken
+ipv4_rt_add()
+{
+	local rc
+
+	echo
+	echo "IPv4 route add / append tests"
+
+	# route add same prefix - fails with EEXISTS b/c ip adds NLM_F_EXCL
+	add_route "172.16.104.0/24" "via 172.16.101.2"
+	run_cmd "$IP ro add 172.16.104.0/24 via 172.16.103.2"
+	log_test $? 2 "Attempt to add duplicate route - gw"
+
+	# route add same prefix - fails with EEXISTS b/c ip adds NLM_F_EXCL
+	add_route "172.16.104.0/24" "via 172.16.101.2"
+	run_cmd "$IP ro add 172.16.104.0/24 dev veth3"
+	log_test $? 2 "Attempt to add duplicate route - dev only"
+
+	# route add same prefix - fails with EEXISTS b/c ip adds NLM_F_EXCL
+	add_route "172.16.104.0/24" "via 172.16.101.2"
+	run_cmd "$IP ro add unreachable 172.16.104.0/24"
+	log_test $? 2 "Attempt to add duplicate route - reject route"
+
+	# iproute2 prepend only sets NLM_F_CREATE
+	# - adds a new route; does NOT convert existing route to ECMP
+	add_route "172.16.104.0/24" "via 172.16.101.2"
+	run_cmd "$IP ro prepend 172.16.104.0/24 via 172.16.103.2"
+	check_route "172.16.104.0/24 via 172.16.103.2 dev veth3 172.16.104.0/24 via 172.16.101.2 dev veth1"
+	log_test $? 0 "Add new nexthop for existing prefix"
+
+	# route append with same prefix adds a new route
+	# - iproute2 sets NLM_F_CREATE | NLM_F_APPEND
+	add_route "172.16.104.0/24" "via 172.16.101.2"
+	run_cmd "$IP ro append 172.16.104.0/24 via 172.16.103.2"
+	check_route "172.16.104.0/24 via 172.16.101.2 dev veth1 172.16.104.0/24 via 172.16.103.2 dev veth3"
+	log_test $? 0 "Append nexthop to existing route - gw"
+
+	add_route "172.16.104.0/24" "via 172.16.101.2"
+	run_cmd "$IP ro append 172.16.104.0/24 dev veth3"
+	check_route "172.16.104.0/24 via 172.16.101.2 dev veth1 172.16.104.0/24 dev veth3 scope link"
+	log_test $? 0 "Append nexthop to existing route - dev only"
+
+	add_route "172.16.104.0/24" "via 172.16.101.2"
+	run_cmd "$IP ro append unreachable 172.16.104.0/24"
+	check_route "172.16.104.0/24 via 172.16.101.2 dev veth1 unreachable 172.16.104.0/24"
+	log_test $? 0 "Append nexthop to existing route - reject route"
+
+	run_cmd "$IP ro flush 172.16.104.0/24"
+	run_cmd "$IP ro add unreachable 172.16.104.0/24"
+	run_cmd "$IP ro append 172.16.104.0/24 via 172.16.103.2"
+	check_route "unreachable 172.16.104.0/24 172.16.104.0/24 via 172.16.103.2 dev veth3"
+	log_test $? 0 "Append nexthop to existing reject route - gw"
+
+	run_cmd "$IP ro flush 172.16.104.0/24"
+	run_cmd "$IP ro add unreachable 172.16.104.0/24"
+	run_cmd "$IP ro append 172.16.104.0/24 dev veth3"
+	check_route "unreachable 172.16.104.0/24 172.16.104.0/24 dev veth3 scope link"
+	log_test $? 0 "Append nexthop to existing reject route - dev only"
+
+	# insert mpath directly
+	add_route "172.16.104.0/24" "nexthop via 172.16.101.2 nexthop via 172.16.103.2"
+	check_route  "172.16.104.0/24 nexthop via 172.16.101.2 dev veth1 weight 1 nexthop via 172.16.103.2 dev veth3 weight 1"
+	log_test $? 0 "add multipath route"
+
+	add_route "172.16.104.0/24" "nexthop via 172.16.101.2 nexthop via 172.16.103.2"
+	run_cmd "$IP ro add 172.16.104.0/24 nexthop via 172.16.101.2 nexthop via 172.16.103.2"
+	log_test $? 2 "Attempt to add duplicate multipath route"
+
+	# insert of a second route without append but different metric
+	add_route "172.16.104.0/24" "via 172.16.101.2"
+	run_cmd "$IP ro add 172.16.104.0/24 via 172.16.103.2 metric 512"
+	rc=$?
+	if [ $rc -eq 0 ]; then
+		run_cmd "$IP ro add 172.16.104.0/24 via 172.16.103.3 metric 256"
+		rc=$?
+	fi
+	log_test $rc 0 "Route add with different metrics"
+
+	run_cmd "$IP ro del 172.16.104.0/24 metric 512"
+	rc=$?
+	if [ $rc -eq 0 ]; then
+		check_route "172.16.104.0/24 via 172.16.101.2 dev veth1 172.16.104.0/24 via 172.16.103.3 dev veth3 metric 256"
+		rc=$?
+	fi
+	log_test $rc 0 "Route delete with metric"
+}
+
+ipv4_rt_replace_single()
+{
+	# single path with single path
+	#
+	add_initial_route "via 172.16.101.2"
+	run_cmd "$IP ro replace 172.16.104.0/24 via 172.16.103.2"
+	check_route "172.16.104.0/24 via 172.16.103.2 dev veth3"
+	log_test $? 0 "Single path with single path"
+
+	# single path with multipath
+	#
+	add_initial_route "nexthop via 172.16.101.2"
+	run_cmd "$IP ro replace 172.16.104.0/24 nexthop via 172.16.101.3 nexthop via 172.16.103.2"
+	check_route "172.16.104.0/24 nexthop via 172.16.101.3 dev veth1 weight 1 nexthop via 172.16.103.2 dev veth3 weight 1"
+	log_test $? 0 "Single path with multipath"
+
+	# single path with reject
+	#
+	add_initial_route "nexthop via 172.16.101.2"
+	run_cmd "$IP ro replace unreachable 172.16.104.0/24"
+	check_route "unreachable 172.16.104.0/24"
+	log_test $? 0 "Single path with reject route"
+
+	# single path with single path using MULTIPATH attribute
+	#
+	add_initial_route "via 172.16.101.2"
+	run_cmd "$IP ro replace 172.16.104.0/24 nexthop via 172.16.103.2"
+	check_route "172.16.104.0/24 via 172.16.103.2 dev veth3"
+	log_test $? 0 "Single path with single path via multipath attribute"
+
+	# route replace fails - invalid nexthop
+	add_initial_route "via 172.16.101.2"
+	run_cmd "$IP ro replace 172.16.104.0/24 via 2001:db8:104::2"
+	if [ $? -eq 0 ]; then
+		# previous command is expected to fail so if it returns 0
+		# that means the test failed.
+		log_test 0 1 "Invalid nexthop"
+	else
+		check_route "172.16.104.0/24 via 172.16.101.2 dev veth1"
+		log_test $? 0 "Invalid nexthop"
+	fi
+
+	# replace non-existent route
+	# - note use of change versus replace since ip adds NLM_F_CREATE
+	#   for replace
+	add_initial_route "via 172.16.101.2"
+	run_cmd "$IP ro change 172.16.105.0/24 via 172.16.101.2"
+	log_test $? 2 "Single path - replace of non-existent route"
+}
+
+ipv4_rt_replace_mpath()
+{
+	# multipath with multipath
+	add_initial_route "nexthop via 172.16.101.2 nexthop via 172.16.103.2"
+	run_cmd "$IP ro replace 172.16.104.0/24 nexthop via 172.16.101.3 nexthop via 172.16.103.3"
+	check_route  "172.16.104.0/24 nexthop via 172.16.101.3 dev veth1 weight 1 nexthop via 172.16.103.3 dev veth3 weight 1"
+	log_test $? 0 "Multipath with multipath"
+
+	# multipath with single
+	add_initial_route "nexthop via 172.16.101.2 nexthop via 172.16.103.2"
+	run_cmd "$IP ro replace 172.16.104.0/24 via 172.16.101.3"
+	check_route  "172.16.104.0/24 via 172.16.101.3 dev veth1"
+	log_test $? 0 "Multipath with single path"
+
+	# multipath with single
+	add_initial_route "nexthop via 172.16.101.2 nexthop via 172.16.103.2"
+	run_cmd "$IP ro replace 172.16.104.0/24 nexthop via 172.16.101.3"
+	check_route "172.16.104.0/24 via 172.16.101.3 dev veth1"
+	log_test $? 0 "Multipath with single path via multipath attribute"
+
+	# multipath with reject
+	add_initial_route "nexthop via 172.16.101.2 nexthop via 172.16.103.2"
+	run_cmd "$IP ro replace unreachable 172.16.104.0/24"
+	check_route "unreachable 172.16.104.0/24"
+	log_test $? 0 "Multipath with reject route"
+
+	# route replace fails - invalid nexthop 1
+	add_initial_route "nexthop via 172.16.101.2 nexthop via 172.16.103.2"
+	run_cmd "$IP ro replace 172.16.104.0/24 nexthop via 172.16.111.3 nexthop via 172.16.103.3"
+	check_route  "172.16.104.0/24 nexthop via 172.16.101.2 dev veth1 weight 1 nexthop via 172.16.103.2 dev veth3 weight 1"
+	log_test $? 0 "Multipath - invalid first nexthop"
+
+	# route replace fails - invalid nexthop 2
+	add_initial_route "nexthop via 172.16.101.2 nexthop via 172.16.103.2"
+	run_cmd "$IP ro replace 172.16.104.0/24 nexthop via 172.16.101.3 nexthop via 172.16.113.3"
+	check_route  "172.16.104.0/24 nexthop via 172.16.101.2 dev veth1 weight 1 nexthop via 172.16.103.2 dev veth3 weight 1"
+	log_test $? 0 "Multipath - invalid second nexthop"
+
+	# multipath non-existent route
+	add_initial_route "nexthop via 172.16.101.2 nexthop via 172.16.103.2"
+	run_cmd "$IP ro change 172.16.105.0/24 nexthop via 172.16.101.3 nexthop via 172.16.103.3"
+	log_test $? 2 "Multipath - replace of non-existent route"
+}
+
+ipv4_rt_replace()
+{
+	echo
+	echo "IPv4 route replace tests"
+
+	ipv4_rt_replace_single
+	ipv4_rt_replace_mpath
+}
+
+ipv4_route_test()
+{
+	route_setup
+
+	ipv4_rt_add
+	ipv4_rt_replace
+
+	route_cleanup
+}
+
+ipv4_addr_metric_test()
+{
+	local rc
+
+	echo
+	echo "IPv4 prefix route tests"
+
+	ip_addr_metric_check || return 1
+
+	setup
+
+	set -e
+	$IP li add dummy1 type dummy
+	$IP li add dummy2 type dummy
+	$IP li set dummy1 up
+	$IP li set dummy2 up
+
+	# default entry is metric 256
+	run_cmd "$IP addr add dev dummy1 172.16.104.1/24"
+	run_cmd "$IP addr add dev dummy2 172.16.104.2/24"
+	set +e
+
+	check_route "172.16.104.0/24 dev dummy1 proto kernel scope link src 172.16.104.1 172.16.104.0/24 dev dummy2 proto kernel scope link src 172.16.104.2"
+	log_test $? 0 "Default metric"
+
+	set -e
+	run_cmd "$IP addr flush dev dummy1"
+	run_cmd "$IP addr add dev dummy1 172.16.104.1/24 metric 257"
+	set +e
+
+	check_route "172.16.104.0/24 dev dummy2 proto kernel scope link src 172.16.104.2 172.16.104.0/24 dev dummy1 proto kernel scope link src 172.16.104.1 metric 257"
+	log_test $? 0 "User specified metric on first device"
+
+	set -e
+	run_cmd "$IP addr flush dev dummy2"
+	run_cmd "$IP addr add dev dummy2 172.16.104.2/24 metric 258"
+	set +e
+
+	check_route "172.16.104.0/24 dev dummy1 proto kernel scope link src 172.16.104.1 metric 257 172.16.104.0/24 dev dummy2 proto kernel scope link src 172.16.104.2 metric 258"
+	log_test $? 0 "User specified metric on second device"
+
+	run_cmd "$IP addr del dev dummy1 172.16.104.1/24 metric 257"
+	rc=$?
+	if [ $rc -eq 0 ]; then
+		check_route "172.16.104.0/24 dev dummy2 proto kernel scope link src 172.16.104.2 metric 258"
+		rc=$?
+	fi
+	log_test $rc 0 "Delete of address on first device"
+
+	run_cmd "$IP addr change dev dummy2 172.16.104.2/24 metric 259"
+	rc=$?
+	if [ $rc -eq 0 ]; then
+		check_route "172.16.104.0/24 dev dummy2 proto kernel scope link src 172.16.104.2 metric 259"
+		rc=$?
+	fi
+	log_test $rc 0 "Modify metric of address"
+
+	# verify prefix route removed on down
+	run_cmd "$IP li set dev dummy2 down"
+	rc=$?
+	if [ $rc -eq 0 ]; then
+		out=$($IP ro ls match 172.16.104.0/24)
+		check_expected "${out}" ""
+		rc=$?
+	fi
+	log_test $rc 0 "Prefix route removed on link down"
+
+	# verify prefix route re-inserted with assigned metric
+	run_cmd "$IP li set dev dummy2 up"
+	rc=$?
+	if [ $rc -eq 0 ]; then
+		check_route "172.16.104.0/24 dev dummy2 proto kernel scope link src 172.16.104.2 metric 259"
+		rc=$?
+	fi
+	log_test $rc 0 "Prefix route with metric on link up"
+
+	# explicitly check for metric changes on edge scenarios
+	run_cmd "$IP addr flush dev dummy2"
+	run_cmd "$IP addr add dev dummy2 172.16.104.0/24 metric 259"
+	run_cmd "$IP addr change dev dummy2 172.16.104.0/24 metric 260"
+	rc=$?
+	if [ $rc -eq 0 ]; then
+		check_route "172.16.104.0/24 dev dummy2 proto kernel scope link src 172.16.104.0 metric 260"
+		rc=$?
+	fi
+	log_test $rc 0 "Modify metric of .0/24 address"
+
+	run_cmd "$IP addr flush dev dummy2"
+	run_cmd "$IP addr add dev dummy2 172.16.104.1/32 peer 172.16.104.2 metric 260"
+	rc=$?
+	if [ $rc -eq 0 ]; then
+		check_route "172.16.104.2 dev dummy2 proto kernel scope link src 172.16.104.1 metric 260"
+		rc=$?
+	fi
+	log_test $rc 0 "Set metric of address with peer route"
+
+	run_cmd "$IP addr change dev dummy2 172.16.104.1/32 peer 172.16.104.3 metric 261"
+	rc=$?
+	if [ $rc -eq 0 ]; then
+		check_route "172.16.104.3 dev dummy2 proto kernel scope link src 172.16.104.1 metric 261"
+		rc=$?
+	fi
+	log_test $rc 0 "Modify metric and peer address for peer route"
+
+	$IP li del dummy1
+	$IP li del dummy2
+	cleanup
+}
+
+################################################################################
+# usage
+
+usage()
+{
+	cat <<EOF
+usage: ${0##*/} OPTS
+
+        -t <test>   Test(s) to run (default: all)
+                    (options: $TESTS)
+        -p          Pause on fail
+        -P          Pause after each test before cleanup
+        -v          verbose mode (show commands and output)
+EOF
+}
+
+################################################################################
+# main
+
+while getopts :t:pPhv o
+do
+	case $o in
+		t) TESTS=$OPTARG;;
+		p) PAUSE_ON_FAIL=yes;;
+		P) PAUSE=yes;;
+		v) VERBOSE=$(($VERBOSE + 1));;
+		h) usage; exit 0;;
+		*) usage; exit 1;;
+	esac
+done
+
+PEER_CMD="ip netns exec ${PEER_NS}"
+
+# make sure we don't pause twice
+[ "${PAUSE}" = "yes" ] && PAUSE_ON_FAIL=no
+
+if [ "$(id -u)" -ne 0 ];then
+	echo "SKIP: Need root privileges"
+	exit $ksft_skip;
+fi
+
+if [ ! -x "$(command -v ip)" ]; then
+	echo "SKIP: Could not run test without ip tool"
+	exit $ksft_skip
+fi
+
+ip route help 2>&1 | grep -q fibmatch
+if [ $? -ne 0 ]; then
+	echo "SKIP: iproute2 too old, missing fibmatch"
+	exit $ksft_skip
+fi
+
+# start clean
+cleanup &> /dev/null
+
+for t in $TESTS
+do
+	case $t in
+	fib_unreg_test|unregister)	fib_unreg_test;;
+	fib_down_test|down)		fib_down_test;;
+	fib_carrier_test|carrier)	fib_carrier_test;;
+	fib_nexthop_test|nexthop)	fib_nexthop_test;;
+	ipv6_route_test|ipv6_rt)	ipv6_route_test;;
+	ipv4_route_test|ipv4_rt)	ipv4_route_test;;
+	ipv6_addr_metric)		ipv6_addr_metric_test;;
+	ipv4_addr_metric)		ipv4_addr_metric_test;;
+
+	help) echo "Test names: $TESTS"; exit 0;;
+	esac
+done
+
+if [ "$TESTS" != "none" ]; then
+	printf "\nTests passed: %3d\n" ${nsuccess}
+	printf "Tests failed: %3d\n"   ${nfail}
+fi
+
+exit $ret
diff --git a/tools/testing/selftests/net/forwarding/.gitignore b/tools/testing/selftests/net/forwarding/.gitignore
new file mode 100644
index 000000000..a793eef5b
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/.gitignore
@@ -0,0 +1 @@
+forwarding.config
diff --git a/tools/testing/selftests/net/forwarding/README b/tools/testing/selftests/net/forwarding/README
new file mode 100644
index 000000000..b8a2af8fc
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/README
@@ -0,0 +1,58 @@
+Motivation
+==========
+
+One of the nice things about network namespaces is that they allow one
+to easily create and test complex environments.
+
+Unfortunately, these namespaces can not be used with actual switching
+ASICs, as their ports can not be migrated to other network namespaces
+(NETIF_F_NETNS_LOCAL) and most of them probably do not support the
+L1-separation provided by namespaces.
+
+However, a similar kind of flexibility can be achieved by using VRFs and
+by looping the switch ports together. For example:
+
+                             br0
+                              +
+               vrf-h1         |           vrf-h2
+                 +        +---+----+        +
+                 |        |        |        |
+    192.0.2.1/24 +        +        +        + 192.0.2.2/24
+               swp1     swp2     swp3     swp4
+                 +        +        +        +
+                 |        |        |        |
+                 +--------+        +--------+
+
+The VRFs act as lightweight namespaces representing hosts connected to
+the switch.
+
+This approach for testing switch ASICs has several advantages over the
+traditional method that requires multiple physical machines, to name a
+few:
+
+1. Only the device under test (DUT) is being tested without noise from
+other system.
+
+2. Ability to easily provision complex topologies. Testing bridging
+between 4-ports LAGs or 8-way ECMP requires many physical links that are
+not always available. With the VRF-based approach one merely needs to
+loopback more ports.
+
+These tests are written with switch ASICs in mind, but they can be run
+on any Linux box using veth pairs to emulate physical loopbacks.
+
+Guidelines for Writing Tests
+============================
+
+o Where possible, reuse an existing topology for different tests instead
+  of recreating the same topology.
+o Tests that use anything but the most trivial topologies should include
+  an ASCII art showing the topology.
+o Where possible, IPv6 and IPv4 addresses shall conform to RFC 3849 and
+  RFC 5737, respectively.
+o Where possible, tests shall be written so that they can be reused by
+  multiple topologies and added to lib.sh.
+o Checks shall be added to lib.sh for any external dependencies.
+o Code shall be checked using ShellCheck [1] prior to submission.
+
+1. https://www.shellcheck.net/
diff --git a/tools/testing/selftests/net/forwarding/bridge_port_isolation.sh b/tools/testing/selftests/net/forwarding/bridge_port_isolation.sh
new file mode 100755
index 000000000..a43b4645c
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/bridge_port_isolation.sh
@@ -0,0 +1,151 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="ping_ipv4 ping_ipv6 flooding"
+NUM_NETIFS=6
+CHECK_TC="yes"
+source lib.sh
+
+h1_create()
+{
+	simple_if_init $h1 192.0.2.1/24 2001:db8:1::1/64
+}
+
+h1_destroy()
+{
+	simple_if_fini $h1 192.0.2.1/24 2001:db8:1::1/64
+}
+
+h2_create()
+{
+	simple_if_init $h2 192.0.2.2/24 2001:db8:1::2/64
+}
+
+h2_destroy()
+{
+	simple_if_fini $h2 192.0.2.2/24 2001:db8:1::2/64
+}
+
+h3_create()
+{
+	simple_if_init $h3 192.0.2.3/24 2001:db8:1::3/64
+}
+
+h3_destroy()
+{
+	simple_if_fini $h3 192.0.2.3/24 2001:db8:1::3/64
+}
+
+switch_create()
+{
+	ip link add dev br0 type bridge
+
+	ip link set dev $swp1 master br0
+	ip link set dev $swp2 master br0
+	ip link set dev $swp3 master br0
+
+	ip link set dev $swp1 type bridge_slave isolated on
+	check_err $? "Can't set isolation on port $swp1"
+	ip link set dev $swp2 type bridge_slave isolated on
+	check_err $? "Can't set isolation on port $swp2"
+	ip link set dev $swp3 type bridge_slave isolated off
+	check_err $? "Can't disable isolation on port $swp3"
+
+	ip link set dev br0 up
+	ip link set dev $swp1 up
+	ip link set dev $swp2 up
+	ip link set dev $swp3 up
+}
+
+switch_destroy()
+{
+	ip link set dev $swp3 down
+	ip link set dev $swp2 down
+	ip link set dev $swp1 down
+
+	ip link del dev br0
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	swp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	swp3=${NETIFS[p5]}
+	h3=${NETIFS[p6]}
+
+	vrf_prepare
+
+	h1_create
+	h2_create
+	h3_create
+
+	switch_create
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	switch_destroy
+
+	h3_destroy
+	h2_destroy
+	h1_destroy
+
+	vrf_cleanup
+}
+
+ping_ipv4()
+{
+	RET=0
+	ping_do $h1 192.0.2.2
+	check_fail $? "Ping worked when it should not have"
+
+	RET=0
+	ping_do $h3 192.0.2.2
+	check_err $? "Ping didn't work when it should have"
+
+	log_test "Isolated port ping"
+}
+
+ping_ipv6()
+{
+	RET=0
+	ping6_do $h1 2001:db8:1::2
+	check_fail $? "Ping6 worked when it should not have"
+
+	RET=0
+	ping6_do $h3 2001:db8:1::2
+	check_err $? "Ping6 didn't work when it should have"
+
+	log_test "Isolated port ping6"
+}
+
+flooding()
+{
+	local mac=de:ad:be:ef:13:37
+	local ip=192.0.2.100
+
+	RET=0
+	flood_test_do false $mac $ip $h1 $h2
+	check_err $? "Packet was flooded when it should not have been"
+
+	RET=0
+	flood_test_do true $mac $ip $h3 $h2
+	check_err $? "Packet was not flooded when it should have been"
+
+	log_test "Isolated port flooding"
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh b/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh
new file mode 100755
index 000000000..b90dff8d3
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh
@@ -0,0 +1,151 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="ping_ipv4 ping_ipv6 learning flooding vlan_deletion extern_learn"
+NUM_NETIFS=4
+CHECK_TC="yes"
+source lib.sh
+
+h1_create()
+{
+	simple_if_init $h1 192.0.2.1/24 2001:db8:1::1/64
+}
+
+h1_destroy()
+{
+	simple_if_fini $h1 192.0.2.1/24 2001:db8:1::1/64
+}
+
+h2_create()
+{
+	simple_if_init $h2 192.0.2.2/24 2001:db8:1::2/64
+}
+
+h2_destroy()
+{
+	simple_if_fini $h2 192.0.2.2/24 2001:db8:1::2/64
+}
+
+switch_create()
+{
+	# 10 Seconds ageing time.
+	ip link add dev br0 type bridge vlan_filtering 1 ageing_time 1000 \
+		mcast_snooping 0
+
+	ip link set dev $swp1 master br0
+	ip link set dev $swp2 master br0
+
+	ip link set dev br0 up
+	ip link set dev $swp1 up
+	ip link set dev $swp2 up
+}
+
+switch_destroy()
+{
+	ip link set dev $swp2 down
+	ip link set dev $swp1 down
+
+	ip link del dev br0
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	swp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	vrf_prepare
+
+	h1_create
+	h2_create
+
+	switch_create
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	switch_destroy
+
+	h2_destroy
+	h1_destroy
+
+	vrf_cleanup
+}
+
+ping_ipv4()
+{
+	ping_test $h1 192.0.2.2
+}
+
+ping_ipv6()
+{
+	ping6_test $h1 2001:db8:1::2
+}
+
+learning()
+{
+	learning_test "br0" $swp1 $h1 $h2
+}
+
+flooding()
+{
+	flood_test $swp2 $h1 $h2
+}
+
+vlan_deletion()
+{
+	# Test that the deletion of a VLAN on a bridge port does not affect
+	# the PVID VLAN
+	log_info "Add and delete a VLAN on bridge port $swp1"
+
+	bridge vlan add vid 10 dev $swp1
+	bridge vlan del vid 10 dev $swp1
+
+	ping_ipv4
+	ping_ipv6
+}
+
+extern_learn()
+{
+	local mac=de:ad:be:ef:13:37
+	local ageing_time
+
+	# Test that externally learned FDB entries can roam, but not age out
+	RET=0
+
+	bridge fdb add de:ad:be:ef:13:37 dev $swp1 master extern_learn vlan 1
+
+	bridge fdb show brport $swp1 | grep -q de:ad:be:ef:13:37
+	check_err $? "Did not find FDB entry when should"
+
+	# Wait for 10 seconds after the ageing time to make sure the FDB entry
+	# was not aged out
+	ageing_time=$(bridge_ageing_time_get br0)
+	sleep $((ageing_time + 10))
+
+	bridge fdb show brport $swp1 | grep -q de:ad:be:ef:13:37
+	check_err $? "FDB entry was aged out when should not"
+
+	$MZ $h2 -c 1 -p 64 -a $mac -t ip -q
+
+	bridge fdb show brport $swp2 | grep -q de:ad:be:ef:13:37
+	check_err $? "FDB entry did not roam when should"
+
+	log_test "Externally learned FDB entry - ageing & roaming"
+
+	bridge fdb del de:ad:be:ef:13:37 dev $swp2 master vlan 1 &> /dev/null
+	bridge fdb del de:ad:be:ef:13:37 dev $swp1 master vlan 1 &> /dev/null
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/bridge_vlan_unaware.sh b/tools/testing/selftests/net/forwarding/bridge_vlan_unaware.sh
new file mode 100755
index 000000000..c15c6c85c
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/bridge_vlan_unaware.sh
@@ -0,0 +1,104 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="ping_ipv4 ping_ipv6 learning flooding"
+NUM_NETIFS=4
+source lib.sh
+
+h1_create()
+{
+	simple_if_init $h1 192.0.2.1/24 2001:db8:1::1/64
+}
+
+h1_destroy()
+{
+	simple_if_fini $h1 192.0.2.1/24 2001:db8:1::1/64
+}
+
+h2_create()
+{
+	simple_if_init $h2 192.0.2.2/24 2001:db8:1::2/64
+}
+
+h2_destroy()
+{
+	simple_if_fini $h2 192.0.2.2/24 2001:db8:1::2/64
+}
+
+switch_create()
+{
+	# 10 Seconds ageing time.
+	ip link add dev br0 type bridge ageing_time 1000 mcast_snooping 0
+
+	ip link set dev $swp1 master br0
+	ip link set dev $swp2 master br0
+
+	ip link set dev br0 up
+	ip link set dev $swp1 up
+	ip link set dev $swp2 up
+}
+
+switch_destroy()
+{
+	ip link set dev $swp2 down
+	ip link set dev $swp1 down
+
+	ip link del dev br0
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	swp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	vrf_prepare
+
+	h1_create
+	h2_create
+
+	switch_create
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	switch_destroy
+
+	h2_destroy
+	h1_destroy
+
+	vrf_cleanup
+}
+
+ping_ipv4()
+{
+	ping_test $h1 192.0.2.2
+}
+
+ping_ipv6()
+{
+	ping6_test $h1 2001:db8:1::2
+}
+
+learning()
+{
+	learning_test "br0" $swp1 $h1 $h2
+}
+
+flooding()
+{
+	flood_test $swp2 $h1 $h2
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/config b/tools/testing/selftests/net/forwarding/config
new file mode 100644
index 000000000..5cd2aed97
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/config
@@ -0,0 +1,12 @@
+CONFIG_BRIDGE=m
+CONFIG_VLAN_8021Q=m
+CONFIG_BRIDGE_VLAN_FILTERING=y
+CONFIG_NET_L3_MASTER_DEV=y
+CONFIG_IPV6_MULTIPLE_TABLES=y
+CONFIG_NET_VRF=m
+CONFIG_BPF_SYSCALL=y
+CONFIG_CGROUP_BPF=y
+CONFIG_NET_CLS_FLOWER=m
+CONFIG_NET_SCH_INGRESS=m
+CONFIG_NET_ACT_GACT=m
+CONFIG_VETH=m
diff --git a/tools/testing/selftests/net/forwarding/devlink_lib.sh b/tools/testing/selftests/net/forwarding/devlink_lib.sh
new file mode 100644
index 000000000..5ab1e5f43
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/devlink_lib.sh
@@ -0,0 +1,108 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+##############################################################################
+# Source library
+
+relative_path="${BASH_SOURCE%/*}"
+if [[ "$relative_path" == "${BASH_SOURCE}" ]]; then
+	relative_path="."
+fi
+
+source "$relative_path/lib.sh"
+
+##############################################################################
+# Defines
+
+DEVLINK_DEV=$(devlink port show | grep "${NETIFS[p1]}" | \
+	      grep -v "${NETIFS[p1]}[0-9]" | cut -d" " -f1 | \
+	      rev | cut -d"/" -f2- | rev)
+if [ -z "$DEVLINK_DEV" ]; then
+	echo "SKIP: ${NETIFS[p1]} has no devlink device registered for it"
+	exit 1
+fi
+if [[ "$(echo $DEVLINK_DEV | grep -c pci)" -eq 0 ]]; then
+	echo "SKIP: devlink device's bus is not PCI"
+	exit 1
+fi
+
+DEVLINK_VIDDID=$(lspci -s $(echo $DEVLINK_DEV | cut -d"/" -f2) \
+		 -n | cut -d" " -f3)
+
+##############################################################################
+# Sanity checks
+
+devlink -j resource show "$DEVLINK_DEV" &> /dev/null
+if [ $? -ne 0 ]; then
+	echo "SKIP: iproute2 too old, missing devlink resource support"
+	exit 1
+fi
+
+##############################################################################
+# Devlink helpers
+
+devlink_resource_names_to_path()
+{
+	local resource
+	local path=""
+
+	for resource in "${@}"; do
+		if [ "$path" == "" ]; then
+			path="$resource"
+		else
+			path="${path}/$resource"
+		fi
+	done
+
+	echo "$path"
+}
+
+devlink_resource_get()
+{
+	local name=$1
+	local resource_name=.[][\"$DEVLINK_DEV\"]
+
+	resource_name="$resource_name | .[] | select (.name == \"$name\")"
+
+	shift
+	for resource in "${@}"; do
+		resource_name="${resource_name} | .[\"resources\"][] | \
+			       select (.name == \"$resource\")"
+	done
+
+	devlink -j resource show "$DEVLINK_DEV" | jq "$resource_name"
+}
+
+devlink_resource_size_get()
+{
+	local size=$(devlink_resource_get "$@" | jq '.["size_new"]')
+
+	if [ "$size" == "null" ]; then
+		devlink_resource_get "$@" | jq '.["size"]'
+	else
+		echo "$size"
+	fi
+}
+
+devlink_resource_size_set()
+{
+	local new_size=$1
+	local path
+
+	shift
+	path=$(devlink_resource_names_to_path "$@")
+	devlink resource set "$DEVLINK_DEV" path "$path" size "$new_size"
+	check_err $? "Failed setting path $path to size $size"
+}
+
+devlink_reload()
+{
+	local still_pending
+
+	devlink dev reload "$DEVLINK_DEV" &> /dev/null
+	check_err $? "Failed reload"
+
+	still_pending=$(devlink resource show "$DEVLINK_DEV" | \
+			grep -c "size_new")
+	check_err $still_pending "Failed reload - There are still unset sizes"
+}
diff --git a/tools/testing/selftests/net/forwarding/forwarding.config.sample b/tools/testing/selftests/net/forwarding/forwarding.config.sample
new file mode 100644
index 000000000..e819d049d
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/forwarding.config.sample
@@ -0,0 +1,35 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+##############################################################################
+# Topology description. p1 looped back to p2, p3 to p4 and so on.
+declare -A NETIFS
+
+NETIFS[p1]=veth0
+NETIFS[p2]=veth1
+NETIFS[p3]=veth2
+NETIFS[p4]=veth3
+NETIFS[p5]=veth4
+NETIFS[p6]=veth5
+NETIFS[p7]=veth6
+NETIFS[p8]=veth7
+
+##############################################################################
+# Defines
+
+# IPv4 ping utility name
+PING=ping
+# IPv6 ping utility name. Some distributions use 'ping' for IPv6.
+PING6=ping6
+# Packet generator. Some distributions use 'mz'.
+MZ=mausezahn
+# Time to wait after interfaces participating in the test are all UP
+WAIT_TIME=5
+# Whether to pause on failure or not.
+PAUSE_ON_FAIL=no
+# Whether to pause on cleanup or not.
+PAUSE_ON_CLEANUP=no
+# Type of network interface to create
+NETIF_TYPE=veth
+# Whether to create virtual interfaces (veth) or not
+NETIF_CREATE=yes
diff --git a/tools/testing/selftests/net/forwarding/gre_multipath.sh b/tools/testing/selftests/net/forwarding/gre_multipath.sh
new file mode 100755
index 000000000..a8d8e8b3d
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/gre_multipath.sh
@@ -0,0 +1,257 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test traffic distribution when a wECMP route forwards traffic to two GRE
+# tunnels.
+#
+# +-------------------------+
+# | H1                      |
+# |               $h1 +     |
+# |      192.0.2.1/28 |     |
+# +-------------------|-----+
+#                     |
+# +-------------------|------------------------+
+# | SW1               |                        |
+# |              $ol1 +                        |
+# |      192.0.2.2/28                          |
+# |                                            |
+# |  + g1a (gre)          + g1b (gre)          |
+# |    loc=192.0.2.65       loc=192.0.2.81     |
+# |    rem=192.0.2.66 --.   rem=192.0.2.82 --. |
+# |    tos=inherit      |   tos=inherit      | |
+# |  .------------------'                    | |
+# |  |                    .------------------' |
+# |  v                    v                    |
+# |  + $ul1.111 (vlan)    + $ul1.222 (vlan)    |
+# |  | 192.0.2.129/28     | 192.0.2.145/28     |
+# |   \                  /                     |
+# |    \________________/                      |
+# |            |                               |
+# |            + $ul1                          |
+# +------------|-------------------------------+
+#              |
+# +------------|-------------------------------+
+# | SW2        + $ul2                          |
+# |     _______|________                       |
+# |    /                \                      |
+# |   /                  \                     |
+# |  + $ul2.111 (vlan)    + $ul2.222 (vlan)    |
+# |  ^ 192.0.2.130/28     ^ 192.0.2.146/28     |
+# |  |                    |                    |
+# |  |                    '------------------. |
+# |  '------------------.                    | |
+# |  + g2a (gre)        | + g2b (gre)        | |
+# |    loc=192.0.2.66   |   loc=192.0.2.82   | |
+# |    rem=192.0.2.65 --'   rem=192.0.2.81 --' |
+# |    tos=inherit          tos=inherit        |
+# |                                            |
+# |              $ol2 +                        |
+# |     192.0.2.17/28 |                        |
+# +-------------------|------------------------+
+#                     |
+# +-------------------|-----+
+# | H2                |     |
+# |               $h2 +     |
+# |     192.0.2.18/28       |
+# +-------------------------+
+
+ALL_TESTS="
+	ping_ipv4
+	multipath_ipv4
+"
+
+NUM_NETIFS=6
+source lib.sh
+
+h1_create()
+{
+	simple_if_init $h1 192.0.2.1/28 2001:db8:1::1/64
+	ip route add vrf v$h1 192.0.2.16/28 via 192.0.2.2
+}
+
+h1_destroy()
+{
+	ip route del vrf v$h1 192.0.2.16/28 via 192.0.2.2
+	simple_if_fini $h1 192.0.2.1/28
+}
+
+sw1_create()
+{
+	simple_if_init $ol1 192.0.2.2/28
+	__simple_if_init $ul1 v$ol1
+	vlan_create $ul1 111 v$ol1 192.0.2.129/28
+	vlan_create $ul1 222 v$ol1 192.0.2.145/28
+
+	tunnel_create g1a gre 192.0.2.65 192.0.2.66 tos inherit dev v$ol1
+	__simple_if_init g1a v$ol1 192.0.2.65/32
+	ip route add vrf v$ol1 192.0.2.66/32 via 192.0.2.130
+
+	tunnel_create g1b gre 192.0.2.81 192.0.2.82 tos inherit dev v$ol1
+	__simple_if_init g1b v$ol1 192.0.2.81/32
+	ip route add vrf v$ol1 192.0.2.82/32 via 192.0.2.146
+
+	ip route add vrf v$ol1 192.0.2.16/28 \
+	   nexthop dev g1a \
+	   nexthop dev g1b
+}
+
+sw1_destroy()
+{
+	ip route del vrf v$ol1 192.0.2.16/28
+
+	ip route del vrf v$ol1 192.0.2.82/32 via 192.0.2.146
+	__simple_if_fini g1b 192.0.2.81/32
+	tunnel_destroy g1b
+
+	ip route del vrf v$ol1 192.0.2.66/32 via 192.0.2.130
+	__simple_if_fini g1a 192.0.2.65/32
+	tunnel_destroy g1a
+
+	vlan_destroy $ul1 222
+	vlan_destroy $ul1 111
+	__simple_if_fini $ul1
+	simple_if_fini $ol1 192.0.2.2/28
+}
+
+sw2_create()
+{
+	simple_if_init $ol2 192.0.2.17/28
+	__simple_if_init $ul2 v$ol2
+	vlan_create $ul2 111 v$ol2 192.0.2.130/28
+	vlan_create $ul2 222 v$ol2 192.0.2.146/28
+
+	tunnel_create g2a gre 192.0.2.66 192.0.2.65 tos inherit dev v$ol2
+	__simple_if_init g2a v$ol2 192.0.2.66/32
+	ip route add vrf v$ol2 192.0.2.65/32 via 192.0.2.129
+
+	tunnel_create g2b gre 192.0.2.82 192.0.2.81 tos inherit dev v$ol2
+	__simple_if_init g2b v$ol2 192.0.2.82/32
+	ip route add vrf v$ol2 192.0.2.81/32 via 192.0.2.145
+
+	ip route add vrf v$ol2 192.0.2.0/28 \
+	   nexthop dev g2a \
+	   nexthop dev g2b
+
+	tc qdisc add dev $ul2 clsact
+	tc filter add dev $ul2 ingress pref 111 prot 802.1Q \
+	   flower vlan_id 111 action pass
+	tc filter add dev $ul2 ingress pref 222 prot 802.1Q \
+	   flower vlan_id 222 action pass
+}
+
+sw2_destroy()
+{
+	tc qdisc del dev $ul2 clsact
+
+	ip route del vrf v$ol2 192.0.2.0/28
+
+	ip route del vrf v$ol2 192.0.2.81/32 via 192.0.2.145
+	__simple_if_fini g2b 192.0.2.82/32
+	tunnel_destroy g2b
+
+	ip route del vrf v$ol2 192.0.2.65/32 via 192.0.2.129
+	__simple_if_fini g2a 192.0.2.66/32
+	tunnel_destroy g2a
+
+	vlan_destroy $ul2 222
+	vlan_destroy $ul2 111
+	__simple_if_fini $ul2
+	simple_if_fini $ol2 192.0.2.17/28
+}
+
+h2_create()
+{
+	simple_if_init $h2 192.0.2.18/28
+	ip route add vrf v$h2 192.0.2.0/28 via 192.0.2.17
+}
+
+h2_destroy()
+{
+	ip route del vrf v$h2 192.0.2.0/28 via 192.0.2.17
+	simple_if_fini $h2 192.0.2.18/28
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	ol1=${NETIFS[p2]}
+
+	ul1=${NETIFS[p3]}
+	ul2=${NETIFS[p4]}
+
+	ol2=${NETIFS[p5]}
+	h2=${NETIFS[p6]}
+
+	vrf_prepare
+	h1_create
+	sw1_create
+	sw2_create
+	h2_create
+
+	forwarding_enable
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	forwarding_restore
+
+	h2_destroy
+	sw2_destroy
+	sw1_destroy
+	h1_destroy
+	vrf_cleanup
+}
+
+multipath4_test()
+{
+	local what=$1; shift
+	local weight1=$1; shift
+	local weight2=$1; shift
+
+	sysctl_set net.ipv4.fib_multipath_hash_policy 1
+	ip route replace vrf v$ol1 192.0.2.16/28 \
+	   nexthop dev g1a weight $weight1 \
+	   nexthop dev g1b weight $weight2
+
+	local t0_111=$(tc_rule_stats_get $ul2 111 ingress)
+	local t0_222=$(tc_rule_stats_get $ul2 222 ingress)
+
+	ip vrf exec v$h1 \
+	   $MZ $h1 -q -p 64 -A 192.0.2.1 -B 192.0.2.18 \
+	       -d 1msec -t udp "sp=1024,dp=0-32768"
+
+	local t1_111=$(tc_rule_stats_get $ul2 111 ingress)
+	local t1_222=$(tc_rule_stats_get $ul2 222 ingress)
+
+	local d111=$((t1_111 - t0_111))
+	local d222=$((t1_222 - t0_222))
+	multipath_eval "$what" $weight1 $weight2 $d111 $d222
+
+	ip route replace vrf v$ol1 192.0.2.16/28 \
+	   nexthop dev g1a \
+	   nexthop dev g1b
+	sysctl_restore net.ipv4.fib_multipath_hash_policy
+}
+
+ping_ipv4()
+{
+	ping_test $h1 192.0.2.18
+}
+
+multipath_ipv4()
+{
+	log_info "Running IPv4 multipath tests"
+	multipath4_test "ECMP" 1 1
+	multipath4_test "Weighted MP 2:1" 2 1
+	multipath4_test "Weighted MP 11:45" 11 45
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh
new file mode 100644
index 000000000..08bac6cf1
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/lib.sh
@@ -0,0 +1,964 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+##############################################################################
+# Defines
+
+# Can be overridden by the configuration file.
+PING=${PING:=ping}
+PING6=${PING6:=ping6}
+MZ=${MZ:=mausezahn}
+ARPING=${ARPING:=arping}
+TEAMD=${TEAMD:=teamd}
+WAIT_TIME=${WAIT_TIME:=5}
+PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no}
+PAUSE_ON_CLEANUP=${PAUSE_ON_CLEANUP:=no}
+NETIF_TYPE=${NETIF_TYPE:=veth}
+NETIF_CREATE=${NETIF_CREATE:=yes}
+
+relative_path="${BASH_SOURCE%/*}"
+if [[ "$relative_path" == "${BASH_SOURCE}" ]]; then
+	relative_path="."
+fi
+
+if [[ -f $relative_path/forwarding.config ]]; then
+	source "$relative_path/forwarding.config"
+fi
+
+##############################################################################
+# Sanity checks
+
+check_tc_version()
+{
+	tc -j &> /dev/null
+	if [[ $? -ne 0 ]]; then
+		echo "SKIP: iproute2 too old; tc is missing JSON support"
+		exit 1
+	fi
+}
+
+check_tc_shblock_support()
+{
+	tc filter help 2>&1 | grep block &> /dev/null
+	if [[ $? -ne 0 ]]; then
+		echo "SKIP: iproute2 too old; tc is missing shared block support"
+		exit 1
+	fi
+}
+
+check_tc_chain_support()
+{
+	tc help 2>&1|grep chain &> /dev/null
+	if [[ $? -ne 0 ]]; then
+		echo "SKIP: iproute2 too old; tc is missing chain support"
+		exit 1
+	fi
+}
+
+if [[ "$(id -u)" -ne 0 ]]; then
+	echo "SKIP: need root privileges"
+	exit 0
+fi
+
+if [[ "$CHECK_TC" = "yes" ]]; then
+	check_tc_version
+fi
+
+require_command()
+{
+	local cmd=$1; shift
+
+	if [[ ! -x "$(command -v "$cmd")" ]]; then
+		echo "SKIP: $cmd not installed"
+		exit 1
+	fi
+}
+
+require_command jq
+require_command $MZ
+
+if [[ ! -v NUM_NETIFS ]]; then
+	echo "SKIP: importer does not define \"NUM_NETIFS\""
+	exit 1
+fi
+
+##############################################################################
+# Command line options handling
+
+count=0
+
+while [[ $# -gt 0 ]]; do
+	if [[ "$count" -eq "0" ]]; then
+		unset NETIFS
+		declare -A NETIFS
+	fi
+	count=$((count + 1))
+	NETIFS[p$count]="$1"
+	shift
+done
+
+##############################################################################
+# Network interfaces configuration
+
+create_netif_veth()
+{
+	local i
+
+	for i in $(eval echo {1..$NUM_NETIFS}); do
+		local j=$((i+1))
+
+		ip link show dev ${NETIFS[p$i]} &> /dev/null
+		if [[ $? -ne 0 ]]; then
+			ip link add ${NETIFS[p$i]} type veth \
+				peer name ${NETIFS[p$j]}
+			if [[ $? -ne 0 ]]; then
+				echo "Failed to create netif"
+				exit 1
+			fi
+		fi
+		i=$j
+	done
+}
+
+create_netif()
+{
+	case "$NETIF_TYPE" in
+	veth) create_netif_veth
+	      ;;
+	*) echo "Can not create interfaces of type \'$NETIF_TYPE\'"
+	   exit 1
+	   ;;
+	esac
+}
+
+if [[ "$NETIF_CREATE" = "yes" ]]; then
+	create_netif
+fi
+
+for i in $(eval echo {1..$NUM_NETIFS}); do
+	ip link show dev ${NETIFS[p$i]} &> /dev/null
+	if [[ $? -ne 0 ]]; then
+		echo "SKIP: could not find all required interfaces"
+		exit 1
+	fi
+done
+
+##############################################################################
+# Helpers
+
+# Exit status to return at the end. Set in case one of the tests fails.
+EXIT_STATUS=0
+# Per-test return value. Clear at the beginning of each test.
+RET=0
+
+check_err()
+{
+	local err=$1
+	local msg=$2
+
+	if [[ $RET -eq 0 && $err -ne 0 ]]; then
+		RET=$err
+		retmsg=$msg
+	fi
+}
+
+check_fail()
+{
+	local err=$1
+	local msg=$2
+
+	if [[ $RET -eq 0 && $err -eq 0 ]]; then
+		RET=1
+		retmsg=$msg
+	fi
+}
+
+check_err_fail()
+{
+	local should_fail=$1; shift
+	local err=$1; shift
+	local what=$1; shift
+
+	if ((should_fail)); then
+		check_fail $err "$what succeeded, but should have failed"
+	else
+		check_err $err "$what failed"
+	fi
+}
+
+log_test()
+{
+	local test_name=$1
+	local opt_str=$2
+
+	if [[ $# -eq 2 ]]; then
+		opt_str="($opt_str)"
+	fi
+
+	if [[ $RET -ne 0 ]]; then
+		EXIT_STATUS=1
+		printf "TEST: %-60s  [FAIL]\n" "$test_name $opt_str"
+		if [[ ! -z "$retmsg" ]]; then
+			printf "\t%s\n" "$retmsg"
+		fi
+		if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+			echo "Hit enter to continue, 'q' to quit"
+			read a
+			[ "$a" = "q" ] && exit 1
+		fi
+		return 1
+	fi
+
+	printf "TEST: %-60s  [PASS]\n" "$test_name $opt_str"
+	return 0
+}
+
+log_info()
+{
+	local msg=$1
+
+	echo "INFO: $msg"
+}
+
+setup_wait_dev()
+{
+	local dev=$1; shift
+
+	while true; do
+		ip link show dev $dev up \
+			| grep 'state UP' &> /dev/null
+		if [[ $? -ne 0 ]]; then
+			sleep 1
+		else
+			break
+		fi
+	done
+}
+
+setup_wait()
+{
+	local num_netifs=${1:-$NUM_NETIFS}
+
+	for ((i = 1; i <= num_netifs; ++i)); do
+		setup_wait_dev ${NETIFS[p$i]}
+	done
+
+	# Make sure links are ready.
+	sleep $WAIT_TIME
+}
+
+lldpad_app_wait_set()
+{
+	local dev=$1; shift
+
+	while lldptool -t -i $dev -V APP -c app | grep -Eq "pending|unknown"; do
+		echo "$dev: waiting for lldpad to push pending APP updates"
+		sleep 5
+	done
+}
+
+lldpad_app_wait_del()
+{
+	# Give lldpad a chance to push down the changes. If the device is downed
+	# too soon, the updates will be left pending. However, they will have
+	# been struck off the lldpad's DB already, so we won't be able to tell
+	# they are pending. Then on next test iteration this would cause
+	# weirdness as newly-added APP rules conflict with the old ones,
+	# sometimes getting stuck in an "unknown" state.
+	sleep 5
+}
+
+pre_cleanup()
+{
+	if [ "${PAUSE_ON_CLEANUP}" = "yes" ]; then
+		echo "Pausing before cleanup, hit any key to continue"
+		read
+	fi
+}
+
+vrf_prepare()
+{
+	ip -4 rule add pref 32765 table local
+	ip -4 rule del pref 0
+	ip -6 rule add pref 32765 table local
+	ip -6 rule del pref 0
+}
+
+vrf_cleanup()
+{
+	ip -6 rule add pref 0 table local
+	ip -6 rule del pref 32765
+	ip -4 rule add pref 0 table local
+	ip -4 rule del pref 32765
+}
+
+__last_tb_id=0
+declare -A __TB_IDS
+
+__vrf_td_id_assign()
+{
+	local vrf_name=$1
+
+	__last_tb_id=$((__last_tb_id + 1))
+	__TB_IDS[$vrf_name]=$__last_tb_id
+	return $__last_tb_id
+}
+
+__vrf_td_id_lookup()
+{
+	local vrf_name=$1
+
+	return ${__TB_IDS[$vrf_name]}
+}
+
+vrf_create()
+{
+	local vrf_name=$1
+	local tb_id
+
+	__vrf_td_id_assign $vrf_name
+	tb_id=$?
+
+	ip link add dev $vrf_name type vrf table $tb_id
+	ip -4 route add table $tb_id unreachable default metric 4278198272
+	ip -6 route add table $tb_id unreachable default metric 4278198272
+}
+
+vrf_destroy()
+{
+	local vrf_name=$1
+	local tb_id
+
+	__vrf_td_id_lookup $vrf_name
+	tb_id=$?
+
+	ip -6 route del table $tb_id unreachable default metric 4278198272
+	ip -4 route del table $tb_id unreachable default metric 4278198272
+	ip link del dev $vrf_name
+}
+
+__addr_add_del()
+{
+	local if_name=$1
+	local add_del=$2
+	local array
+
+	shift
+	shift
+	array=("${@}")
+
+	for addrstr in "${array[@]}"; do
+		ip address $add_del $addrstr dev $if_name
+	done
+}
+
+__simple_if_init()
+{
+	local if_name=$1; shift
+	local vrf_name=$1; shift
+	local addrs=("${@}")
+
+	ip link set dev $if_name master $vrf_name
+	ip link set dev $if_name up
+
+	__addr_add_del $if_name add "${addrs[@]}"
+}
+
+__simple_if_fini()
+{
+	local if_name=$1; shift
+	local addrs=("${@}")
+
+	__addr_add_del $if_name del "${addrs[@]}"
+
+	ip link set dev $if_name down
+	ip link set dev $if_name nomaster
+}
+
+simple_if_init()
+{
+	local if_name=$1
+	local vrf_name
+	local array
+
+	shift
+	vrf_name=v$if_name
+	array=("${@}")
+
+	vrf_create $vrf_name
+	ip link set dev $vrf_name up
+	__simple_if_init $if_name $vrf_name "${array[@]}"
+}
+
+simple_if_fini()
+{
+	local if_name=$1
+	local vrf_name
+	local array
+
+	shift
+	vrf_name=v$if_name
+	array=("${@}")
+
+	__simple_if_fini $if_name "${array[@]}"
+	vrf_destroy $vrf_name
+}
+
+tunnel_create()
+{
+	local name=$1; shift
+	local type=$1; shift
+	local local=$1; shift
+	local remote=$1; shift
+
+	ip link add name $name type $type \
+	   local $local remote $remote "$@"
+	ip link set dev $name up
+}
+
+tunnel_destroy()
+{
+	local name=$1; shift
+
+	ip link del dev $name
+}
+
+vlan_create()
+{
+	local if_name=$1; shift
+	local vid=$1; shift
+	local vrf=$1; shift
+	local ips=("${@}")
+	local name=$if_name.$vid
+
+	ip link add name $name link $if_name type vlan id $vid
+	if [ "$vrf" != "" ]; then
+		ip link set dev $name master $vrf
+	fi
+	ip link set dev $name up
+	__addr_add_del $name add "${ips[@]}"
+}
+
+vlan_destroy()
+{
+	local if_name=$1; shift
+	local vid=$1; shift
+	local name=$if_name.$vid
+
+	ip link del dev $name
+}
+
+team_create()
+{
+	local if_name=$1; shift
+	local mode=$1; shift
+
+	require_command $TEAMD
+	$TEAMD -t $if_name -d -c '{"runner": {"name": "'$mode'"}}'
+	for slave in "$@"; do
+		ip link set dev $slave down
+		ip link set dev $slave master $if_name
+		ip link set dev $slave up
+	done
+	ip link set dev $if_name up
+}
+
+team_destroy()
+{
+	local if_name=$1; shift
+
+	$TEAMD -t $if_name -k
+}
+
+master_name_get()
+{
+	local if_name=$1
+
+	ip -j link show dev $if_name | jq -r '.[]["master"]'
+}
+
+link_stats_tx_packets_get()
+{
+       local if_name=$1
+
+       ip -j -s link show dev $if_name | jq '.[]["stats64"]["tx"]["packets"]'
+}
+
+tc_rule_stats_get()
+{
+	local dev=$1; shift
+	local pref=$1; shift
+	local dir=$1; shift
+
+	tc -j -s filter show dev $dev ${dir:-ingress} pref $pref \
+	    | jq '.[1].options.actions[].stats.packets'
+}
+
+mac_get()
+{
+	local if_name=$1
+
+	ip -j link show dev $if_name | jq -r '.[]["address"]'
+}
+
+bridge_ageing_time_get()
+{
+	local bridge=$1
+	local ageing_time
+
+	# Need to divide by 100 to convert to seconds.
+	ageing_time=$(ip -j -d link show dev $bridge \
+		      | jq '.[]["linkinfo"]["info_data"]["ageing_time"]')
+	echo $((ageing_time / 100))
+}
+
+declare -A SYSCTL_ORIG
+sysctl_set()
+{
+	local key=$1; shift
+	local value=$1; shift
+
+	SYSCTL_ORIG[$key]=$(sysctl -n $key)
+	sysctl -qw $key=$value
+}
+
+sysctl_restore()
+{
+	local key=$1; shift
+
+	sysctl -qw $key=${SYSCTL_ORIG["$key"]}
+}
+
+forwarding_enable()
+{
+	sysctl_set net.ipv4.conf.all.forwarding 1
+	sysctl_set net.ipv6.conf.all.forwarding 1
+}
+
+forwarding_restore()
+{
+	sysctl_restore net.ipv6.conf.all.forwarding
+	sysctl_restore net.ipv4.conf.all.forwarding
+}
+
+tc_offload_check()
+{
+	local num_netifs=${1:-$NUM_NETIFS}
+
+	for ((i = 1; i <= num_netifs; ++i)); do
+		ethtool -k ${NETIFS[p$i]} \
+			| grep "hw-tc-offload: on" &> /dev/null
+		if [[ $? -ne 0 ]]; then
+			return 1
+		fi
+	done
+
+	return 0
+}
+
+trap_install()
+{
+	local dev=$1; shift
+	local direction=$1; shift
+
+	# Some devices may not support or need in-hardware trapping of traffic
+	# (e.g. the veth pairs that this library creates for non-existent
+	# loopbacks). Use continue instead, so that there is a filter in there
+	# (some tests check counters), and so that other filters are still
+	# processed.
+	tc filter add dev $dev $direction pref 1 \
+		flower skip_sw action trap 2>/dev/null \
+	    || tc filter add dev $dev $direction pref 1 \
+		       flower action continue
+}
+
+trap_uninstall()
+{
+	local dev=$1; shift
+	local direction=$1; shift
+
+	tc filter del dev $dev $direction pref 1 flower
+}
+
+slow_path_trap_install()
+{
+	# For slow-path testing, we need to install a trap to get to
+	# slow path the packets that would otherwise be switched in HW.
+	if [ "${tcflags/skip_hw}" != "$tcflags" ]; then
+		trap_install "$@"
+	fi
+}
+
+slow_path_trap_uninstall()
+{
+	if [ "${tcflags/skip_hw}" != "$tcflags" ]; then
+		trap_uninstall "$@"
+	fi
+}
+
+__icmp_capture_add_del()
+{
+	local add_del=$1; shift
+	local pref=$1; shift
+	local vsuf=$1; shift
+	local tundev=$1; shift
+	local filter=$1; shift
+
+	tc filter $add_del dev "$tundev" ingress \
+	   proto ip$vsuf pref $pref \
+	   flower ip_proto icmp$vsuf $filter \
+	   action pass
+}
+
+icmp_capture_install()
+{
+	__icmp_capture_add_del add 100 "" "$@"
+}
+
+icmp_capture_uninstall()
+{
+	__icmp_capture_add_del del 100 "" "$@"
+}
+
+icmp6_capture_install()
+{
+	__icmp_capture_add_del add 100 v6 "$@"
+}
+
+icmp6_capture_uninstall()
+{
+	__icmp_capture_add_del del 100 v6 "$@"
+}
+
+__vlan_capture_add_del()
+{
+	local add_del=$1; shift
+	local pref=$1; shift
+	local dev=$1; shift
+	local filter=$1; shift
+
+	tc filter $add_del dev "$dev" ingress \
+	   proto 802.1q pref $pref \
+	   flower $filter \
+	   action pass
+}
+
+vlan_capture_install()
+{
+	__vlan_capture_add_del add 100 "$@"
+}
+
+vlan_capture_uninstall()
+{
+	__vlan_capture_add_del del 100 "$@"
+}
+
+__dscp_capture_add_del()
+{
+	local add_del=$1; shift
+	local dev=$1; shift
+	local base=$1; shift
+	local dscp;
+
+	for prio in {0..7}; do
+		dscp=$((base + prio))
+		__icmp_capture_add_del $add_del $((dscp + 100)) "" $dev \
+				       "skip_hw ip_tos $((dscp << 2))"
+	done
+}
+
+dscp_capture_install()
+{
+	local dev=$1; shift
+	local base=$1; shift
+
+	__dscp_capture_add_del add $dev $base
+}
+
+dscp_capture_uninstall()
+{
+	local dev=$1; shift
+	local base=$1; shift
+
+	__dscp_capture_add_del del $dev $base
+}
+
+dscp_fetch_stats()
+{
+	local dev=$1; shift
+	local base=$1; shift
+
+	for prio in {0..7}; do
+		local dscp=$((base + prio))
+		local t=$(tc_rule_stats_get $dev $((dscp + 100)))
+		echo "[$dscp]=$t "
+	done
+}
+
+matchall_sink_create()
+{
+	local dev=$1; shift
+
+	tc qdisc add dev $dev clsact
+	tc filter add dev $dev ingress \
+	   pref 10000 \
+	   matchall \
+	   action drop
+}
+
+tests_run()
+{
+	local current_test
+
+	for current_test in ${TESTS:-$ALL_TESTS}; do
+		$current_test
+	done
+}
+
+multipath_eval()
+{
+	local desc="$1"
+	local weight_rp12=$2
+	local weight_rp13=$3
+	local packets_rp12=$4
+	local packets_rp13=$5
+	local weights_ratio packets_ratio diff
+
+	RET=0
+
+	if [[ "$weight_rp12" -gt "$weight_rp13" ]]; then
+		weights_ratio=$(echo "scale=2; $weight_rp12 / $weight_rp13" \
+				| bc -l)
+	else
+		weights_ratio=$(echo "scale=2; $weight_rp13 / $weight_rp12" \
+				| bc -l)
+	fi
+
+	if [[ "$packets_rp12" -eq "0" || "$packets_rp13" -eq "0" ]]; then
+	       check_err 1 "Packet difference is 0"
+	       log_test "Multipath"
+	       log_info "Expected ratio $weights_ratio"
+	       return
+	fi
+
+	if [[ "$weight_rp12" -gt "$weight_rp13" ]]; then
+		packets_ratio=$(echo "scale=2; $packets_rp12 / $packets_rp13" \
+				| bc -l)
+	else
+		packets_ratio=$(echo "scale=2; $packets_rp13 / $packets_rp12" \
+				| bc -l)
+	fi
+
+	diff=$(echo $weights_ratio - $packets_ratio | bc -l)
+	diff=${diff#-}
+
+	test "$(echo "$diff / $weights_ratio > 0.15" | bc -l)" -eq 0
+	check_err $? "Too large discrepancy between expected and measured ratios"
+	log_test "$desc"
+	log_info "Expected ratio $weights_ratio Measured ratio $packets_ratio"
+}
+
+##############################################################################
+# Tests
+
+ping_do()
+{
+	local if_name=$1
+	local dip=$2
+	local vrf_name
+
+	vrf_name=$(master_name_get $if_name)
+	ip vrf exec $vrf_name $PING $dip -c 10 -i 0.1 -w 2 &> /dev/null
+}
+
+ping_test()
+{
+	RET=0
+
+	ping_do $1 $2
+	check_err $?
+	log_test "ping"
+}
+
+ping6_do()
+{
+	local if_name=$1
+	local dip=$2
+	local vrf_name
+
+	vrf_name=$(master_name_get $if_name)
+	ip vrf exec $vrf_name $PING6 $dip -c 10 -i 0.1 -w 2 &> /dev/null
+}
+
+ping6_test()
+{
+	RET=0
+
+	ping6_do $1 $2
+	check_err $?
+	log_test "ping6"
+}
+
+learning_test()
+{
+	local bridge=$1
+	local br_port1=$2	# Connected to `host1_if`.
+	local host1_if=$3
+	local host2_if=$4
+	local mac=de:ad:be:ef:13:37
+	local ageing_time
+
+	RET=0
+
+	bridge -j fdb show br $bridge brport $br_port1 \
+		| jq -e ".[] | select(.mac == \"$mac\")" &> /dev/null
+	check_fail $? "Found FDB record when should not"
+
+	# Disable unknown unicast flooding on `br_port1` to make sure
+	# packets are only forwarded through the port after a matching
+	# FDB entry was installed.
+	bridge link set dev $br_port1 flood off
+
+	tc qdisc add dev $host1_if ingress
+	tc filter add dev $host1_if ingress protocol ip pref 1 handle 101 \
+		flower dst_mac $mac action drop
+
+	$MZ $host2_if -c 1 -p 64 -b $mac -t ip -q
+	sleep 1
+
+	tc -j -s filter show dev $host1_if ingress \
+		| jq -e ".[] | select(.options.handle == 101) \
+		| select(.options.actions[0].stats.packets == 1)" &> /dev/null
+	check_fail $? "Packet reached second host when should not"
+
+	$MZ $host1_if -c 1 -p 64 -a $mac -t ip -q
+	sleep 1
+
+	bridge -j fdb show br $bridge brport $br_port1 \
+		| jq -e ".[] | select(.mac == \"$mac\")" &> /dev/null
+	check_err $? "Did not find FDB record when should"
+
+	$MZ $host2_if -c 1 -p 64 -b $mac -t ip -q
+	sleep 1
+
+	tc -j -s filter show dev $host1_if ingress \
+		| jq -e ".[] | select(.options.handle == 101) \
+		| select(.options.actions[0].stats.packets == 1)" &> /dev/null
+	check_err $? "Packet did not reach second host when should"
+
+	# Wait for 10 seconds after the ageing time to make sure FDB
+	# record was aged-out.
+	ageing_time=$(bridge_ageing_time_get $bridge)
+	sleep $((ageing_time + 10))
+
+	bridge -j fdb show br $bridge brport $br_port1 \
+		| jq -e ".[] | select(.mac == \"$mac\")" &> /dev/null
+	check_fail $? "Found FDB record when should not"
+
+	bridge link set dev $br_port1 learning off
+
+	$MZ $host1_if -c 1 -p 64 -a $mac -t ip -q
+	sleep 1
+
+	bridge -j fdb show br $bridge brport $br_port1 \
+		| jq -e ".[] | select(.mac == \"$mac\")" &> /dev/null
+	check_fail $? "Found FDB record when should not"
+
+	bridge link set dev $br_port1 learning on
+
+	tc filter del dev $host1_if ingress protocol ip pref 1 handle 101 flower
+	tc qdisc del dev $host1_if ingress
+
+	bridge link set dev $br_port1 flood on
+
+	log_test "FDB learning"
+}
+
+flood_test_do()
+{
+	local should_flood=$1
+	local mac=$2
+	local ip=$3
+	local host1_if=$4
+	local host2_if=$5
+	local err=0
+
+	# Add an ACL on `host2_if` which will tell us whether the packet
+	# was flooded to it or not.
+	tc qdisc add dev $host2_if ingress
+	tc filter add dev $host2_if ingress protocol ip pref 1 handle 101 \
+		flower dst_mac $mac action drop
+
+	$MZ $host1_if -c 1 -p 64 -b $mac -B $ip -t ip -q
+	sleep 1
+
+	tc -j -s filter show dev $host2_if ingress \
+		| jq -e ".[] | select(.options.handle == 101) \
+		| select(.options.actions[0].stats.packets == 1)" &> /dev/null
+	if [[ $? -ne 0 && $should_flood == "true" || \
+	      $? -eq 0 && $should_flood == "false" ]]; then
+		err=1
+	fi
+
+	tc filter del dev $host2_if ingress protocol ip pref 1 handle 101 flower
+	tc qdisc del dev $host2_if ingress
+
+	return $err
+}
+
+flood_unicast_test()
+{
+	local br_port=$1
+	local host1_if=$2
+	local host2_if=$3
+	local mac=de:ad:be:ef:13:37
+	local ip=192.0.2.100
+
+	RET=0
+
+	bridge link set dev $br_port flood off
+
+	flood_test_do false $mac $ip $host1_if $host2_if
+	check_err $? "Packet flooded when should not"
+
+	bridge link set dev $br_port flood on
+
+	flood_test_do true $mac $ip $host1_if $host2_if
+	check_err $? "Packet was not flooded when should"
+
+	log_test "Unknown unicast flood"
+}
+
+flood_multicast_test()
+{
+	local br_port=$1
+	local host1_if=$2
+	local host2_if=$3
+	local mac=01:00:5e:00:00:01
+	local ip=239.0.0.1
+
+	RET=0
+
+	bridge link set dev $br_port mcast_flood off
+
+	flood_test_do false $mac $ip $host1_if $host2_if
+	check_err $? "Packet flooded when should not"
+
+	bridge link set dev $br_port mcast_flood on
+
+	flood_test_do true $mac $ip $host1_if $host2_if
+	check_err $? "Packet was not flooded when should"
+
+	log_test "Unregistered multicast flood"
+}
+
+flood_test()
+{
+	# `br_port` is connected to `host2_if`
+	local br_port=$1
+	local host1_if=$2
+	local host2_if=$3
+
+	flood_unicast_test $br_port $host1_if $host2_if
+	flood_multicast_test $br_port $host1_if $host2_if
+}
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre.sh b/tools/testing/selftests/net/forwarding/mirror_gre.sh
new file mode 100755
index 000000000..026644360
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/mirror_gre.sh
@@ -0,0 +1,160 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test uses standard topology for testing gretap. See
+# mirror_gre_topo_lib.sh for more details.
+#
+# Test for "tc action mirred egress mirror" when the device to mirror to is a
+# gretap or ip6gretap netdevice. Expect that the packets come out encapsulated,
+# and another gretap / ip6gretap netdevice is then capable of decapsulating the
+# traffic. Test that the payload is what is expected (ICMP ping request or
+# reply, depending on test).
+
+ALL_TESTS="
+	test_gretap
+	test_ip6gretap
+	test_gretap_mac
+	test_ip6gretap_mac
+	test_two_spans
+"
+
+NUM_NETIFS=6
+source lib.sh
+source mirror_lib.sh
+source mirror_gre_lib.sh
+source mirror_gre_topo_lib.sh
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	swp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	swp3=${NETIFS[p5]}
+	h3=${NETIFS[p6]}
+
+	vrf_prepare
+	mirror_gre_topo_create
+
+	ip address add dev $swp3 192.0.2.129/28
+	ip address add dev $h3 192.0.2.130/28
+
+	ip address add dev $swp3 2001:db8:2::1/64
+	ip address add dev $h3 2001:db8:2::2/64
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	ip address del dev $h3 2001:db8:2::2/64
+	ip address del dev $swp3 2001:db8:2::1/64
+
+	ip address del dev $h3 192.0.2.130/28
+	ip address del dev $swp3 192.0.2.129/28
+
+	mirror_gre_topo_destroy
+	vrf_cleanup
+}
+
+test_span_gre_mac()
+{
+	local tundev=$1; shift
+	local direction=$1; shift
+	local what=$1; shift
+
+	case "$direction" in
+	ingress) local src_mac=$(mac_get $h1); local dst_mac=$(mac_get $h2)
+		;;
+	egress) local src_mac=$(mac_get $h2); local dst_mac=$(mac_get $h1)
+		;;
+	esac
+
+	RET=0
+
+	mirror_install $swp1 $direction $tundev "matchall $tcflags"
+	icmp_capture_install h3-${tundev} "src_mac $src_mac dst_mac $dst_mac"
+
+	mirror_test v$h1 192.0.2.1 192.0.2.2 h3-${tundev} 100 10
+
+	icmp_capture_uninstall h3-${tundev}
+	mirror_uninstall $swp1 $direction
+
+	log_test "$direction $what: envelope MAC ($tcflags)"
+}
+
+test_two_spans()
+{
+	RET=0
+
+	mirror_install $swp1 ingress gt4 "matchall $tcflags"
+	mirror_install $swp1 egress gt6 "matchall $tcflags"
+	quick_test_span_gre_dir gt4 ingress
+	quick_test_span_gre_dir gt6 egress
+
+	mirror_uninstall $swp1 ingress
+	fail_test_span_gre_dir gt4 ingress
+	quick_test_span_gre_dir gt6 egress
+
+	mirror_install $swp1 ingress gt4 "matchall $tcflags"
+	mirror_uninstall $swp1 egress
+	quick_test_span_gre_dir gt4 ingress
+	fail_test_span_gre_dir gt6 egress
+
+	mirror_uninstall $swp1 ingress
+	log_test "two simultaneously configured mirrors ($tcflags)"
+}
+
+test_gretap()
+{
+	full_test_span_gre_dir gt4 ingress 8 0 "mirror to gretap"
+	full_test_span_gre_dir gt4 egress 0 8 "mirror to gretap"
+}
+
+test_ip6gretap()
+{
+	full_test_span_gre_dir gt6 ingress 8 0 "mirror to ip6gretap"
+	full_test_span_gre_dir gt6 egress 0 8 "mirror to ip6gretap"
+}
+
+test_gretap_mac()
+{
+	test_span_gre_mac gt4 ingress "mirror to gretap"
+	test_span_gre_mac gt4 egress "mirror to gretap"
+}
+
+test_ip6gretap_mac()
+{
+	test_span_gre_mac gt6 ingress "mirror to ip6gretap"
+	test_span_gre_mac gt6 egress "mirror to ip6gretap"
+}
+
+test_all()
+{
+	slow_path_trap_install $swp1 ingress
+	slow_path_trap_install $swp1 egress
+
+	tests_run
+
+	slow_path_trap_uninstall $swp1 egress
+	slow_path_trap_uninstall $swp1 ingress
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tcflags="skip_hw"
+test_all
+
+if ! tc_offload_check; then
+	echo "WARN: Could not test offloaded functionality"
+else
+	tcflags="skip_sw"
+	test_all
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_bound.sh b/tools/testing/selftests/net/forwarding/mirror_gre_bound.sh
new file mode 100755
index 000000000..360ca133b
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_bound.sh
@@ -0,0 +1,226 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+#   +---------------------+                             +---------------------+
+#   | H1                  |                             |                  H2 |
+#   |     + $h1           |                             |           $h2 +     |
+#   |     | 192.0.2.1/28  |                             |  192.0.2.2/28 |     |
+#   +-----|---------------+                             +---------------|-----+
+#         |                                                             |
+#   +-----|-------------------------------------------------------------|-----+
+#   | SW  o--> mirror                                                   |     |
+#   | +---|-------------------------------------------------------------|---+ |
+#   | |   + $swp1                    BR                           $swp2 +   | |
+#   | +---------------------------------------------------------------------+ |
+#   |                                                                         |
+#   | +---------------------------------------------------------------------+ |
+#   | | OL                      + gt6 (ip6gretap)      + gt4 (gretap)       | |
+#   | |                         : loc=2001:db8:2::1    : loc=192.0.2.129    | |
+#   | |                         : rem=2001:db8:2::2    : rem=192.0.2.130    | |
+#   | |                         : ttl=100              : ttl=100            | |
+#   | |                         : tos=inherit          : tos=inherit        | |
+#   | +-------------------------:--|-------------------:--|-----------------+ |
+#   |                           :  |                   :  |                   |
+#   | +-------------------------:--|-------------------:--|-----------------+ |
+#   | | UL                      :  |,---------------------'                 | |
+#   | |   + $swp3               :  ||                  :                    | |
+#   | |   | 192.0.2.129/28      :  vv                  :                    | |
+#   | |   | 2001:db8:2::1/64    :  + ul (dummy)        :                    | |
+#   | +---|---------------------:----------------------:--------------------+ |
+#   +-----|---------------------:----------------------:----------------------+
+#         |                     :                      :
+#   +-----|---------------------:----------------------:----------------------+
+#   | H3  + $h3                 + h3-gt6 (ip6gretap)   + h3-gt4 (gretap)      |
+#   |       192.0.2.130/28        loc=2001:db8:2::2      loc=192.0.2.130      |
+#   |       2001:db8:2::2/64      rem=2001:db8:2::1      rem=192.0.2.129      |
+#   |                             ttl=100                ttl=100              |
+#   |                             tos=inherit            tos=inherit          |
+#   |                                                                         |
+#   +-------------------------------------------------------------------------+
+#
+# This tests mirroring to gretap and ip6gretap configured in an overlay /
+# underlay manner, i.e. with a bound dummy device that marks underlay VRF where
+# the encapsulated packed should be routed.
+
+ALL_TESTS="
+	test_gretap
+	test_ip6gretap
+"
+
+NUM_NETIFS=6
+source lib.sh
+source mirror_lib.sh
+source mirror_gre_lib.sh
+
+h1_create()
+{
+	simple_if_init $h1 192.0.2.1/28
+}
+
+h1_destroy()
+{
+	simple_if_fini $h1 192.0.2.1/28
+}
+
+h2_create()
+{
+	simple_if_init $h2 192.0.2.2/28
+}
+
+h2_destroy()
+{
+	simple_if_fini $h2 192.0.2.2/28
+}
+
+h3_create()
+{
+	simple_if_init $h3 192.0.2.130/28 2001:db8:2::2/64
+
+	tunnel_create h3-gt4 gretap 192.0.2.130 192.0.2.129
+	ip link set h3-gt4 vrf v$h3
+	matchall_sink_create h3-gt4
+
+	tunnel_create h3-gt6 ip6gretap 2001:db8:2::2 2001:db8:2::1
+	ip link set h3-gt6 vrf v$h3
+	matchall_sink_create h3-gt6
+}
+
+h3_destroy()
+{
+	tunnel_destroy h3-gt6
+	tunnel_destroy h3-gt4
+
+	simple_if_fini $h3 192.0.2.130/28 2001:db8:2::2/64
+}
+
+switch_create()
+{
+	# Bridge between H1 and H2.
+
+	ip link add name br1 type bridge vlan_filtering 1
+	ip link set dev br1 up
+
+	ip link set dev $swp1 master br1
+	ip link set dev $swp1 up
+
+	ip link set dev $swp2 master br1
+	ip link set dev $swp2 up
+
+	tc qdisc add dev $swp1 clsact
+
+	# Underlay.
+
+	simple_if_init $swp3 192.0.2.129/28 2001:db8:2::1/64
+
+	ip link add name ul type dummy
+	ip link set dev ul master v$swp3
+	ip link set dev ul up
+
+	# Overlay.
+
+	vrf_create vrf-ol
+	ip link set dev vrf-ol up
+
+	tunnel_create gt4 gretap 192.0.2.129 192.0.2.130 \
+		      ttl 100 tos inherit dev ul
+	ip link set dev gt4 master vrf-ol
+	ip link set dev gt4 up
+
+	tunnel_create gt6 ip6gretap 2001:db8:2::1 2001:db8:2::2 \
+		      ttl 100 tos inherit dev ul allow-localremote
+	ip link set dev gt6 master vrf-ol
+	ip link set dev gt6 up
+}
+
+switch_destroy()
+{
+	vrf_destroy vrf-ol
+
+	tunnel_destroy gt6
+	tunnel_destroy gt4
+
+	simple_if_fini $swp3 192.0.2.129/28 2001:db8:2::1/64
+
+	ip link del dev ul
+
+	tc qdisc del dev $swp1 clsact
+
+	ip link set dev $swp1 down
+	ip link set dev $swp2 down
+	ip link del dev br1
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	swp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	swp3=${NETIFS[p5]}
+	h3=${NETIFS[p6]}
+
+	vrf_prepare
+
+	h1_create
+	h2_create
+	h3_create
+
+	switch_create
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	switch_destroy
+
+	h3_destroy
+	h2_destroy
+	h1_destroy
+
+	vrf_cleanup
+}
+
+test_gretap()
+{
+	full_test_span_gre_dir gt4 ingress 8 0 "mirror to gretap w/ UL"
+	full_test_span_gre_dir gt4 egress  0 8 "mirror to gretap w/ UL"
+}
+
+test_ip6gretap()
+{
+	full_test_span_gre_dir gt6 ingress 8 0 "mirror to ip6gretap w/ UL"
+	full_test_span_gre_dir gt6 egress  0 8 "mirror to ip6gretap w/ UL"
+}
+
+test_all()
+{
+	RET=0
+
+	slow_path_trap_install $swp1 ingress
+	slow_path_trap_install $swp1 egress
+
+	tests_run
+
+	slow_path_trap_uninstall $swp1 egress
+	slow_path_trap_uninstall $swp1 ingress
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tcflags="skip_hw"
+test_all
+
+if ! tc_offload_check; then
+	echo "WARN: Could not test offloaded functionality"
+else
+	tcflags="skip_sw"
+	test_all
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d.sh b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d.sh
new file mode 100755
index 000000000..c5095da7f
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d.sh
@@ -0,0 +1,132 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test for "tc action mirred egress mirror" when the underlay route points at a
+# bridge device without vlan filtering (802.1d).
+#
+# This test uses standard topology for testing mirror-to-gretap. See
+# mirror_gre_topo_lib.sh for more details. The full topology is as follows:
+#
+#  +---------------------+                             +---------------------+
+#  | H1                  |                             |                  H2 |
+#  |     + $h1           |                             |           $h2 +     |
+#  |     | 192.0.2.1/28  |                             |  192.0.2.2/28 |     |
+#  +-----|---------------+                             +---------------|-----+
+#        |                                                             |
+#  +-----|-------------------------------------------------------------|-----+
+#  | SW  o---> mirror                                                  |     |
+#  | +---|-------------------------------------------------------------|---+ |
+#  | |   + $swp1            + br1 (802.1q bridge)                $swp2 +   | |
+#  | +---------------------------------------------------------------------+ |
+#  |                                                                         |
+#  | +---------------------------------------------------------------------+ |
+#  | |                      + br2 (802.1d bridge)                          | |
+#  | |                        192.0.2.129/28                               | |
+#  | |   + $swp3              2001:db8:2::1/64                             | |
+#  | +---|-----------------------------------------------------------------+ |
+#  |     |                                          ^                    ^   |
+#  |     |                     + gt6 (ip6gretap)    | + gt4 (gretap)     |   |
+#  |     |                     : loc=2001:db8:2::1  | : loc=192.0.2.129  |   |
+#  |     |                     : rem=2001:db8:2::2 -+ : rem=192.0.2.130 -+   |
+#  |     |                     : ttl=100              : ttl=100              |
+#  |     |                     : tos=inherit          : tos=inherit          |
+#  +-----|---------------------:----------------------:----------------------+
+#        |                     :                      :
+#  +-----|---------------------:----------------------:----------------------+
+#  | H3  + $h3                 + h3-gt6(ip6gretap)    + h3-gt4 (gretap)      |
+#  |       192.0.2.130/28        loc=2001:db8:2::2      loc=192.0.2.130      |
+#  |       2001:db8:2::2/64      rem=2001:db8:2::1      rem=192.0.2.129      |
+#  |                             ttl=100                ttl=100              |
+#  |                             tos=inherit            tos=inherit          |
+#  +-------------------------------------------------------------------------+
+
+ALL_TESTS="
+	test_gretap
+	test_ip6gretap
+"
+
+NUM_NETIFS=6
+source lib.sh
+source mirror_lib.sh
+source mirror_gre_lib.sh
+source mirror_gre_topo_lib.sh
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	swp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	swp3=${NETIFS[p5]}
+	h3=${NETIFS[p6]}
+
+	vrf_prepare
+	mirror_gre_topo_create
+
+	ip link add name br2 type bridge vlan_filtering 0
+	ip link set dev br2 up
+
+	ip link set dev $swp3 master br2
+	ip route add 192.0.2.130/32 dev br2
+	ip -6 route add 2001:db8:2::2/128 dev br2
+
+	ip address add dev br2 192.0.2.129/28
+	ip address add dev br2 2001:db8:2::1/64
+
+	ip address add dev $h3 192.0.2.130/28
+	ip address add dev $h3 2001:db8:2::2/64
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	ip address del dev $h3 2001:db8:2::2/64
+	ip address del dev $h3 192.0.2.130/28
+	ip link del dev br2
+
+	mirror_gre_topo_destroy
+	vrf_cleanup
+}
+
+test_gretap()
+{
+	full_test_span_gre_dir gt4 ingress 8 0 "mirror to gretap"
+	full_test_span_gre_dir gt4 egress 0 8 "mirror to gretap"
+}
+
+test_ip6gretap()
+{
+	full_test_span_gre_dir gt6 ingress 8 0 "mirror to ip6gretap"
+	full_test_span_gre_dir gt6 egress 0 8 "mirror to ip6gretap"
+}
+
+test_all()
+{
+	slow_path_trap_install $swp1 ingress
+	slow_path_trap_install $swp1 egress
+
+	tests_run
+
+	slow_path_trap_uninstall $swp1 egress
+	slow_path_trap_uninstall $swp1 ingress
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tcflags="skip_hw"
+test_all
+
+if ! tc_offload_check; then
+	echo "WARN: Could not test offloaded functionality"
+else
+	tcflags="skip_sw"
+	test_all
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d_vlan.sh b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d_vlan.sh
new file mode 100755
index 000000000..f8cda822c
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d_vlan.sh
@@ -0,0 +1,132 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test uses standard topology for testing gretap. See
+# mirror_gre_topo_lib.sh for more details.
+#
+# Test for "tc action mirred egress mirror" when the underlay route points at a
+# bridge device without vlan filtering (802.1d). The device attached to that
+# bridge is a VLAN.
+
+ALL_TESTS="
+	test_gretap
+	test_ip6gretap
+	test_gretap_stp
+	test_ip6gretap_stp
+"
+
+NUM_NETIFS=6
+source lib.sh
+source mirror_lib.sh
+source mirror_gre_lib.sh
+source mirror_gre_topo_lib.sh
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	swp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	swp3=${NETIFS[p5]}
+	h3=${NETIFS[p6]}
+
+	vrf_prepare
+	mirror_gre_topo_create
+
+	ip link add name br2 type bridge vlan_filtering 0
+	ip link set dev br2 up
+
+	vlan_create $swp3 555
+
+	ip link set dev $swp3.555 master br2
+	ip route add 192.0.2.130/32 dev br2
+	ip -6 route add 2001:db8:2::2/128 dev br2
+
+	ip address add dev br2 192.0.2.129/32
+	ip address add dev br2 2001:db8:2::1/128
+
+	vlan_create $h3 555 v$h3 192.0.2.130/28 2001:db8:2::2/64
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	vlan_destroy $h3 555
+	ip link del dev br2
+	vlan_destroy $swp3 555
+
+	mirror_gre_topo_destroy
+	vrf_cleanup
+}
+
+test_vlan_match()
+{
+	local tundev=$1; shift
+	local vlan_match=$1; shift
+	local what=$1; shift
+
+	full_test_span_gre_dir_vlan $tundev ingress "$vlan_match" 8 0 "$what"
+	full_test_span_gre_dir_vlan $tundev egress "$vlan_match" 0 8 "$what"
+}
+
+test_gretap()
+{
+	test_vlan_match gt4 'skip_hw vlan_id 555 vlan_ethtype ip' \
+			"mirror to gretap"
+}
+
+test_ip6gretap()
+{
+	test_vlan_match gt6 'skip_hw vlan_id 555 vlan_ethtype ip' \
+			"mirror to ip6gretap"
+}
+
+test_gretap_stp()
+{
+	# Sometimes after mirror installation, the neighbor's state is not valid.
+	# The reason is that there is no SW datapath activity related to the
+	# neighbor for the remote GRE address. Therefore whether the corresponding
+	# neighbor will be valid is a matter of luck, and the test is thus racy.
+	# Set the neighbor's state to permanent, so it would be always valid.
+	ip neigh replace 192.0.2.130 lladdr $(mac_get $h3) \
+		nud permanent dev br2
+	full_test_span_gre_stp gt4 $swp3.555 "mirror to gretap"
+}
+
+test_ip6gretap_stp()
+{
+	ip neigh replace 2001:db8:2::2 lladdr $(mac_get $h3) \
+		nud permanent dev br2
+	full_test_span_gre_stp gt6 $swp3.555 "mirror to ip6gretap"
+}
+
+test_all()
+{
+	slow_path_trap_install $swp1 ingress
+	slow_path_trap_install $swp1 egress
+
+	tests_run
+
+	slow_path_trap_uninstall $swp1 egress
+	slow_path_trap_uninstall $swp1 ingress
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tcflags="skip_hw"
+test_all
+
+if ! tc_offload_check; then
+	echo "WARN: Could not test offloaded functionality"
+else
+	tcflags="skip_sw"
+	test_all
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q.sh b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q.sh
new file mode 100755
index 000000000..9ff22f280
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q.sh
@@ -0,0 +1,129 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test for "tc action mirred egress mirror" when the underlay route points at a
+# bridge device with vlan filtering (802.1q).
+#
+# This test uses standard topology for testing mirror-to-gretap. See
+# mirror_gre_topo_lib.sh for more details. The full topology is as follows:
+#
+#  +---------------------+                               +---------------------+
+#  | H1                  |                               |                  H2 |
+#  |     + $h1           |                               |           $h2 +     |
+#  |     | 192.0.2.1/28  |                               |  192.0.2.2/28 |     |
+#  +-----|---------------+                               +---------------|-----+
+#        |                                                               |
+#  +-----|---------------------------------------------------------------|-----+
+#  | SW  o---> mirror                                                    |     |
+#  | +---|---------------------------------------------------------------|---+ |
+#  | |   + $swp1                  + br1 (802.1q bridge)            $swp2 +   | |
+#  | |                              192.0.2.129/28                           | |
+#  | |   + $swp3                    2001:db8:2::1/64                         | |
+#  | |   | vid555                   vid555[pvid,untagged]                    | |
+#  | +---|-------------------------------------------------------------------+ |
+#  |     |                                          ^                      ^   |
+#  |     |                     + gt6 (ip6gretap)    |   + gt4 (gretap)     |   |
+#  |     |                     : loc=2001:db8:2::1  |   : loc=192.0.2.129  |   |
+#  |     |                     : rem=2001:db8:2::2 -+   : rem=192.0.2.130 -+   |
+#  |     |                     : ttl=100                : ttl=100              |
+#  |     |                     : tos=inherit            : tos=inherit          |
+#  +-----|---------------------:------------------------:----------------------+
+#        |                     :                        :
+#  +-----|---------------------:------------------------:----------------------+
+#  | H3  + $h3                 + h3-gt6(ip6gretap)      + h3-gt4 (gretap)      |
+#  |     |                       loc=2001:db8:2::2        loc=192.0.2.130      |
+#  |     + $h3.555               rem=2001:db8:2::1        rem=192.0.2.129      |
+#  |       192.0.2.130/28        ttl=100                  ttl=100              |
+#  |       2001:db8:2::2/64      tos=inherit              tos=inherit          |
+#  +---------------------------------------------------------------------------+
+
+ALL_TESTS="
+	test_gretap
+	test_ip6gretap
+"
+
+NUM_NETIFS=6
+source lib.sh
+source mirror_lib.sh
+source mirror_gre_lib.sh
+source mirror_gre_topo_lib.sh
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	swp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	swp3=${NETIFS[p5]}
+	h3=${NETIFS[p6]}
+
+	vrf_prepare
+	mirror_gre_topo_create
+	# Avoid changing br1's PVID while it is operational as a L3 interface.
+	ip link set dev br1 down
+
+	ip link set dev $swp3 master br1
+	bridge vlan add dev br1 vid 555 pvid untagged self
+	ip link set dev br1 up
+	ip address add dev br1 192.0.2.129/28
+	ip address add dev br1 2001:db8:2::1/64
+
+	ip -4 route add 192.0.2.130/32 dev br1
+	ip -6 route add 2001:db8:2::2/128 dev br1
+
+	vlan_create $h3 555 v$h3 192.0.2.130/28 2001:db8:2::2/64
+	bridge vlan add dev $swp3 vid 555
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	ip link set dev $swp3 nomaster
+	vlan_destroy $h3 555
+
+	mirror_gre_topo_destroy
+	vrf_cleanup
+}
+
+test_gretap()
+{
+	full_test_span_gre_dir gt4 ingress 8 0 "mirror to gretap"
+	full_test_span_gre_dir gt4 egress 0 8 "mirror to gretap"
+}
+
+test_ip6gretap()
+{
+	full_test_span_gre_dir gt6 ingress 8 0 "mirror to ip6gretap"
+	full_test_span_gre_dir gt6 egress 0 8 "mirror to ip6gretap"
+}
+
+tests()
+{
+	slow_path_trap_install $swp1 ingress
+	slow_path_trap_install $swp1 egress
+
+	tests_run
+
+	slow_path_trap_uninstall $swp1 egress
+	slow_path_trap_uninstall $swp1 ingress
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tcflags="skip_hw"
+tests
+
+if ! tc_offload_check; then
+	echo "WARN: Could not test offloaded functionality"
+else
+	tcflags="skip_sw"
+	tests
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh
new file mode 100755
index 000000000..61844caf6
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh
@@ -0,0 +1,283 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test for "tc action mirred egress mirror" when the underlay route points at a
+# bridge device with vlan filtering (802.1q), and the egress device is a team
+# device.
+#
+# +----------------------+                             +----------------------+
+# | H1                   |                             |                   H2 |
+# |     + $h1.333        |                             |        $h1.555 +     |
+# |     | 192.0.2.1/28   |                             |  192.0.2.18/28 |     |
+# +-----|----------------+                             +----------------|-----+
+#       |                               $h1                             |
+#       +--------------------------------+------------------------------+
+#                                        |
+# +--------------------------------------|------------------------------------+
+# | SW                                   o---> mirror                         |
+# |                                      |                                    |
+# |     +--------------------------------+------------------------------+     |
+# |     |                              $swp1                            |     |
+# |     + $swp1.333                                           $swp1.555 +     |
+# |       192.0.2.2/28                                    192.0.2.17/28       |
+# |                                                                           |
+# | +-----------------------------------------------------------------------+ |
+# | |                        BR1 (802.1q)                                   | |
+# | |     + lag (team)       192.0.2.129/28                                 | |
+# | |    / \                 2001:db8:2::1/64                               | |
+# | +---/---\---------------------------------------------------------------+ |
+# |    /     \                                                            ^   |
+# |   |       \                                        + gt4 (gretap)     |   |
+# |   |        \                                         loc=192.0.2.129  |   |
+# |   |         \                                        rem=192.0.2.130 -+   |
+# |   |          \                                       ttl=100              |
+# |   |           \                                      tos=inherit          |
+# |   |            \                                                          |
+# |   |             \_________________________________                        |
+# |   |                                               \                       |
+# |   + $swp3                                          + $swp4                |
+# +---|------------------------------------------------|----------------------+
+#     |                                                |
+# +---|----------------------+                     +---|----------------------+
+# |   + $h3               H3 |                     |   + $h4               H4 |
+# |     192.0.2.130/28       |                     |     192.0.2.130/28       |
+# |     2001:db8:2::2/64     |                     |     2001:db8:2::2/64     |
+# +--------------------------+                     +--------------------------+
+
+ALL_TESTS="
+	test_mirror_gretap_first
+	test_mirror_gretap_second
+"
+
+NUM_NETIFS=6
+source lib.sh
+source mirror_lib.sh
+source mirror_gre_lib.sh
+
+require_command $ARPING
+
+vlan_host_create()
+{
+	local if_name=$1; shift
+	local vid=$1; shift
+	local vrf_name=$1; shift
+	local ips=("${@}")
+
+	vrf_create $vrf_name
+	ip link set dev $vrf_name up
+	vlan_create $if_name $vid $vrf_name "${ips[@]}"
+}
+
+vlan_host_destroy()
+{
+	local if_name=$1; shift
+	local vid=$1; shift
+	local vrf_name=$1; shift
+
+	vlan_destroy $if_name $vid
+	ip link set dev $vrf_name down
+	vrf_destroy $vrf_name
+}
+
+h1_create()
+{
+	vlan_host_create $h1 333 vrf-h1 192.0.2.1/28
+	ip -4 route add 192.0.2.16/28 vrf vrf-h1 nexthop via 192.0.2.2
+}
+
+h1_destroy()
+{
+	ip -4 route del 192.0.2.16/28 vrf vrf-h1
+	vlan_host_destroy $h1 333 vrf-h1
+}
+
+h2_create()
+{
+	vlan_host_create $h1 555 vrf-h2 192.0.2.18/28
+	ip -4 route add 192.0.2.0/28 vrf vrf-h2 nexthop via 192.0.2.17
+}
+
+h2_destroy()
+{
+	ip -4 route del 192.0.2.0/28 vrf vrf-h2
+	vlan_host_destroy $h1 555 vrf-h2
+}
+
+h3_create()
+{
+	simple_if_init $h3 192.0.2.130/28
+	tc qdisc add dev $h3 clsact
+}
+
+h3_destroy()
+{
+	tc qdisc del dev $h3 clsact
+	simple_if_fini $h3 192.0.2.130/28
+}
+
+h4_create()
+{
+	simple_if_init $h4 192.0.2.130/28
+	tc qdisc add dev $h4 clsact
+}
+
+h4_destroy()
+{
+	tc qdisc del dev $h4 clsact
+	simple_if_fini $h4 192.0.2.130/28
+}
+
+switch_create()
+{
+	ip link set dev $swp1 up
+	tc qdisc add dev $swp1 clsact
+	vlan_create $swp1 333 "" 192.0.2.2/28
+	vlan_create $swp1 555 "" 192.0.2.17/28
+
+	tunnel_create gt4 gretap 192.0.2.129 192.0.2.130 \
+		      ttl 100 tos inherit
+
+	ip link set dev $swp3 up
+	ip link set dev $swp4 up
+
+	ip link add name br1 type bridge vlan_filtering 1
+	ip link set dev br1 up
+	__addr_add_del br1 add 192.0.2.129/32
+	ip -4 route add 192.0.2.130/32 dev br1
+
+	team_create lag loadbalance $swp3 $swp4
+	ip link set dev lag master br1
+}
+
+switch_destroy()
+{
+	ip link set dev lag nomaster
+	team_destroy lag
+
+	ip -4 route del 192.0.2.130/32 dev br1
+	__addr_add_del br1 del 192.0.2.129/32
+	ip link set dev br1 down
+	ip link del dev br1
+
+	ip link set dev $swp4 down
+	ip link set dev $swp3 down
+
+	tunnel_destroy gt4
+
+	vlan_destroy $swp1 555
+	vlan_destroy $swp1 333
+	tc qdisc del dev $swp1 clsact
+	ip link set dev $swp1 down
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	swp3=${NETIFS[p3]}
+	h3=${NETIFS[p4]}
+
+	swp4=${NETIFS[p5]}
+	h4=${NETIFS[p6]}
+
+	vrf_prepare
+
+	ip link set dev $h1 up
+	h1_create
+	h2_create
+	h3_create
+	h4_create
+	switch_create
+
+	trap_install $h3 ingress
+	trap_install $h4 ingress
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	trap_uninstall $h4 ingress
+	trap_uninstall $h3 ingress
+
+	switch_destroy
+	h4_destroy
+	h3_destroy
+	h2_destroy
+	h1_destroy
+	ip link set dev $h1 down
+
+	vrf_cleanup
+}
+
+test_lag_slave()
+{
+	local host_dev=$1; shift
+	local up_dev=$1; shift
+	local down_dev=$1; shift
+	local what=$1; shift
+
+	RET=0
+
+	mirror_install $swp1 ingress gt4 \
+		       "proto 802.1q flower vlan_id 333 $tcflags"
+
+	# Test connectivity through $up_dev when $down_dev is set down.
+	ip link set dev $down_dev down
+	setup_wait_dev $up_dev
+	setup_wait_dev $host_dev
+	$ARPING -I br1 192.0.2.130 -qfc 1
+	sleep 2
+	mirror_test vrf-h1 192.0.2.1 192.0.2.18 $host_dev 1 10
+
+	# Test lack of connectivity when both slaves are down.
+	ip link set dev $up_dev down
+	sleep 2
+	mirror_test vrf-h1 192.0.2.1 192.0.2.18 $h3 1 0
+	mirror_test vrf-h1 192.0.2.1 192.0.2.18 $h4 1 0
+
+	ip link set dev $up_dev up
+	ip link set dev $down_dev up
+	mirror_uninstall $swp1 ingress
+
+	log_test "$what ($tcflags)"
+}
+
+test_mirror_gretap_first()
+{
+	test_lag_slave $h3 $swp3 $swp4 "mirror to gretap: LAG first slave"
+}
+
+test_mirror_gretap_second()
+{
+	test_lag_slave $h4 $swp4 $swp3 "mirror to gretap: LAG second slave"
+}
+
+test_all()
+{
+	slow_path_trap_install $swp1 ingress
+	slow_path_trap_install $swp1 egress
+
+	tests_run
+
+	slow_path_trap_uninstall $swp1 egress
+	slow_path_trap_uninstall $swp1 ingress
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tcflags="skip_hw"
+test_all
+
+if ! tc_offload_check; then
+	echo "WARN: Could not test offloaded functionality"
+else
+	tcflags="skip_sw"
+	test_all
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_changes.sh b/tools/testing/selftests/net/forwarding/mirror_gre_changes.sh
new file mode 100755
index 000000000..135902aa8
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_changes.sh
@@ -0,0 +1,271 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test uses standard topology for testing gretap. See
+# mirror_gre_topo_lib.sh for more details.
+#
+# Test how mirrors to gretap and ip6gretap react to changes to relevant
+# configuration.
+
+ALL_TESTS="
+	test_ttl
+	test_tun_up
+	test_egress_up
+	test_remote_ip
+	test_tun_del
+	test_route_del
+"
+
+NUM_NETIFS=6
+source lib.sh
+source mirror_lib.sh
+source mirror_gre_lib.sh
+source mirror_gre_topo_lib.sh
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	swp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	swp3=${NETIFS[p5]}
+	h3=${NETIFS[p6]}
+
+	vrf_prepare
+	mirror_gre_topo_create
+
+	# This test downs $swp3, which deletes the configured IPv6 address
+	# unless this sysctl is set.
+	sysctl_set net.ipv6.conf.$swp3.keep_addr_on_down 1
+
+	ip address add dev $swp3 192.0.2.129/28
+	ip address add dev $h3 192.0.2.130/28
+
+	ip address add dev $swp3 2001:db8:2::1/64
+	ip address add dev $h3 2001:db8:2::2/64
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	ip address del dev $h3 2001:db8:2::2/64
+	ip address del dev $swp3 2001:db8:2::1/64
+
+	ip address del dev $h3 192.0.2.130/28
+	ip address del dev $swp3 192.0.2.129/28
+
+	sysctl_restore net.ipv6.conf.$swp3.keep_addr_on_down
+
+	mirror_gre_topo_destroy
+	vrf_cleanup
+}
+
+test_span_gre_ttl()
+{
+	local tundev=$1; shift
+	local type=$1; shift
+	local prot=$1; shift
+	local what=$1; shift
+
+	RET=0
+
+	mirror_install $swp1 ingress $tundev "matchall $tcflags"
+	tc filter add dev $h3 ingress pref 77 prot $prot \
+		flower ip_ttl 50 action pass
+
+	mirror_test v$h1 192.0.2.1 192.0.2.2 $h3 77 0
+
+	ip link set dev $tundev type $type ttl 50
+	mirror_test v$h1 192.0.2.1 192.0.2.2 $h3 77 10
+
+	ip link set dev $tundev type $type ttl 100
+	tc filter del dev $h3 ingress pref 77
+	mirror_uninstall $swp1 ingress
+
+	log_test "$what: TTL change ($tcflags)"
+}
+
+test_span_gre_tun_up()
+{
+	local tundev=$1; shift
+	local what=$1; shift
+
+	RET=0
+
+	ip link set dev $tundev down
+	mirror_install $swp1 ingress $tundev "matchall $tcflags"
+	fail_test_span_gre_dir $tundev ingress
+
+	ip link set dev $tundev up
+
+	quick_test_span_gre_dir $tundev ingress
+	mirror_uninstall $swp1 ingress
+
+	log_test "$what: tunnel down/up ($tcflags)"
+}
+
+test_span_gre_egress_up()
+{
+	local tundev=$1; shift
+	local remote_ip=$1; shift
+	local what=$1; shift
+
+	RET=0
+
+	ip link set dev $swp3 down
+	mirror_install $swp1 ingress $tundev "matchall $tcflags"
+	fail_test_span_gre_dir $tundev ingress
+
+	# After setting the device up, wait for neighbor to get resolved so that
+	# we can expect mirroring to work.
+	ip link set dev $swp3 up
+	setup_wait_dev $swp3
+	ping -c 1 -I $swp3 $remote_ip &>/dev/null
+
+	quick_test_span_gre_dir $tundev ingress
+	mirror_uninstall $swp1 ingress
+
+	log_test "$what: egress down/up ($tcflags)"
+}
+
+test_span_gre_remote_ip()
+{
+	local tundev=$1; shift
+	local type=$1; shift
+	local correct_ip=$1; shift
+	local wrong_ip=$1; shift
+	local what=$1; shift
+
+	RET=0
+
+	ip link set dev $tundev type $type remote $wrong_ip
+	mirror_install $swp1 ingress $tundev "matchall $tcflags"
+	fail_test_span_gre_dir $tundev ingress
+
+	ip link set dev $tundev type $type remote $correct_ip
+	quick_test_span_gre_dir $tundev ingress
+	mirror_uninstall $swp1 ingress
+
+	log_test "$what: remote address change ($tcflags)"
+}
+
+test_span_gre_tun_del()
+{
+	local tundev=$1; shift
+	local type=$1; shift
+	local flags=$1; shift
+	local local_ip=$1; shift
+	local remote_ip=$1; shift
+	local what=$1; shift
+
+	RET=0
+
+	mirror_install $swp1 ingress $tundev "matchall $tcflags"
+	quick_test_span_gre_dir $tundev ingress
+	ip link del dev $tundev
+	fail_test_span_gre_dir $tundev ingress
+
+	tunnel_create $tundev $type $local_ip $remote_ip \
+		      ttl 100 tos inherit $flags
+
+	# Recreating the tunnel doesn't reestablish mirroring, so reinstall it
+	# and verify it works for the follow-up tests.
+	mirror_uninstall $swp1 ingress
+	mirror_install $swp1 ingress $tundev "matchall $tcflags"
+	quick_test_span_gre_dir $tundev ingress
+	mirror_uninstall $swp1 ingress
+
+	log_test "$what: tunnel deleted ($tcflags)"
+}
+
+test_span_gre_route_del()
+{
+	local tundev=$1; shift
+	local edev=$1; shift
+	local route=$1; shift
+	local what=$1; shift
+
+	RET=0
+
+	mirror_install $swp1 ingress $tundev "matchall $tcflags"
+	quick_test_span_gre_dir $tundev ingress
+
+	ip route del $route dev $edev
+	fail_test_span_gre_dir $tundev ingress
+
+	ip route add $route dev $edev
+	quick_test_span_gre_dir $tundev ingress
+
+	mirror_uninstall $swp1 ingress
+
+	log_test "$what: underlay route removal ($tcflags)"
+}
+
+test_ttl()
+{
+	test_span_gre_ttl gt4 gretap ip "mirror to gretap"
+	test_span_gre_ttl gt6 ip6gretap ipv6 "mirror to ip6gretap"
+}
+
+test_tun_up()
+{
+	test_span_gre_tun_up gt4 "mirror to gretap"
+	test_span_gre_tun_up gt6 "mirror to ip6gretap"
+}
+
+test_egress_up()
+{
+	test_span_gre_egress_up gt4 192.0.2.130 "mirror to gretap"
+	test_span_gre_egress_up gt6 2001:db8:2::2 "mirror to ip6gretap"
+}
+
+test_remote_ip()
+{
+	test_span_gre_remote_ip gt4 gretap 192.0.2.130 192.0.2.132 "mirror to gretap"
+	test_span_gre_remote_ip gt6 ip6gretap 2001:db8:2::2 2001:db8:2::4 "mirror to ip6gretap"
+}
+
+test_tun_del()
+{
+	test_span_gre_tun_del gt4 gretap "" \
+			      192.0.2.129 192.0.2.130 "mirror to gretap"
+	test_span_gre_tun_del gt6 ip6gretap allow-localremote \
+			      2001:db8:2::1 2001:db8:2::2 "mirror to ip6gretap"
+}
+
+test_route_del()
+{
+	test_span_gre_route_del gt4 $swp3 192.0.2.128/28 "mirror to gretap"
+	test_span_gre_route_del gt6 $swp3 2001:db8:2::/64 "mirror to ip6gretap"
+}
+
+test_all()
+{
+	slow_path_trap_install $swp1 ingress
+	slow_path_trap_install $swp1 egress
+
+	tests_run
+
+	slow_path_trap_uninstall $swp1 egress
+	slow_path_trap_uninstall $swp1 ingress
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tcflags="skip_hw"
+test_all
+
+if ! tc_offload_check; then
+	echo "WARN: Could not test offloaded functionality"
+else
+	tcflags="skip_sw"
+	test_all
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_flower.sh b/tools/testing/selftests/net/forwarding/mirror_gre_flower.sh
new file mode 100755
index 000000000..12914f406
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_flower.sh
@@ -0,0 +1,137 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test uses standard topology for testing gretap. See
+# mirror_gre_topo_lib.sh for more details.
+#
+# This tests flower-triggered mirroring to gretap and ip6gretap netdevices. The
+# interfaces on H1 and H2 have two addresses each. Flower match on one of the
+# addresses is configured with mirror action. It is expected that when pinging
+# this address, mirroring takes place, whereas when pinging the other one,
+# there's no mirroring.
+
+ALL_TESTS="
+	test_gretap
+	test_ip6gretap
+"
+
+NUM_NETIFS=6
+source lib.sh
+source mirror_lib.sh
+source mirror_gre_lib.sh
+source mirror_gre_topo_lib.sh
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	swp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	swp3=${NETIFS[p5]}
+	h3=${NETIFS[p6]}
+
+	vrf_prepare
+	mirror_gre_topo_create
+
+	ip address add dev $swp3 192.0.2.129/28
+	ip address add dev $h3 192.0.2.130/28
+
+	ip address add dev $swp3 2001:db8:2::1/64
+	ip address add dev $h3 2001:db8:2::2/64
+
+	ip address add dev $h1 192.0.2.3/28
+	ip address add dev $h2 192.0.2.4/28
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	ip address del dev $h2 192.0.2.4/28
+	ip address del dev $h1 192.0.2.3/28
+
+	ip address del dev $h3 2001:db8:2::2/64
+	ip address del dev $swp3 2001:db8:2::1/64
+
+	ip address del dev $h3 192.0.2.130/28
+	ip address del dev $swp3 192.0.2.129/28
+
+	mirror_gre_topo_destroy
+	vrf_cleanup
+}
+
+test_span_gre_dir_acl()
+{
+	test_span_gre_dir_ips "$@" 192.0.2.3 192.0.2.4
+}
+
+fail_test_span_gre_dir_acl()
+{
+	fail_test_span_gre_dir_ips "$@" 192.0.2.3 192.0.2.4
+}
+
+full_test_span_gre_dir_acl()
+{
+	local tundev=$1; shift
+	local direction=$1; shift
+	local forward_type=$1; shift
+	local backward_type=$1; shift
+	local match_dip=$1; shift
+	local what=$1; shift
+
+	mirror_install $swp1 $direction $tundev \
+		       "protocol ip flower $tcflags dst_ip $match_dip"
+	fail_test_span_gre_dir $tundev $direction
+	test_span_gre_dir_acl "$tundev" "$direction" \
+			  "$forward_type" "$backward_type"
+	mirror_uninstall $swp1 $direction
+
+	# Test lack of mirroring after ACL mirror is uninstalled.
+	fail_test_span_gre_dir_acl "$tundev" "$direction"
+
+	log_test "$direction $what ($tcflags)"
+}
+
+test_gretap()
+{
+	full_test_span_gre_dir_acl gt4 ingress 8 0 192.0.2.4 "ACL mirror to gretap"
+	full_test_span_gre_dir_acl gt4 egress 0 8 192.0.2.3 "ACL mirror to gretap"
+}
+
+test_ip6gretap()
+{
+	full_test_span_gre_dir_acl gt6 ingress 8 0 192.0.2.4 "ACL mirror to ip6gretap"
+	full_test_span_gre_dir_acl gt6 egress 0 8 192.0.2.3 "ACL mirror to ip6gretap"
+}
+
+test_all()
+{
+	RET=0
+
+	slow_path_trap_install $swp1 ingress
+	slow_path_trap_install $swp1 egress
+
+	tests_run
+
+	slow_path_trap_uninstall $swp1 egress
+	slow_path_trap_uninstall $swp1 ingress
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tcflags="skip_hw"
+test_all
+
+if ! tc_offload_check; then
+	echo "WARN: Could not test offloaded functionality"
+else
+	tcflags="skip_sw"
+	test_all
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_lag_lacp.sh b/tools/testing/selftests/net/forwarding/mirror_gre_lag_lacp.sh
new file mode 100755
index 000000000..9edf4cb10
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_lag_lacp.sh
@@ -0,0 +1,285 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test for "tc action mirred egress mirror" when the underlay route points at a
+# team device.
+#
+# +----------------------+                             +----------------------+
+# | H1                   |                             |                   H2 |
+# |    + $h1.333         |                             |        $h1.555 +     |
+# |    | 192.0.2.1/28    |                             |  192.0.2.18/28 |     |
+# +----|-----------------+                             +----------------|-----+
+#      |                                $h1                             |
+#      +---------------------------------+------------------------------+
+#                                        |
+# +--------------------------------------|------------------------------------+
+# | SW                                   o---> mirror                         |
+# |                                      |                                    |
+# |   +----------------------------------+------------------------------+     |
+# |   |                                $swp1                            |     |
+# |   + $swp1.333                                             $swp1.555 +     |
+# |     192.0.2.2/28                                      192.0.2.17/28       |
+# |                                                                           |
+# |                                                                           |
+# |   + gt4 (gretap)      ,-> + lag1 (team)                                   |
+# |     loc=192.0.2.129   |   | 192.0.2.129/28                                |
+# |     rem=192.0.2.130 --'   |                                               |
+# |     ttl=100               |                                               |
+# |     tos=inherit           |                                               |
+# |      _____________________|______________________                         |
+# |     /                                            \                        |
+# |    /                                              \                       |
+# |   + $swp3                                          + $swp4                |
+# +---|------------------------------------------------|----------------------+
+#     |                                                |
+# +---|------------------------------------------------|----------------------+
+# |   + $h3                                            + $h4               H3 |
+# |    \                                              /                       |
+# |     \____________________________________________/                        |
+# |                           |                                               |
+# |                           + lag2 (team)                                   |
+# |                             192.0.2.130/28                                |
+# |                                                                           |
+# +---------------------------------------------------------------------------+
+
+ALL_TESTS="
+	test_mirror_gretap_first
+	test_mirror_gretap_second
+"
+
+NUM_NETIFS=6
+source lib.sh
+source mirror_lib.sh
+source mirror_gre_lib.sh
+
+require_command $ARPING
+
+vlan_host_create()
+{
+	local if_name=$1; shift
+	local vid=$1; shift
+	local vrf_name=$1; shift
+	local ips=("${@}")
+
+	vrf_create $vrf_name
+	ip link set dev $vrf_name up
+	vlan_create $if_name $vid $vrf_name "${ips[@]}"
+}
+
+vlan_host_destroy()
+{
+	local if_name=$1; shift
+	local vid=$1; shift
+	local vrf_name=$1; shift
+
+	vlan_destroy $if_name $vid
+	ip link set dev $vrf_name down
+	vrf_destroy $vrf_name
+}
+
+h1_create()
+{
+	vlan_host_create $h1 333 vrf-h1 192.0.2.1/28
+	ip -4 route add 192.0.2.16/28 vrf vrf-h1 nexthop via 192.0.2.2
+}
+
+h1_destroy()
+{
+	ip -4 route del 192.0.2.16/28 vrf vrf-h1
+	vlan_host_destroy $h1 333 vrf-h1
+}
+
+h2_create()
+{
+	vlan_host_create $h1 555 vrf-h2 192.0.2.18/28
+	ip -4 route add 192.0.2.0/28 vrf vrf-h2 nexthop via 192.0.2.17
+}
+
+h2_destroy()
+{
+	ip -4 route del 192.0.2.0/28 vrf vrf-h2
+	vlan_host_destroy $h1 555 vrf-h2
+}
+
+h3_create_team()
+{
+	team_create lag2 lacp $h3 $h4
+	__simple_if_init lag2 vrf-h3 192.0.2.130/32
+	ip -4 route add vrf vrf-h3 192.0.2.129/32 dev lag2
+}
+
+h3_destroy_team()
+{
+	ip -4 route del vrf vrf-h3 192.0.2.129/32 dev lag2
+	__simple_if_fini lag2 192.0.2.130/32
+	team_destroy lag2
+
+	ip link set dev $h3 down
+	ip link set dev $h4 down
+}
+
+h3_create()
+{
+	vrf_create vrf-h3
+	ip link set dev vrf-h3 up
+	tc qdisc add dev $h3 clsact
+	tc qdisc add dev $h4 clsact
+	h3_create_team
+}
+
+h3_destroy()
+{
+	h3_destroy_team
+	tc qdisc del dev $h4 clsact
+	tc qdisc del dev $h3 clsact
+	ip link set dev vrf-h3 down
+	vrf_destroy vrf-h3
+}
+
+switch_create()
+{
+	ip link set dev $swp1 up
+	tc qdisc add dev $swp1 clsact
+	vlan_create $swp1 333 "" 192.0.2.2/28
+	vlan_create $swp1 555 "" 192.0.2.17/28
+
+	tunnel_create gt4 gretap 192.0.2.129 192.0.2.130 \
+		      ttl 100 tos inherit
+
+	ip link set dev $swp3 up
+	ip link set dev $swp4 up
+	team_create lag1 lacp $swp3 $swp4
+	__addr_add_del lag1 add 192.0.2.129/32
+	ip -4 route add 192.0.2.130/32 dev lag1
+}
+
+switch_destroy()
+{
+	ip -4 route del 192.0.2.130/32 dev lag1
+	__addr_add_del lag1 del 192.0.2.129/32
+	team_destroy lag1
+
+	ip link set dev $swp4 down
+	ip link set dev $swp3 down
+
+	tunnel_destroy gt4
+
+	vlan_destroy $swp1 555
+	vlan_destroy $swp1 333
+	tc qdisc del dev $swp1 clsact
+	ip link set dev $swp1 down
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	swp3=${NETIFS[p3]}
+	h3=${NETIFS[p4]}
+
+	swp4=${NETIFS[p5]}
+	h4=${NETIFS[p6]}
+
+	vrf_prepare
+
+	ip link set dev $h1 up
+	h1_create
+	h2_create
+	h3_create
+	switch_create
+
+	trap_install $h3 ingress
+	trap_install $h4 ingress
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	trap_uninstall $h4 ingress
+	trap_uninstall $h3 ingress
+
+	switch_destroy
+	h3_destroy
+	h2_destroy
+	h1_destroy
+	ip link set dev $h1 down
+
+	vrf_cleanup
+}
+
+test_lag_slave()
+{
+	local up_dev=$1; shift
+	local down_dev=$1; shift
+	local what=$1; shift
+
+	RET=0
+
+	mirror_install $swp1 ingress gt4 \
+		       "proto 802.1q flower vlan_id 333 $tcflags"
+
+	# Move $down_dev away from the team. That will prompt change in
+	# txability of the connected device, without changing its upness. The
+	# driver should notice the txability change and move the traffic to the
+	# other slave.
+	ip link set dev $down_dev nomaster
+	sleep 2
+	mirror_test vrf-h1 192.0.2.1 192.0.2.18 $up_dev 1 10
+
+	# Test lack of connectivity when neither slave is txable.
+	ip link set dev $up_dev nomaster
+	sleep 2
+	mirror_test vrf-h1 192.0.2.1 192.0.2.18 $h3 1 0
+	mirror_test vrf-h1 192.0.2.1 192.0.2.18 $h4 1 0
+	mirror_uninstall $swp1 ingress
+
+	# Recreate H3's team device, because mlxsw, which this test is
+	# predominantly mean to test, requires a bottom-up construction and
+	# doesn't allow enslavement to a device that already has an upper.
+	h3_destroy_team
+	h3_create_team
+	# Wait for ${h,swp}{3,4}.
+	setup_wait
+
+	log_test "$what ($tcflags)"
+}
+
+test_mirror_gretap_first()
+{
+	test_lag_slave $h3 $h4 "mirror to gretap: LAG first slave"
+}
+
+test_mirror_gretap_second()
+{
+	test_lag_slave $h4 $h3 "mirror to gretap: LAG second slave"
+}
+
+test_all()
+{
+	slow_path_trap_install $swp1 ingress
+	slow_path_trap_install $swp1 egress
+
+	tests_run
+
+	slow_path_trap_uninstall $swp1 egress
+	slow_path_trap_uninstall $swp1 ingress
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tcflags="skip_hw"
+test_all
+
+if ! tc_offload_check; then
+	echo "WARN: Could not test offloaded functionality"
+else
+	tcflags="skip_sw"
+	test_all
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_lib.sh b/tools/testing/selftests/net/forwarding/mirror_gre_lib.sh
new file mode 100644
index 000000000..fac486178
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_lib.sh
@@ -0,0 +1,130 @@
+# SPDX-License-Identifier: GPL-2.0
+
+source "$relative_path/mirror_lib.sh"
+
+quick_test_span_gre_dir_ips()
+{
+	local tundev=$1; shift
+
+	do_test_span_dir_ips 10 h3-$tundev "$@"
+}
+
+fail_test_span_gre_dir_ips()
+{
+	local tundev=$1; shift
+
+	do_test_span_dir_ips 0 h3-$tundev "$@"
+}
+
+test_span_gre_dir_ips()
+{
+	local tundev=$1; shift
+
+	test_span_dir_ips h3-$tundev "$@"
+}
+
+full_test_span_gre_dir_ips()
+{
+	local tundev=$1; shift
+	local direction=$1; shift
+	local forward_type=$1; shift
+	local backward_type=$1; shift
+	local what=$1; shift
+	local ip1=$1; shift
+	local ip2=$1; shift
+
+	RET=0
+
+	mirror_install $swp1 $direction $tundev "matchall $tcflags"
+	test_span_dir_ips "h3-$tundev" "$direction" "$forward_type" \
+			  "$backward_type" "$ip1" "$ip2"
+	mirror_uninstall $swp1 $direction
+
+	log_test "$direction $what ($tcflags)"
+}
+
+full_test_span_gre_dir_vlan_ips()
+{
+	local tundev=$1; shift
+	local direction=$1; shift
+	local vlan_match=$1; shift
+	local forward_type=$1; shift
+	local backward_type=$1; shift
+	local what=$1; shift
+	local ip1=$1; shift
+	local ip2=$1; shift
+
+	RET=0
+
+	mirror_install $swp1 $direction $tundev "matchall $tcflags"
+
+	test_span_dir_ips "h3-$tundev" "$direction" "$forward_type" \
+			  "$backward_type" "$ip1" "$ip2"
+
+	tc filter add dev $h3 ingress pref 77 prot 802.1q \
+		flower $vlan_match \
+		action pass
+	mirror_test v$h1 $ip1 $ip2 $h3 77 10
+	tc filter del dev $h3 ingress pref 77
+
+	mirror_uninstall $swp1 $direction
+
+	log_test "$direction $what ($tcflags)"
+}
+
+quick_test_span_gre_dir()
+{
+	quick_test_span_gre_dir_ips "$@" 192.0.2.1 192.0.2.2
+}
+
+fail_test_span_gre_dir()
+{
+	fail_test_span_gre_dir_ips "$@" 192.0.2.1 192.0.2.2
+}
+
+test_span_gre_dir()
+{
+	test_span_gre_dir_ips "$@" 192.0.2.1 192.0.2.2
+}
+
+full_test_span_gre_dir()
+{
+	full_test_span_gre_dir_ips "$@" 192.0.2.1 192.0.2.2
+}
+
+full_test_span_gre_dir_vlan()
+{
+	full_test_span_gre_dir_vlan_ips "$@" 192.0.2.1 192.0.2.2
+}
+
+full_test_span_gre_stp_ips()
+{
+	local tundev=$1; shift
+	local nbpdev=$1; shift
+	local what=$1; shift
+	local ip1=$1; shift
+	local ip2=$1; shift
+	local h3mac=$(mac_get $h3)
+
+	RET=0
+
+	mirror_install $swp1 ingress $tundev "matchall $tcflags"
+	quick_test_span_gre_dir_ips $tundev ingress $ip1 $ip2
+
+	bridge link set dev $nbpdev state disabled
+	sleep 1
+	fail_test_span_gre_dir_ips $tundev ingress $ip1 $ip2
+
+	bridge link set dev $nbpdev state forwarding
+	sleep 1
+	quick_test_span_gre_dir_ips $tundev ingress $ip1 $ip2
+
+	mirror_uninstall $swp1 ingress
+
+	log_test "$what: STP state ($tcflags)"
+}
+
+full_test_span_gre_stp()
+{
+	full_test_span_gre_stp_ips "$@" 192.0.2.1 192.0.2.2
+}
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_neigh.sh b/tools/testing/selftests/net/forwarding/mirror_gre_neigh.sh
new file mode 100755
index 000000000..fc0508e40
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_neigh.sh
@@ -0,0 +1,115 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test uses standard topology for testing gretap. See
+# mirror_gre_topo_lib.sh for more details.
+#
+# Test for mirroring to gretap and ip6gretap, such that the neighbor entry for
+# the tunnel remote address has invalid address at the time that the mirroring
+# is set up. Later on, the neighbor is deleted and it is expected to be
+# reinitialized using the usual ARP process, and the mirroring offload updated.
+
+ALL_TESTS="
+	test_gretap
+	test_ip6gretap
+"
+
+NUM_NETIFS=6
+source lib.sh
+source mirror_lib.sh
+source mirror_gre_lib.sh
+source mirror_gre_topo_lib.sh
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	swp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	swp3=${NETIFS[p5]}
+	h3=${NETIFS[p6]}
+
+	vrf_prepare
+	mirror_gre_topo_create
+
+	ip address add dev $swp3 192.0.2.129/28
+	ip address add dev $h3 192.0.2.130/28
+
+	ip address add dev $swp3 2001:db8:2::1/64
+	ip address add dev $h3 2001:db8:2::2/64
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	ip address del dev $h3 2001:db8:2::2/64
+	ip address del dev $swp3 2001:db8:2::1/64
+
+	ip address del dev $h3 192.0.2.130/28
+	ip address del dev $swp3 192.0.2.129/28
+
+	mirror_gre_topo_destroy
+	vrf_cleanup
+}
+
+test_span_gre_neigh()
+{
+	local addr=$1; shift
+	local tundev=$1; shift
+	local direction=$1; shift
+	local what=$1; shift
+
+	RET=0
+
+	ip neigh replace dev $swp3 $addr lladdr 00:11:22:33:44:55
+	mirror_install $swp1 $direction $tundev "matchall $tcflags"
+	fail_test_span_gre_dir $tundev ingress
+	ip neigh del dev $swp3 $addr
+	quick_test_span_gre_dir $tundev ingress
+	mirror_uninstall $swp1 $direction
+
+	log_test "$direction $what: neighbor change ($tcflags)"
+}
+
+test_gretap()
+{
+	test_span_gre_neigh 192.0.2.130 gt4 ingress "mirror to gretap"
+	test_span_gre_neigh 192.0.2.130 gt4 egress "mirror to gretap"
+}
+
+test_ip6gretap()
+{
+	test_span_gre_neigh 2001:db8:2::2 gt6 ingress "mirror to ip6gretap"
+	test_span_gre_neigh 2001:db8:2::2 gt6 egress "mirror to ip6gretap"
+}
+
+test_all()
+{
+	slow_path_trap_install $swp1 ingress
+	slow_path_trap_install $swp1 egress
+
+	tests_run
+
+	slow_path_trap_uninstall $swp1 egress
+	slow_path_trap_uninstall $swp1 ingress
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tcflags="skip_hw"
+test_all
+
+if ! tc_offload_check; then
+	echo "WARN: Could not test offloaded functionality"
+else
+	tcflags="skip_sw"
+	test_all
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_nh.sh b/tools/testing/selftests/net/forwarding/mirror_gre_nh.sh
new file mode 100755
index 000000000..6f9ef1820
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_nh.sh
@@ -0,0 +1,131 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test uses standard topology for testing gretap. See
+# mirror_gre_topo_lib.sh for more details.
+#
+# Test that gretap and ip6gretap mirroring works when the other tunnel endpoint
+# is reachable through a next-hop route (as opposed to directly-attached route).
+
+ALL_TESTS="
+	test_gretap
+	test_ip6gretap
+"
+
+NUM_NETIFS=6
+source lib.sh
+source mirror_lib.sh
+source mirror_gre_lib.sh
+source mirror_gre_topo_lib.sh
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	swp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	swp3=${NETIFS[p5]}
+	h3=${NETIFS[p6]}
+
+	sysctl_set net.ipv4.conf.all.rp_filter 0
+	sysctl_set net.ipv4.conf.$h3.rp_filter 0
+
+	vrf_prepare
+	mirror_gre_topo_create
+
+	sysctl_set net.ipv4.conf.v$h3.rp_filter 0
+
+	ip address add dev $swp3 192.0.2.161/28
+	ip address add dev $h3 192.0.2.162/28
+	ip address add dev gt4 192.0.2.129/32
+	ip address add dev h3-gt4 192.0.2.130/32
+
+	# IPv6 route can't be added after address. Such routes are rejected due
+	# to the gateway address having been configured on the local system. It
+	# works the other way around though.
+	ip address add dev $swp3 2001:db8:4::1/64
+	ip -6 route add 2001:db8:2::2/128 via 2001:db8:4::2
+	ip address add dev $h3 2001:db8:4::2/64
+	ip address add dev gt6 2001:db8:2::1
+	ip address add dev h3-gt6 2001:db8:2::2
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	ip -6 route del 2001:db8:2::2/128 via 2001:db8:4::2
+	ip address del dev $h3 2001:db8:4::2/64
+	ip address del dev $swp3 2001:db8:4::1/64
+
+	ip address del dev $h3 192.0.2.162/28
+	ip address del dev $swp3 192.0.2.161/28
+
+	sysctl_restore net.ipv4.conf.v$h3.rp_filter 0
+
+	mirror_gre_topo_destroy
+	vrf_cleanup
+
+	sysctl_restore net.ipv4.conf.$h3.rp_filter
+	sysctl_restore net.ipv4.conf.all.rp_filter
+}
+
+test_gretap()
+{
+	RET=0
+	mirror_install $swp1 ingress gt4 "matchall $tcflags"
+
+	# For IPv4, test that there's no mirroring without the route directing
+	# the traffic to tunnel remote address. Then add it and test that
+	# mirroring starts. For IPv6 we can't test this due to the limitation
+	# that routes for locally-specified IPv6 addresses can't be added.
+	fail_test_span_gre_dir gt4 ingress
+
+	ip route add 192.0.2.130/32 via 192.0.2.162
+	quick_test_span_gre_dir gt4 ingress
+	ip route del 192.0.2.130/32 via 192.0.2.162
+
+	mirror_uninstall $swp1 ingress
+	log_test "mirror to gre with next-hop remote ($tcflags)"
+}
+
+test_ip6gretap()
+{
+	RET=0
+
+	mirror_install $swp1 ingress gt6 "matchall $tcflags"
+	quick_test_span_gre_dir gt6 ingress
+	mirror_uninstall $swp1 ingress
+
+	log_test "mirror to ip6gre with next-hop remote ($tcflags)"
+}
+
+test_all()
+{
+	slow_path_trap_install $swp1 ingress
+	slow_path_trap_install $swp1 egress
+
+	tests_run
+
+	slow_path_trap_uninstall $swp1 egress
+	slow_path_trap_uninstall $swp1 ingress
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tcflags="skip_hw"
+test_all
+
+if ! tc_offload_check; then
+	echo "WARN: Could not test offloaded functionality"
+else
+	tcflags="skip_sw"
+	test_all
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_topo_lib.sh b/tools/testing/selftests/net/forwarding/mirror_gre_topo_lib.sh
new file mode 100644
index 000000000..39c03e286
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_topo_lib.sh
@@ -0,0 +1,94 @@
+# SPDX-License-Identifier: GPL-2.0
+
+# This is the standard topology for testing mirroring to gretap and ip6gretap
+# netdevices. The tests that use it tweak it in one way or another--importantly,
+# $swp3 and $h3 need to have addresses set up.
+#
+#   +---------------------+                             +---------------------+
+#   | H1                  |                             |                  H2 |
+#   |     + $h1           |                             |           $h2 +     |
+#   |     | 192.0.2.1/28  |                             |  192.0.2.2/28 |     |
+#   +-----|---------------+                             +---------------|-----+
+#         |                                                             |
+#   +-----|-------------------------------------------------------------|-----+
+#   | SW  o--> mirror                                                   |     |
+#   | +---|-------------------------------------------------------------|---+ |
+#   | |   + $swp1                    BR                           $swp2 +   | |
+#   | +---------------------------------------------------------------------+ |
+#   |                                                                         |
+#   |     + $swp3               + gt6 (ip6gretap)      + gt4 (gretap)         |
+#   |     |                     : loc=2001:db8:2::1    : loc=192.0.2.129      |
+#   |     |                     : rem=2001:db8:2::2    : rem=192.0.2.130      |
+#   |     |                     : ttl=100              : ttl=100              |
+#   |     |                     : tos=inherit          : tos=inherit          |
+#   |     |                     :                      :                      |
+#   +-----|---------------------:----------------------:----------------------+
+#         |                     :                      :
+#   +-----|---------------------:----------------------:----------------------+
+#   | H3  + $h3                 + h3-gt6 (ip6gretap)   + h3-gt4 (gretap)      |
+#   |                             loc=2001:db8:2::2      loc=192.0.2.130      |
+#   |                             rem=2001:db8:2::1      rem=192.0.2.129      |
+#   |                             ttl=100                ttl=100              |
+#   |                             tos=inherit            tos=inherit          |
+#   |                                                                         |
+#   +-------------------------------------------------------------------------+
+
+source "$relative_path/mirror_topo_lib.sh"
+
+mirror_gre_topo_h3_create()
+{
+	mirror_topo_h3_create
+
+	tunnel_create h3-gt4 gretap 192.0.2.130 192.0.2.129
+	ip link set h3-gt4 vrf v$h3
+	matchall_sink_create h3-gt4
+
+	tunnel_create h3-gt6 ip6gretap 2001:db8:2::2 2001:db8:2::1
+	ip link set h3-gt6 vrf v$h3
+	matchall_sink_create h3-gt6
+}
+
+mirror_gre_topo_h3_destroy()
+{
+	tunnel_destroy h3-gt6
+	tunnel_destroy h3-gt4
+
+	mirror_topo_h3_destroy
+}
+
+mirror_gre_topo_switch_create()
+{
+	mirror_topo_switch_create
+
+	tunnel_create gt4 gretap 192.0.2.129 192.0.2.130 \
+		      ttl 100 tos inherit
+
+	tunnel_create gt6 ip6gretap 2001:db8:2::1 2001:db8:2::2 \
+		      ttl 100 tos inherit allow-localremote
+}
+
+mirror_gre_topo_switch_destroy()
+{
+	tunnel_destroy gt6
+	tunnel_destroy gt4
+
+	mirror_topo_switch_destroy
+}
+
+mirror_gre_topo_create()
+{
+	mirror_topo_h1_create
+	mirror_topo_h2_create
+	mirror_gre_topo_h3_create
+
+	mirror_gre_topo_switch_create
+}
+
+mirror_gre_topo_destroy()
+{
+	mirror_gre_topo_switch_destroy
+
+	mirror_gre_topo_h3_destroy
+	mirror_topo_h2_destroy
+	mirror_topo_h1_destroy
+}
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_vlan.sh b/tools/testing/selftests/net/forwarding/mirror_gre_vlan.sh
new file mode 100755
index 000000000..88cecdb9a
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_vlan.sh
@@ -0,0 +1,92 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test uses standard topology for testing gretap. See
+# mirror_gre_topo_lib.sh for more details.
+#
+# Test for "tc action mirred egress mirror" that mirrors to a gretap netdevice
+# whose underlay route points at a vlan device.
+
+ALL_TESTS="
+	test_gretap
+"
+
+NUM_NETIFS=6
+source lib.sh
+source mirror_lib.sh
+source mirror_gre_lib.sh
+source mirror_gre_topo_lib.sh
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	swp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	swp3=${NETIFS[p5]}
+	h3=${NETIFS[p6]}
+
+	vrf_prepare
+	mirror_gre_topo_create
+
+	ip link add name $swp3.555 link $swp3 type vlan id 555
+	ip address add dev $swp3.555 192.0.2.129/32
+	ip address add dev $swp3.555 2001:db8:2::1/128
+	ip link set dev $swp3.555 up
+
+	ip route add 192.0.2.130/32 dev $swp3.555
+	ip -6 route add 2001:db8:2::2/128 dev $swp3.555
+
+	ip link add name $h3.555 link $h3 type vlan id 555
+	ip link set dev $h3.555 master v$h3
+	ip address add dev $h3.555 192.0.2.130/28
+	ip address add dev $h3.555 2001:db8:2::2/64
+	ip link set dev $h3.555 up
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	ip link del dev $h3.555
+	ip link del dev $swp3.555
+
+	mirror_gre_topo_destroy
+	vrf_cleanup
+}
+
+test_gretap()
+{
+	full_test_span_gre_dir gt4 ingress 8 0 "mirror to gretap"
+	full_test_span_gre_dir gt4 egress 0 8 "mirror to gretap"
+}
+
+test_all()
+{
+	slow_path_trap_install $swp1 ingress
+	slow_path_trap_install $swp1 egress
+
+	tests_run
+
+	slow_path_trap_uninstall $swp1 egress
+	slow_path_trap_uninstall $swp1 ingress
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tcflags="skip_hw"
+test_all
+
+if ! tc_offload_check; then
+	echo "WARN: Could not test offloaded functionality"
+else
+	tcflags="skip_sw"
+	test_all
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_vlan_bridge_1q.sh b/tools/testing/selftests/net/forwarding/mirror_gre_vlan_bridge_1q.sh
new file mode 100755
index 000000000..204b25f13
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_vlan_bridge_1q.sh
@@ -0,0 +1,283 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test uses standard topology for testing gretap. See
+# mirror_gre_topo_lib.sh for more details.
+#
+# Test for "tc action mirred egress mirror" when the underlay route points at a
+# vlan device on top of a bridge device with vlan filtering (802.1q).
+
+ALL_TESTS="
+	test_gretap
+	test_ip6gretap
+	test_gretap_forbidden_cpu
+	test_ip6gretap_forbidden_cpu
+	test_gretap_forbidden_egress
+	test_ip6gretap_forbidden_egress
+	test_gretap_untagged_egress
+	test_ip6gretap_untagged_egress
+	test_gretap_fdb_roaming
+	test_ip6gretap_fdb_roaming
+	test_gretap_stp
+	test_ip6gretap_stp
+"
+
+NUM_NETIFS=6
+source lib.sh
+source mirror_lib.sh
+source mirror_gre_lib.sh
+source mirror_gre_topo_lib.sh
+
+require_command $ARPING
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	swp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	swp3=${NETIFS[p5]}
+	h3=${NETIFS[p6]}
+
+	# gt4's remote address is at $h3.555, not $h3. Thus the packets arriving
+	# directly to $h3 for test_gretap_untagged_egress() are rejected by
+	# rp_filter and the test spuriously fails.
+	sysctl_set net.ipv4.conf.all.rp_filter 0
+	sysctl_set net.ipv4.conf.$h3.rp_filter 0
+
+	vrf_prepare
+	mirror_gre_topo_create
+
+	vlan_create br1 555 "" 192.0.2.129/32 2001:db8:2::1/128
+	bridge vlan add dev br1 vid 555 self
+	ip route rep 192.0.2.130/32 dev br1.555
+	ip -6 route rep 2001:db8:2::2/128 dev br1.555
+
+	vlan_create $h3 555 v$h3 192.0.2.130/28 2001:db8:2::2/64
+
+	ip link set dev $swp3 master br1
+	bridge vlan add dev $swp3 vid 555
+	bridge vlan add dev $swp2 vid 555
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	ip link set dev $swp2 nomaster
+	ip link set dev $swp3 nomaster
+	vlan_destroy $h3 555
+	vlan_destroy br1 555
+
+	mirror_gre_topo_destroy
+	vrf_cleanup
+
+	sysctl_restore net.ipv4.conf.$h3.rp_filter
+	sysctl_restore net.ipv4.conf.all.rp_filter
+}
+
+test_vlan_match()
+{
+	local tundev=$1; shift
+	local vlan_match=$1; shift
+	local what=$1; shift
+
+	full_test_span_gre_dir_vlan $tundev ingress "$vlan_match" 8 0 "$what"
+	full_test_span_gre_dir_vlan $tundev egress "$vlan_match" 0 8 "$what"
+}
+
+test_gretap()
+{
+	test_vlan_match gt4 'skip_hw vlan_id 555 vlan_ethtype ip' \
+			"mirror to gretap"
+}
+
+test_ip6gretap()
+{
+	test_vlan_match gt6 'skip_hw vlan_id 555 vlan_ethtype ip' \
+			"mirror to ip6gretap"
+}
+
+test_span_gre_forbidden_cpu()
+{
+	local tundev=$1; shift
+	local what=$1; shift
+
+	RET=0
+
+	# Run the pass-test first, to prime neighbor table.
+	mirror_install $swp1 ingress $tundev "matchall $tcflags"
+	quick_test_span_gre_dir $tundev ingress
+
+	# Now forbid the VLAN at the bridge and see it fail.
+	bridge vlan del dev br1 vid 555 self
+	sleep 1
+	fail_test_span_gre_dir $tundev ingress
+
+	bridge vlan add dev br1 vid 555 self
+	sleep 1
+	quick_test_span_gre_dir $tundev ingress
+
+	mirror_uninstall $swp1 ingress
+
+	log_test "$what: vlan forbidden at a bridge ($tcflags)"
+}
+
+test_gretap_forbidden_cpu()
+{
+	test_span_gre_forbidden_cpu gt4 "mirror to gretap"
+}
+
+test_ip6gretap_forbidden_cpu()
+{
+	test_span_gre_forbidden_cpu gt6 "mirror to ip6gretap"
+}
+
+test_span_gre_forbidden_egress()
+{
+	local tundev=$1; shift
+	local what=$1; shift
+
+	RET=0
+
+	mirror_install $swp1 ingress $tundev "matchall $tcflags"
+	quick_test_span_gre_dir $tundev ingress
+
+	bridge vlan del dev $swp3 vid 555
+	sleep 1
+	fail_test_span_gre_dir $tundev ingress
+
+	bridge vlan add dev $swp3 vid 555
+	# Re-prime FDB
+	$ARPING -I br1.555 192.0.2.130 -fqc 1
+	sleep 1
+	quick_test_span_gre_dir $tundev ingress
+
+	mirror_uninstall $swp1 ingress
+
+	log_test "$what: vlan forbidden at a bridge egress ($tcflags)"
+}
+
+test_gretap_forbidden_egress()
+{
+	test_span_gre_forbidden_egress gt4 "mirror to gretap"
+}
+
+test_ip6gretap_forbidden_egress()
+{
+	test_span_gre_forbidden_egress gt6 "mirror to ip6gretap"
+}
+
+test_span_gre_untagged_egress()
+{
+	local tundev=$1; shift
+	local what=$1; shift
+
+	RET=0
+
+	mirror_install $swp1 ingress $tundev "matchall $tcflags"
+
+	quick_test_span_gre_dir $tundev ingress
+	quick_test_span_vlan_dir $h3 555 ingress
+
+	bridge vlan add dev $swp3 vid 555 pvid untagged
+	sleep 1
+	quick_test_span_gre_dir $tundev ingress
+	fail_test_span_vlan_dir $h3 555 ingress
+
+	bridge vlan add dev $swp3 vid 555
+	sleep 1
+	quick_test_span_gre_dir $tundev ingress
+	quick_test_span_vlan_dir $h3 555 ingress
+
+	mirror_uninstall $swp1 ingress
+
+	log_test "$what: vlan untagged at a bridge egress ($tcflags)"
+}
+
+test_gretap_untagged_egress()
+{
+	test_span_gre_untagged_egress gt4 "mirror to gretap"
+}
+
+test_ip6gretap_untagged_egress()
+{
+	test_span_gre_untagged_egress gt6 "mirror to ip6gretap"
+}
+
+test_span_gre_fdb_roaming()
+{
+	local tundev=$1; shift
+	local what=$1; shift
+	local h3mac=$(mac_get $h3)
+
+	RET=0
+
+	mirror_install $swp1 ingress $tundev "matchall $tcflags"
+	quick_test_span_gre_dir $tundev ingress
+
+	bridge fdb del dev $swp3 $h3mac vlan 555 master
+	bridge fdb add dev $swp2 $h3mac vlan 555 master
+	sleep 1
+	fail_test_span_gre_dir $tundev ingress
+
+	bridge fdb del dev $swp2 $h3mac vlan 555 master
+	# Re-prime FDB
+	$ARPING -I br1.555 192.0.2.130 -fqc 1
+	sleep 1
+	quick_test_span_gre_dir $tundev ingress
+
+	mirror_uninstall $swp1 ingress
+
+	log_test "$what: MAC roaming ($tcflags)"
+}
+
+test_gretap_fdb_roaming()
+{
+	test_span_gre_fdb_roaming gt4 "mirror to gretap"
+}
+
+test_ip6gretap_fdb_roaming()
+{
+	test_span_gre_fdb_roaming gt6 "mirror to ip6gretap"
+}
+
+test_gretap_stp()
+{
+	full_test_span_gre_stp gt4 $swp3 "mirror to gretap"
+}
+
+test_ip6gretap_stp()
+{
+	full_test_span_gre_stp gt6 $swp3 "mirror to ip6gretap"
+}
+
+test_all()
+{
+	slow_path_trap_install $swp1 ingress
+	slow_path_trap_install $swp1 egress
+
+	tests_run
+
+	slow_path_trap_uninstall $swp1 egress
+	slow_path_trap_uninstall $swp1 ingress
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tcflags="skip_hw"
+test_all
+
+if ! tc_offload_check; then
+	echo "WARN: Could not test offloaded functionality"
+else
+	tcflags="skip_sw"
+	test_all
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/mirror_lib.sh b/tools/testing/selftests/net/forwarding/mirror_lib.sh
new file mode 100644
index 000000000..07991e102
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/mirror_lib.sh
@@ -0,0 +1,132 @@
+# SPDX-License-Identifier: GPL-2.0
+
+mirror_install()
+{
+	local from_dev=$1; shift
+	local direction=$1; shift
+	local to_dev=$1; shift
+	local filter=$1; shift
+
+	tc filter add dev $from_dev $direction \
+	   pref 1000 $filter \
+	   action mirred egress mirror dev $to_dev
+}
+
+mirror_uninstall()
+{
+	local from_dev=$1; shift
+	local direction=$1; shift
+
+	tc filter del dev $swp1 $direction pref 1000
+}
+
+mirror_test()
+{
+	local vrf_name=$1; shift
+	local sip=$1; shift
+	local dip=$1; shift
+	local dev=$1; shift
+	local pref=$1; shift
+	local expect=$1; shift
+
+	local t0=$(tc_rule_stats_get $dev $pref)
+	ip vrf exec $vrf_name \
+	   ${PING} ${sip:+-I $sip} $dip -c 10 -i 0.1 -w 2 &> /dev/null
+	local t1=$(tc_rule_stats_get $dev $pref)
+	local delta=$((t1 - t0))
+	# Tolerate a couple stray extra packets.
+	((expect <= delta && delta <= expect + 2))
+	check_err $? "Expected to capture $expect packets, got $delta."
+}
+
+do_test_span_dir_ips()
+{
+	local expect=$1; shift
+	local dev=$1; shift
+	local direction=$1; shift
+	local ip1=$1; shift
+	local ip2=$1; shift
+
+	icmp_capture_install $dev
+	mirror_test v$h1 $ip1 $ip2 $dev 100 $expect
+	mirror_test v$h2 $ip2 $ip1 $dev 100 $expect
+	icmp_capture_uninstall $dev
+}
+
+quick_test_span_dir_ips()
+{
+	do_test_span_dir_ips 10 "$@"
+}
+
+fail_test_span_dir_ips()
+{
+	do_test_span_dir_ips 0 "$@"
+}
+
+test_span_dir_ips()
+{
+	local dev=$1; shift
+	local direction=$1; shift
+	local forward_type=$1; shift
+	local backward_type=$1; shift
+	local ip1=$1; shift
+	local ip2=$1; shift
+
+	quick_test_span_dir_ips "$dev" "$direction" "$ip1" "$ip2"
+
+	icmp_capture_install $dev "type $forward_type"
+	mirror_test v$h1 $ip1 $ip2 $dev 100 10
+	icmp_capture_uninstall $dev
+
+	icmp_capture_install $dev "type $backward_type"
+	mirror_test v$h2 $ip2 $ip1 $dev 100 10
+	icmp_capture_uninstall $dev
+}
+
+fail_test_span_dir()
+{
+	fail_test_span_dir_ips "$@" 192.0.2.1 192.0.2.2
+}
+
+test_span_dir()
+{
+	test_span_dir_ips "$@" 192.0.2.1 192.0.2.2
+}
+
+do_test_span_vlan_dir_ips()
+{
+	local expect=$1; shift
+	local dev=$1; shift
+	local vid=$1; shift
+	local direction=$1; shift
+	local ip1=$1; shift
+	local ip2=$1; shift
+
+	# Install the capture as skip_hw to avoid double-counting of packets.
+	# The traffic is meant for local box anyway, so will be trapped to
+	# kernel.
+	vlan_capture_install $dev "skip_hw vlan_id $vid vlan_ethtype ip"
+	mirror_test v$h1 $ip1 $ip2 $dev 100 $expect
+	mirror_test v$h2 $ip2 $ip1 $dev 100 $expect
+	vlan_capture_uninstall $dev
+}
+
+quick_test_span_vlan_dir_ips()
+{
+	do_test_span_vlan_dir_ips 10 "$@"
+}
+
+fail_test_span_vlan_dir_ips()
+{
+	do_test_span_vlan_dir_ips 0 "$@"
+}
+
+quick_test_span_vlan_dir()
+{
+	quick_test_span_vlan_dir_ips "$@" 192.0.2.1 192.0.2.2
+}
+
+fail_test_span_vlan_dir()
+{
+	fail_test_span_vlan_dir_ips "$@" 192.0.2.1 192.0.2.2
+}
diff --git a/tools/testing/selftests/net/forwarding/mirror_topo_lib.sh b/tools/testing/selftests/net/forwarding/mirror_topo_lib.sh
new file mode 100644
index 000000000..04979e596
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/mirror_topo_lib.sh
@@ -0,0 +1,101 @@
+# SPDX-License-Identifier: GPL-2.0
+
+# This is the standard topology for testing mirroring. The tests that use it
+# tweak it in one way or another--typically add more devices to the topology.
+#
+#   +---------------------+                             +---------------------+
+#   | H1                  |                             |                  H2 |
+#   |     + $h1           |                             |           $h2 +     |
+#   |     | 192.0.2.1/28  |                             |  192.0.2.2/28 |     |
+#   +-----|---------------+                             +---------------|-----+
+#         |                                                             |
+#   +-----|-------------------------------------------------------------|-----+
+#   | SW  o--> mirror                                                   |     |
+#   | +---|-------------------------------------------------------------|---+ |
+#   | |   + $swp1                    BR                           $swp2 +   | |
+#   | +---------------------------------------------------------------------+ |
+#   |                                                                         |
+#   |     + $swp3                                                             |
+#   +-----|-------------------------------------------------------------------+
+#         |
+#   +-----|-------------------------------------------------------------------+
+#   | H3  + $h3                                                               |
+#   |                                                                         |
+#   +-------------------------------------------------------------------------+
+
+mirror_topo_h1_create()
+{
+	simple_if_init $h1 192.0.2.1/28
+}
+
+mirror_topo_h1_destroy()
+{
+	simple_if_fini $h1 192.0.2.1/28
+}
+
+mirror_topo_h2_create()
+{
+	simple_if_init $h2 192.0.2.2/28
+}
+
+mirror_topo_h2_destroy()
+{
+	simple_if_fini $h2 192.0.2.2/28
+}
+
+mirror_topo_h3_create()
+{
+	simple_if_init $h3
+	tc qdisc add dev $h3 clsact
+}
+
+mirror_topo_h3_destroy()
+{
+	tc qdisc del dev $h3 clsact
+	simple_if_fini $h3
+}
+
+mirror_topo_switch_create()
+{
+	ip link set dev $swp3 up
+
+	ip link add name br1 type bridge vlan_filtering 1
+	ip link set dev br1 up
+
+	ip link set dev $swp1 master br1
+	ip link set dev $swp1 up
+
+	ip link set dev $swp2 master br1
+	ip link set dev $swp2 up
+
+	tc qdisc add dev $swp1 clsact
+}
+
+mirror_topo_switch_destroy()
+{
+	tc qdisc del dev $swp1 clsact
+
+	ip link set dev $swp1 down
+	ip link set dev $swp2 down
+	ip link del dev br1
+
+	ip link set dev $swp3 down
+}
+
+mirror_topo_create()
+{
+	mirror_topo_h1_create
+	mirror_topo_h2_create
+	mirror_topo_h3_create
+
+	mirror_topo_switch_create
+}
+
+mirror_topo_destroy()
+{
+	mirror_topo_switch_destroy
+
+	mirror_topo_h3_destroy
+	mirror_topo_h2_destroy
+	mirror_topo_h1_destroy
+}
diff --git a/tools/testing/selftests/net/forwarding/mirror_vlan.sh b/tools/testing/selftests/net/forwarding/mirror_vlan.sh
new file mode 100755
index 000000000..9ab2ce77b
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/mirror_vlan.sh
@@ -0,0 +1,131 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test uses standard topology for testing mirroring. See mirror_topo_lib.sh
+# for more details.
+#
+# Test for "tc action mirred egress mirror" that mirrors to a vlan device.
+
+ALL_TESTS="
+	test_vlan
+	test_tagged_vlan
+"
+
+NUM_NETIFS=6
+source lib.sh
+source mirror_lib.sh
+source mirror_topo_lib.sh
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	swp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	swp3=${NETIFS[p5]}
+	h3=${NETIFS[p6]}
+
+	vrf_prepare
+	mirror_topo_create
+
+	vlan_create $swp3 555
+
+	vlan_create $h3 555 v$h3
+	matchall_sink_create $h3.555
+
+	vlan_create $h1 111 v$h1 192.0.2.17/28
+	bridge vlan add dev $swp1 vid 111
+
+	vlan_create $h2 111 v$h2 192.0.2.18/28
+	bridge vlan add dev $swp2 vid 111
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	vlan_destroy $h2 111
+	vlan_destroy $h1 111
+	vlan_destroy $h3 555
+	vlan_destroy $swp3 555
+
+	mirror_topo_destroy
+	vrf_cleanup
+}
+
+test_vlan_dir()
+{
+	local direction=$1; shift
+	local forward_type=$1; shift
+	local backward_type=$1; shift
+
+	RET=0
+
+	mirror_install $swp1 $direction $swp3.555 "matchall $tcflags"
+	test_span_dir "$h3.555" "$direction" "$forward_type" "$backward_type"
+	mirror_uninstall $swp1 $direction
+
+	log_test "$direction mirror to vlan ($tcflags)"
+}
+
+test_vlan()
+{
+	test_vlan_dir ingress 8 0
+	test_vlan_dir egress 0 8
+}
+
+test_tagged_vlan_dir()
+{
+	local direction=$1; shift
+	local forward_type=$1; shift
+	local backward_type=$1; shift
+
+	RET=0
+
+	mirror_install $swp1 $direction $swp3.555 "matchall $tcflags"
+	do_test_span_vlan_dir_ips 10 "$h3.555" 111 "$direction" \
+				  192.0.2.17 192.0.2.18
+	do_test_span_vlan_dir_ips  0 "$h3.555" 555 "$direction" \
+				  192.0.2.17 192.0.2.18
+	mirror_uninstall $swp1 $direction
+
+	log_test "$direction mirror tagged to vlan ($tcflags)"
+}
+
+test_tagged_vlan()
+{
+	test_tagged_vlan_dir ingress 8 0
+	test_tagged_vlan_dir egress 0 8
+}
+
+test_all()
+{
+	slow_path_trap_install $swp1 ingress
+	slow_path_trap_install $swp1 egress
+	trap_install $h3 ingress
+
+	tests_run
+
+	trap_uninstall $h3 ingress
+	slow_path_trap_uninstall $swp1 egress
+	slow_path_trap_uninstall $swp1 ingress
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tcflags="skip_hw"
+test_all
+
+if ! tc_offload_check; then
+	echo "WARN: Could not test offloaded functionality"
+else
+	tcflags="skip_sw"
+	test_all
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/router.sh b/tools/testing/selftests/net/forwarding/router.sh
new file mode 100755
index 000000000..a75cb51cc
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/router.sh
@@ -0,0 +1,135 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="ping_ipv4 ping_ipv6"
+NUM_NETIFS=4
+source lib.sh
+
+h1_create()
+{
+	vrf_create "vrf-h1"
+	ip link set dev $h1 master vrf-h1
+
+	ip link set dev vrf-h1 up
+	ip link set dev $h1 up
+
+	ip address add 192.0.2.2/24 dev $h1
+	ip address add 2001:db8:1::2/64 dev $h1
+
+	ip route add 198.51.100.0/24 vrf vrf-h1 nexthop via 192.0.2.1
+	ip route add 2001:db8:2::/64 vrf vrf-h1 nexthop via 2001:db8:1::1
+}
+
+h1_destroy()
+{
+	ip route del 2001:db8:2::/64 vrf vrf-h1
+	ip route del 198.51.100.0/24 vrf vrf-h1
+
+	ip address del 2001:db8:1::2/64 dev $h1
+	ip address del 192.0.2.2/24 dev $h1
+
+	ip link set dev $h1 down
+	vrf_destroy "vrf-h1"
+}
+
+h2_create()
+{
+	vrf_create "vrf-h2"
+	ip link set dev $h2 master vrf-h2
+
+	ip link set dev vrf-h2 up
+	ip link set dev $h2 up
+
+	ip address add 198.51.100.2/24 dev $h2
+	ip address add 2001:db8:2::2/64 dev $h2
+
+	ip route add 192.0.2.0/24 vrf vrf-h2 nexthop via 198.51.100.1
+	ip route add 2001:db8:1::/64 vrf vrf-h2 nexthop via 2001:db8:2::1
+}
+
+h2_destroy()
+{
+	ip route del 2001:db8:1::/64 vrf vrf-h2
+	ip route del 192.0.2.0/24 vrf vrf-h2
+
+	ip address del 2001:db8:2::2/64 dev $h2
+	ip address del 198.51.100.2/24 dev $h2
+
+	ip link set dev $h2 down
+	vrf_destroy "vrf-h2"
+}
+
+router_create()
+{
+	ip link set dev $rp1 up
+	ip link set dev $rp2 up
+
+	ip address add 192.0.2.1/24 dev $rp1
+	ip address add 2001:db8:1::1/64 dev $rp1
+
+	ip address add 198.51.100.1/24 dev $rp2
+	ip address add 2001:db8:2::1/64 dev $rp2
+}
+
+router_destroy()
+{
+	ip address del 2001:db8:2::1/64 dev $rp2
+	ip address del 198.51.100.1/24 dev $rp2
+
+	ip address del 2001:db8:1::1/64 dev $rp1
+	ip address del 192.0.2.1/24 dev $rp1
+
+	ip link set dev $rp2 down
+	ip link set dev $rp1 down
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	rp1=${NETIFS[p2]}
+
+	rp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	vrf_prepare
+
+	h1_create
+	h2_create
+
+	router_create
+
+	forwarding_enable
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	forwarding_restore
+
+	router_destroy
+
+	h2_destroy
+	h1_destroy
+
+	vrf_cleanup
+}
+
+ping_ipv4()
+{
+	ping_test $h1 198.51.100.2
+}
+
+ping_ipv6()
+{
+	ping6_test $h1 2001:db8:2::2
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/router_bridge.sh b/tools/testing/selftests/net/forwarding/router_bridge.sh
new file mode 100755
index 000000000..ebc596a27
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/router_bridge.sh
@@ -0,0 +1,113 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="
+	ping_ipv4
+	ping_ipv6
+"
+NUM_NETIFS=4
+source lib.sh
+
+h1_create()
+{
+	simple_if_init $h1 192.0.2.1/28 2001:db8:1::1/64
+	ip -4 route add 192.0.2.128/28 vrf v$h1 nexthop via 192.0.2.2
+	ip -6 route add 2001:db8:2::/64 vrf v$h1 nexthop via 2001:db8:1::2
+}
+
+h1_destroy()
+{
+	ip -6 route del 2001:db8:2::/64 vrf v$h1
+	ip -4 route del 192.0.2.128/28 vrf v$h1
+	simple_if_fini $h1 192.0.2.1/28 2001:db8:1::1/64
+}
+
+h2_create()
+{
+	simple_if_init $h2 192.0.2.130/28 2001:db8:2::2/64
+	ip -4 route add 192.0.2.0/28 vrf v$h2 nexthop via 192.0.2.129
+	ip -6 route add 2001:db8:1::/64 vrf v$h2 nexthop via 2001:db8:2::1
+}
+
+h2_destroy()
+{
+	ip -6 route del 2001:db8:1::/64 vrf v$h2
+	ip -4 route del 192.0.2.0/28 vrf v$h2
+	simple_if_fini $h2 192.0.2.130/28 2001:db8:2::2/64
+}
+
+router_create()
+{
+	ip link add name br1 type bridge vlan_filtering 1
+	ip link set dev br1 up
+
+	ip link set dev $swp1 master br1
+	ip link set dev $swp1 up
+	__addr_add_del br1 add 192.0.2.2/28 2001:db8:1::2/64
+
+	ip link set dev $swp2 up
+	__addr_add_del $swp2 add 192.0.2.129/28 2001:db8:2::1/64
+}
+
+router_destroy()
+{
+	__addr_add_del $swp2 del 192.0.2.129/28 2001:db8:2::1/64
+	ip link set dev $swp2 down
+
+	__addr_add_del br1 del 192.0.2.2/28 2001:db8:1::2/64
+	ip link set dev $swp1 down
+	ip link set dev $swp1 nomaster
+
+	ip link del dev br1
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	swp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	vrf_prepare
+
+	h1_create
+	h2_create
+
+	router_create
+
+	forwarding_enable
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	forwarding_restore
+
+	router_destroy
+
+	h2_destroy
+	h1_destroy
+
+	vrf_cleanup
+}
+
+ping_ipv4()
+{
+	ping_test $h1 192.0.2.130
+}
+
+ping_ipv6()
+{
+	ping6_test $h1 2001:db8:2::2
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/router_bridge_vlan.sh b/tools/testing/selftests/net/forwarding/router_bridge_vlan.sh
new file mode 100755
index 000000000..fa6a88c50
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/router_bridge_vlan.sh
@@ -0,0 +1,132 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="
+	ping_ipv4
+	ping_ipv6
+	vlan
+"
+NUM_NETIFS=4
+source lib.sh
+
+h1_create()
+{
+	simple_if_init $h1
+	vlan_create $h1 555 v$h1 192.0.2.1/28 2001:db8:1::1/64
+	ip -4 route add 192.0.2.128/28 vrf v$h1 nexthop via 192.0.2.2
+	ip -6 route add 2001:db8:2::/64 vrf v$h1 nexthop via 2001:db8:1::2
+}
+
+h1_destroy()
+{
+	ip -6 route del 2001:db8:2::/64 vrf v$h1
+	ip -4 route del 192.0.2.128/28 vrf v$h1
+	vlan_destroy $h1 555
+	simple_if_fini $h1
+}
+
+h2_create()
+{
+	simple_if_init $h2 192.0.2.130/28 2001:db8:2::2/64
+	ip -4 route add 192.0.2.0/28 vrf v$h2 nexthop via 192.0.2.129
+	ip -6 route add 2001:db8:1::/64 vrf v$h2 nexthop via 2001:db8:2::1
+}
+
+h2_destroy()
+{
+	ip -6 route del 2001:db8:1::/64 vrf v$h2
+	ip -4 route del 192.0.2.0/28 vrf v$h2
+	simple_if_fini $h2 192.0.2.130/28 2001:db8:2::2/64
+}
+
+router_create()
+{
+	ip link add name br1 type bridge vlan_filtering 1
+	ip link set dev br1 up
+
+	ip link set dev $swp1 master br1
+	ip link set dev $swp1 up
+
+	bridge vlan add dev br1 vid 555 self pvid untagged
+	bridge vlan add dev $swp1 vid 555
+
+	__addr_add_del br1 add 192.0.2.2/28 2001:db8:1::2/64
+
+	ip link set dev $swp2 up
+	__addr_add_del $swp2 add 192.0.2.129/28 2001:db8:2::1/64
+}
+
+router_destroy()
+{
+	__addr_add_del $swp2 del 192.0.2.129/28 2001:db8:2::1/64
+	ip link set dev $swp2 down
+
+	__addr_add_del br1 del 192.0.2.2/28 2001:db8:1::2/64
+	ip link set dev $swp1 down
+	ip link set dev $swp1 nomaster
+
+	ip link del dev br1
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	swp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	vrf_prepare
+
+	h1_create
+	h2_create
+
+	router_create
+
+	forwarding_enable
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	forwarding_restore
+
+	router_destroy
+
+	h2_destroy
+	h1_destroy
+
+	vrf_cleanup
+}
+
+vlan()
+{
+	RET=0
+
+	bridge vlan add dev br1 vid 333 self
+	check_err $? "Can't add a non-PVID VLAN"
+	bridge vlan del dev br1 vid 333 self
+	check_err $? "Can't remove a non-PVID VLAN"
+
+	log_test "vlan"
+}
+
+ping_ipv4()
+{
+	ping_test $h1 192.0.2.130
+}
+
+ping_ipv6()
+{
+	ping6_test $h1 2001:db8:2::2
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/router_broadcast.sh b/tools/testing/selftests/net/forwarding/router_broadcast.sh
new file mode 100755
index 000000000..7bd2ebb6e
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/router_broadcast.sh
@@ -0,0 +1,233 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="ping_ipv4"
+NUM_NETIFS=6
+source lib.sh
+
+h1_create()
+{
+	vrf_create "vrf-h1"
+	ip link set dev $h1 master vrf-h1
+
+	ip link set dev vrf-h1 up
+	ip link set dev $h1 up
+
+	ip address add 192.0.2.2/24 dev $h1
+
+	ip route add 198.51.100.0/24 vrf vrf-h1 nexthop via 192.0.2.1
+	ip route add 198.51.200.0/24 vrf vrf-h1 nexthop via 192.0.2.1
+}
+
+h1_destroy()
+{
+	ip route del 198.51.200.0/24 vrf vrf-h1
+	ip route del 198.51.100.0/24 vrf vrf-h1
+
+	ip address del 192.0.2.2/24 dev $h1
+
+	ip link set dev $h1 down
+	vrf_destroy "vrf-h1"
+}
+
+h2_create()
+{
+	vrf_create "vrf-h2"
+	ip link set dev $h2 master vrf-h2
+
+	ip link set dev vrf-h2 up
+	ip link set dev $h2 up
+
+	ip address add 198.51.100.2/24 dev $h2
+
+	ip route add 192.0.2.0/24 vrf vrf-h2 nexthop via 198.51.100.1
+	ip route add 198.51.200.0/24 vrf vrf-h2 nexthop via 198.51.100.1
+}
+
+h2_destroy()
+{
+	ip route del 198.51.200.0/24 vrf vrf-h2
+	ip route del 192.0.2.0/24 vrf vrf-h2
+
+	ip address del 198.51.100.2/24 dev $h2
+
+	ip link set dev $h2 down
+	vrf_destroy "vrf-h2"
+}
+
+h3_create()
+{
+	vrf_create "vrf-h3"
+	ip link set dev $h3 master vrf-h3
+
+	ip link set dev vrf-h3 up
+	ip link set dev $h3 up
+
+	ip address add 198.51.200.2/24 dev $h3
+
+	ip route add 192.0.2.0/24 vrf vrf-h3 nexthop via 198.51.200.1
+	ip route add 198.51.100.0/24 vrf vrf-h3 nexthop via 198.51.200.1
+}
+
+h3_destroy()
+{
+	ip route del 198.51.100.0/24 vrf vrf-h3
+	ip route del 192.0.2.0/24 vrf vrf-h3
+
+	ip address del 198.51.200.2/24 dev $h3
+
+	ip link set dev $h3 down
+	vrf_destroy "vrf-h3"
+}
+
+router_create()
+{
+	ip link set dev $rp1 up
+	ip link set dev $rp2 up
+	ip link set dev $rp3 up
+
+	ip address add 192.0.2.1/24 dev $rp1
+
+	ip address add 198.51.100.1/24 dev $rp2
+	ip address add 198.51.200.1/24 dev $rp3
+}
+
+router_destroy()
+{
+	ip address del 198.51.200.1/24 dev $rp3
+	ip address del 198.51.100.1/24 dev $rp2
+
+	ip address del 192.0.2.1/24 dev $rp1
+
+	ip link set dev $rp3 down
+	ip link set dev $rp2 down
+	ip link set dev $rp1 down
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	rp1=${NETIFS[p2]}
+
+	rp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	rp3=${NETIFS[p5]}
+	h3=${NETIFS[p6]}
+
+	vrf_prepare
+
+	h1_create
+	h2_create
+	h3_create
+
+	router_create
+
+	forwarding_enable
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	forwarding_restore
+
+	router_destroy
+
+	h3_destroy
+	h2_destroy
+	h1_destroy
+
+	vrf_cleanup
+}
+
+bc_forwarding_disable()
+{
+	sysctl_set net.ipv4.conf.all.bc_forwarding 0
+	sysctl_set net.ipv4.conf.$rp1.bc_forwarding 0
+}
+
+bc_forwarding_enable()
+{
+	sysctl_set net.ipv4.conf.all.bc_forwarding 1
+	sysctl_set net.ipv4.conf.$rp1.bc_forwarding 1
+}
+
+bc_forwarding_restore()
+{
+	sysctl_restore net.ipv4.conf.$rp1.bc_forwarding
+	sysctl_restore net.ipv4.conf.all.bc_forwarding
+}
+
+ping_test_from()
+{
+	local oif=$1
+	local dip=$2
+	local from=$3
+	local fail=${4:-0}
+
+	RET=0
+
+	log_info "ping $dip, expected reply from $from"
+	ip vrf exec $(master_name_get $oif) \
+	$PING -I $oif $dip -c 10 -i 0.1 -w 2 -b 2>&1 | grep $from &> /dev/null
+	check_err_fail $fail $?
+}
+
+ping_ipv4()
+{
+	sysctl_set net.ipv4.icmp_echo_ignore_broadcasts 0
+
+	bc_forwarding_disable
+	log_info "bc_forwarding disabled on r1 =>"
+	ping_test_from $h1 198.51.100.255 192.0.2.1
+	log_test "h1 -> net2: reply from r1 (not forwarding)"
+	ping_test_from $h1 198.51.200.255 192.0.2.1
+	log_test "h1 -> net3: reply from r1 (not forwarding)"
+	ping_test_from $h1 192.0.2.255 192.0.2.1
+	log_test "h1 -> net1: reply from r1 (not dropping)"
+	ping_test_from $h1 255.255.255.255 192.0.2.1
+	log_test "h1 -> 255.255.255.255: reply from r1 (not forwarding)"
+
+	ping_test_from $h2 192.0.2.255 198.51.100.1
+	log_test "h2 -> net1: reply from r1 (not forwarding)"
+	ping_test_from $h2 198.51.200.255 198.51.100.1
+	log_test "h2 -> net3: reply from r1 (not forwarding)"
+	ping_test_from $h2 198.51.100.255 198.51.100.1
+	log_test "h2 -> net2: reply from r1 (not dropping)"
+	ping_test_from $h2 255.255.255.255 198.51.100.1
+	log_test "h2 -> 255.255.255.255: reply from r1 (not forwarding)"
+	bc_forwarding_restore
+
+	bc_forwarding_enable
+	log_info "bc_forwarding enabled on r1 =>"
+	ping_test_from $h1 198.51.100.255 198.51.100.2
+	log_test "h1 -> net2: reply from h2 (forwarding)"
+	ping_test_from $h1 198.51.200.255 198.51.200.2
+	log_test "h1 -> net3: reply from h3 (forwarding)"
+	ping_test_from $h1 192.0.2.255 192.0.2.1 1
+	log_test "h1 -> net1: no reply (dropping)"
+	ping_test_from $h1 255.255.255.255 192.0.2.1
+	log_test "h1 -> 255.255.255.255: reply from r1 (not forwarding)"
+
+	ping_test_from $h2 192.0.2.255 192.0.2.2
+	log_test "h2 -> net1: reply from h1 (forwarding)"
+	ping_test_from $h2 198.51.200.255 198.51.200.2
+	log_test "h2 -> net3: reply from h3 (forwarding)"
+	ping_test_from $h2 198.51.100.255 198.51.100.1 1
+	log_test "h2 -> net2: no reply (dropping)"
+	ping_test_from $h2 255.255.255.255 198.51.100.1
+	log_test "h2 -> 255.255.255.255: reply from r1 (not forwarding)"
+	bc_forwarding_restore
+
+	sysctl_restore net.ipv4.icmp_echo_ignore_broadcasts
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/router_multipath.sh b/tools/testing/selftests/net/forwarding/router_multipath.sh
new file mode 100755
index 000000000..79a209927
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/router_multipath.sh
@@ -0,0 +1,342 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="ping_ipv4 ping_ipv6 multipath_test"
+NUM_NETIFS=8
+source lib.sh
+
+h1_create()
+{
+	vrf_create "vrf-h1"
+	ip link set dev $h1 master vrf-h1
+
+	ip link set dev vrf-h1 up
+	ip link set dev $h1 up
+
+	ip address add 192.0.2.2/24 dev $h1
+	ip address add 2001:db8:1::2/64 dev $h1
+
+	ip route add 198.51.100.0/24 vrf vrf-h1 nexthop via 192.0.2.1
+	ip route add 2001:db8:2::/64 vrf vrf-h1 nexthop via 2001:db8:1::1
+}
+
+h1_destroy()
+{
+	ip route del 2001:db8:2::/64 vrf vrf-h1
+	ip route del 198.51.100.0/24 vrf vrf-h1
+
+	ip address del 2001:db8:1::2/64 dev $h1
+	ip address del 192.0.2.2/24 dev $h1
+
+	ip link set dev $h1 down
+	vrf_destroy "vrf-h1"
+}
+
+h2_create()
+{
+	vrf_create "vrf-h2"
+	ip link set dev $h2 master vrf-h2
+
+	ip link set dev vrf-h2 up
+	ip link set dev $h2 up
+
+	ip address add 198.51.100.2/24 dev $h2
+	ip address add 2001:db8:2::2/64 dev $h2
+
+	ip route add 192.0.2.0/24 vrf vrf-h2 nexthop via 198.51.100.1
+	ip route add 2001:db8:1::/64 vrf vrf-h2 nexthop via 2001:db8:2::1
+}
+
+h2_destroy()
+{
+	ip route del 2001:db8:1::/64 vrf vrf-h2
+	ip route del 192.0.2.0/24 vrf vrf-h2
+
+	ip address del 2001:db8:2::2/64 dev $h2
+	ip address del 198.51.100.2/24 dev $h2
+
+	ip link set dev $h2 down
+	vrf_destroy "vrf-h2"
+}
+
+router1_create()
+{
+	vrf_create "vrf-r1"
+	ip link set dev $rp11 master vrf-r1
+	ip link set dev $rp12 master vrf-r1
+	ip link set dev $rp13 master vrf-r1
+
+	ip link set dev vrf-r1 up
+	ip link set dev $rp11 up
+	ip link set dev $rp12 up
+	ip link set dev $rp13 up
+
+	ip address add 192.0.2.1/24 dev $rp11
+	ip address add 2001:db8:1::1/64 dev $rp11
+
+	ip address add 169.254.2.12/24 dev $rp12
+	ip address add fe80:2::12/64 dev $rp12
+
+	ip address add 169.254.3.13/24 dev $rp13
+	ip address add fe80:3::13/64 dev $rp13
+
+	ip route add 198.51.100.0/24 vrf vrf-r1 \
+		nexthop via 169.254.2.22 dev $rp12 \
+		nexthop via 169.254.3.23 dev $rp13
+	ip route add 2001:db8:2::/64 vrf vrf-r1 \
+		nexthop via fe80:2::22 dev $rp12 \
+		nexthop via fe80:3::23 dev $rp13
+}
+
+router1_destroy()
+{
+	ip route del 2001:db8:2::/64 vrf vrf-r1
+	ip route del 198.51.100.0/24 vrf vrf-r1
+
+	ip address del fe80:3::13/64 dev $rp13
+	ip address del 169.254.3.13/24 dev $rp13
+
+	ip address del fe80:2::12/64 dev $rp12
+	ip address del 169.254.2.12/24 dev $rp12
+
+	ip address del 2001:db8:1::1/64 dev $rp11
+	ip address del 192.0.2.1/24 dev $rp11
+
+	ip link set dev $rp13 down
+	ip link set dev $rp12 down
+	ip link set dev $rp11 down
+
+	vrf_destroy "vrf-r1"
+}
+
+router2_create()
+{
+	vrf_create "vrf-r2"
+	ip link set dev $rp21 master vrf-r2
+	ip link set dev $rp22 master vrf-r2
+	ip link set dev $rp23 master vrf-r2
+
+	ip link set dev vrf-r2 up
+	ip link set dev $rp21 up
+	ip link set dev $rp22 up
+	ip link set dev $rp23 up
+
+	ip address add 198.51.100.1/24 dev $rp21
+	ip address add 2001:db8:2::1/64 dev $rp21
+
+	ip address add 169.254.2.22/24 dev $rp22
+	ip address add fe80:2::22/64 dev $rp22
+
+	ip address add 169.254.3.23/24 dev $rp23
+	ip address add fe80:3::23/64 dev $rp23
+
+	ip route add 192.0.2.0/24 vrf vrf-r2 \
+		nexthop via 169.254.2.12 dev $rp22 \
+		nexthop via 169.254.3.13 dev $rp23
+	ip route add 2001:db8:1::/64 vrf vrf-r2 \
+		nexthop via fe80:2::12 dev $rp22 \
+		nexthop via fe80:3::13 dev $rp23
+}
+
+router2_destroy()
+{
+	ip route del 2001:db8:1::/64 vrf vrf-r2
+	ip route del 192.0.2.0/24 vrf vrf-r2
+
+	ip address del fe80:3::23/64 dev $rp23
+	ip address del 169.254.3.23/24 dev $rp23
+
+	ip address del fe80:2::22/64 dev $rp22
+	ip address del 169.254.2.22/24 dev $rp22
+
+	ip address del 2001:db8:2::1/64 dev $rp21
+	ip address del 198.51.100.1/24 dev $rp21
+
+	ip link set dev $rp23 down
+	ip link set dev $rp22 down
+	ip link set dev $rp21 down
+
+	vrf_destroy "vrf-r2"
+}
+
+multipath4_test()
+{
+       local desc="$1"
+       local weight_rp12=$2
+       local weight_rp13=$3
+       local t0_rp12 t0_rp13 t1_rp12 t1_rp13
+       local packets_rp12 packets_rp13
+
+       # Transmit multiple flows from h1 to h2 and make sure they are
+       # distributed between both multipath links (rp12 and rp13)
+       # according to the configured weights.
+       sysctl_set net.ipv4.fib_multipath_hash_policy 1
+       ip route replace 198.51.100.0/24 vrf vrf-r1 \
+               nexthop via 169.254.2.22 dev $rp12 weight $weight_rp12 \
+               nexthop via 169.254.3.23 dev $rp13 weight $weight_rp13
+
+       t0_rp12=$(link_stats_tx_packets_get $rp12)
+       t0_rp13=$(link_stats_tx_packets_get $rp13)
+
+       ip vrf exec vrf-h1 $MZ -q -p 64 -A 192.0.2.2 -B 198.51.100.2 \
+	       -d 1msec -t udp "sp=1024,dp=0-32768"
+
+       t1_rp12=$(link_stats_tx_packets_get $rp12)
+       t1_rp13=$(link_stats_tx_packets_get $rp13)
+
+       let "packets_rp12 = $t1_rp12 - $t0_rp12"
+       let "packets_rp13 = $t1_rp13 - $t0_rp13"
+       multipath_eval "$desc" $weight_rp12 $weight_rp13 $packets_rp12 $packets_rp13
+
+       # Restore settings.
+       ip route replace 198.51.100.0/24 vrf vrf-r1 \
+               nexthop via 169.254.2.22 dev $rp12 \
+               nexthop via 169.254.3.23 dev $rp13
+       sysctl_restore net.ipv4.fib_multipath_hash_policy
+}
+
+multipath6_l4_test()
+{
+       local desc="$1"
+       local weight_rp12=$2
+       local weight_rp13=$3
+       local t0_rp12 t0_rp13 t1_rp12 t1_rp13
+       local packets_rp12 packets_rp13
+
+       # Transmit multiple flows from h1 to h2 and make sure they are
+       # distributed between both multipath links (rp12 and rp13)
+       # according to the configured weights.
+       sysctl_set net.ipv6.fib_multipath_hash_policy 1
+
+       ip route replace 2001:db8:2::/64 vrf vrf-r1 \
+	       nexthop via fe80:2::22 dev $rp12 weight $weight_rp12 \
+	       nexthop via fe80:3::23 dev $rp13 weight $weight_rp13
+
+       t0_rp12=$(link_stats_tx_packets_get $rp12)
+       t0_rp13=$(link_stats_tx_packets_get $rp13)
+
+       $MZ $h1 -6 -q -p 64 -A 2001:db8:1::2 -B 2001:db8:2::2 \
+	       -d 1msec -t udp "sp=1024,dp=0-32768"
+
+       t1_rp12=$(link_stats_tx_packets_get $rp12)
+       t1_rp13=$(link_stats_tx_packets_get $rp13)
+
+       let "packets_rp12 = $t1_rp12 - $t0_rp12"
+       let "packets_rp13 = $t1_rp13 - $t0_rp13"
+       multipath_eval "$desc" $weight_rp12 $weight_rp13 $packets_rp12 $packets_rp13
+
+       ip route replace 2001:db8:2::/64 vrf vrf-r1 \
+	       nexthop via fe80:2::22 dev $rp12 \
+	       nexthop via fe80:3::23 dev $rp13
+
+       sysctl_restore net.ipv6.fib_multipath_hash_policy
+}
+
+multipath6_test()
+{
+       local desc="$1"
+       local weight_rp12=$2
+       local weight_rp13=$3
+       local t0_rp12 t0_rp13 t1_rp12 t1_rp13
+       local packets_rp12 packets_rp13
+
+       ip route replace 2001:db8:2::/64 vrf vrf-r1 \
+	       nexthop via fe80:2::22 dev $rp12 weight $weight_rp12 \
+	       nexthop via fe80:3::23 dev $rp13 weight $weight_rp13
+
+       t0_rp12=$(link_stats_tx_packets_get $rp12)
+       t0_rp13=$(link_stats_tx_packets_get $rp13)
+
+       # Generate 16384 echo requests, each with a random flow label.
+       for _ in $(seq 1 16384); do
+	       ip vrf exec vrf-h1 $PING6 2001:db8:2::2 -F 0 -c 1 -q &> /dev/null
+       done
+
+       t1_rp12=$(link_stats_tx_packets_get $rp12)
+       t1_rp13=$(link_stats_tx_packets_get $rp13)
+
+       let "packets_rp12 = $t1_rp12 - $t0_rp12"
+       let "packets_rp13 = $t1_rp13 - $t0_rp13"
+       multipath_eval "$desc" $weight_rp12 $weight_rp13 $packets_rp12 $packets_rp13
+
+       ip route replace 2001:db8:2::/64 vrf vrf-r1 \
+	       nexthop via fe80:2::22 dev $rp12 \
+	       nexthop via fe80:3::23 dev $rp13
+}
+
+multipath_test()
+{
+	log_info "Running IPv4 multipath tests"
+	multipath4_test "ECMP" 1 1
+	multipath4_test "Weighted MP 2:1" 2 1
+	multipath4_test "Weighted MP 11:45" 11 45
+
+	log_info "Running IPv6 multipath tests"
+	multipath6_test "ECMP" 1 1
+	multipath6_test "Weighted MP 2:1" 2 1
+	multipath6_test "Weighted MP 11:45" 11 45
+
+	log_info "Running IPv6 L4 hash multipath tests"
+	multipath6_l4_test "ECMP" 1 1
+	multipath6_l4_test "Weighted MP 2:1" 2 1
+	multipath6_l4_test "Weighted MP 11:45" 11 45
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	rp11=${NETIFS[p2]}
+
+	rp12=${NETIFS[p3]}
+	rp22=${NETIFS[p4]}
+
+	rp13=${NETIFS[p5]}
+	rp23=${NETIFS[p6]}
+
+	rp21=${NETIFS[p7]}
+	h2=${NETIFS[p8]}
+
+	vrf_prepare
+
+	h1_create
+	h2_create
+
+	router1_create
+	router2_create
+
+	forwarding_enable
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	forwarding_restore
+
+	router2_destroy
+	router1_destroy
+
+	h2_destroy
+	h1_destroy
+
+	vrf_cleanup
+}
+
+ping_ipv4()
+{
+	ping_test $h1 198.51.100.2
+}
+
+ping_ipv6()
+{
+	ping6_test $h1 2001:db8:2::2
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/tc_actions.sh b/tools/testing/selftests/net/forwarding/tc_actions.sh
new file mode 100755
index 000000000..813d02d19
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/tc_actions.sh
@@ -0,0 +1,213 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="gact_drop_and_ok_test mirred_egress_redirect_test \
+	mirred_egress_mirror_test gact_trap_test"
+NUM_NETIFS=4
+source tc_common.sh
+source lib.sh
+
+tcflags="skip_hw"
+
+h1_create()
+{
+	simple_if_init $h1 192.0.2.1/24
+}
+
+h1_destroy()
+{
+	simple_if_fini $h1 192.0.2.1/24
+}
+
+h2_create()
+{
+	simple_if_init $h2 192.0.2.2/24
+	tc qdisc add dev $h2 clsact
+}
+
+h2_destroy()
+{
+	tc qdisc del dev $h2 clsact
+	simple_if_fini $h2 192.0.2.2/24
+}
+
+switch_create()
+{
+	simple_if_init $swp1 192.0.2.2/24
+	tc qdisc add dev $swp1 clsact
+
+	simple_if_init $swp2 192.0.2.1/24
+}
+
+switch_destroy()
+{
+	simple_if_fini $swp2 192.0.2.1/24
+
+	tc qdisc del dev $swp1 clsact
+	simple_if_fini $swp1 192.0.2.2/24
+}
+
+mirred_egress_test()
+{
+	local action=$1
+
+	RET=0
+
+	tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+		$tcflags dst_ip 192.0.2.2 action drop
+
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip -q
+
+	tc_check_packets "dev $h2 ingress" 101 1
+	check_fail $? "Matched without redirect rule inserted"
+
+	tc filter add dev $swp1 ingress protocol ip pref 1 handle 101 flower \
+		$tcflags dst_ip 192.0.2.2 action mirred egress $action \
+		dev $swp2
+
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip -q
+
+	tc_check_packets "dev $h2 ingress" 101 1
+	check_err $? "Did not match incoming $action packet"
+
+	tc filter del dev $swp1 ingress protocol ip pref 1 handle 101 flower
+	tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+
+	log_test "mirred egress $action ($tcflags)"
+}
+
+gact_drop_and_ok_test()
+{
+	RET=0
+
+	tc filter add dev $swp1 ingress protocol ip pref 2 handle 102 flower \
+		$tcflags dst_ip 192.0.2.2 action drop
+
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip -q
+
+	tc_check_packets "dev $swp1 ingress" 102 1
+	check_err $? "Packet was not dropped"
+
+	tc filter add dev $swp1 ingress protocol ip pref 1 handle 101 flower \
+		$tcflags dst_ip 192.0.2.2 action ok
+
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip -q
+
+	tc_check_packets "dev $swp1 ingress" 101 1
+	check_err $? "Did not see passed packet"
+
+	tc_check_packets "dev $swp1 ingress" 102 2
+	check_fail $? "Packet was dropped and it should not reach here"
+
+	tc filter del dev $swp1 ingress protocol ip pref 2 handle 102 flower
+	tc filter del dev $swp1 ingress protocol ip pref 1 handle 101 flower
+
+	log_test "gact drop and ok ($tcflags)"
+}
+
+gact_trap_test()
+{
+	RET=0
+
+	if [[ "$tcflags" != "skip_sw" ]]; then
+		return 0;
+	fi
+
+	tc filter add dev $swp1 ingress protocol ip pref 1 handle 101 flower \
+		skip_hw dst_ip 192.0.2.2 action drop
+	tc filter add dev $swp1 ingress protocol ip pref 3 handle 103 flower \
+		$tcflags dst_ip 192.0.2.2 action mirred egress redirect \
+		dev $swp2
+
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip -q
+
+	tc_check_packets "dev $swp1 ingress" 101 1
+	check_fail $? "Saw packet without trap rule inserted"
+
+	tc filter add dev $swp1 ingress protocol ip pref 2 handle 102 flower \
+		$tcflags dst_ip 192.0.2.2 action trap
+
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip -q
+
+	tc_check_packets "dev $swp1 ingress" 102 1
+	check_err $? "Packet was not trapped"
+
+	tc_check_packets "dev $swp1 ingress" 101 1
+	check_err $? "Did not see trapped packet"
+
+	tc filter del dev $swp1 ingress protocol ip pref 3 handle 103 flower
+	tc filter del dev $swp1 ingress protocol ip pref 2 handle 102 flower
+	tc filter del dev $swp1 ingress protocol ip pref 1 handle 101 flower
+
+	log_test "trap ($tcflags)"
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	swp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	h1mac=$(mac_get $h1)
+	h2mac=$(mac_get $h2)
+
+	swp1origmac=$(mac_get $swp1)
+	swp2origmac=$(mac_get $swp2)
+	ip link set $swp1 address $h2mac
+	ip link set $swp2 address $h1mac
+
+	vrf_prepare
+
+	h1_create
+	h2_create
+	switch_create
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	switch_destroy
+	h2_destroy
+	h1_destroy
+
+	vrf_cleanup
+
+	ip link set $swp2 address $swp2origmac
+	ip link set $swp1 address $swp1origmac
+}
+
+mirred_egress_redirect_test()
+{
+	mirred_egress_test "redirect"
+}
+
+mirred_egress_mirror_test()
+{
+	mirred_egress_test "mirror"
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+tc_offload_check
+if [[ $? -ne 0 ]]; then
+	log_info "Could not test offloaded functionality"
+else
+	tcflags="skip_sw"
+	tests_run
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/tc_chains.sh b/tools/testing/selftests/net/forwarding/tc_chains.sh
new file mode 100755
index 000000000..2934fb5ed
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/tc_chains.sh
@@ -0,0 +1,205 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="unreachable_chain_test gact_goto_chain_test create_destroy_chain \
+	   template_filter_fits"
+NUM_NETIFS=2
+source tc_common.sh
+source lib.sh
+
+tcflags="skip_hw"
+
+h1_create()
+{
+	simple_if_init $h1 192.0.2.1/24
+}
+
+h1_destroy()
+{
+	simple_if_fini $h1 192.0.2.1/24
+}
+
+h2_create()
+{
+	simple_if_init $h2 192.0.2.2/24
+	tc qdisc add dev $h2 clsact
+}
+
+h2_destroy()
+{
+	tc qdisc del dev $h2 clsact
+	simple_if_fini $h2 192.0.2.2/24
+}
+
+unreachable_chain_test()
+{
+	RET=0
+
+	tc filter add dev $h2 ingress chain 1 protocol ip pref 1 handle 1101 \
+		flower $tcflags dst_mac $h2mac action drop
+
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip -q
+
+	tc_check_packets "dev $h2 ingress" 1101 1
+	check_fail $? "matched on filter in unreachable chain"
+
+	tc filter del dev $h2 ingress chain 1 protocol ip pref 1 handle 1101 \
+		flower
+
+	log_test "unreachable chain ($tcflags)"
+}
+
+gact_goto_chain_test()
+{
+	RET=0
+
+	tc filter add dev $h2 ingress chain 1 protocol ip pref 1 handle 1101 \
+		flower $tcflags dst_mac $h2mac action drop
+	tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \
+		$tcflags dst_mac $h2mac action drop
+	tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+		$tcflags dst_mac $h2mac action goto chain 1
+
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip -q
+
+	tc_check_packets "dev $h2 ingress" 102 1
+	check_fail $? "Matched on a wrong filter"
+
+	tc_check_packets "dev $h2 ingress" 101 1
+	check_err $? "Did not match on correct filter with goto chain action"
+
+	tc_check_packets "dev $h2 ingress" 1101 1
+	check_err $? "Did not match on correct filter in chain 1"
+
+	tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+	tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
+	tc filter del dev $h2 ingress chain 1 protocol ip pref 1 handle 1101 \
+		flower
+
+	log_test "gact goto chain ($tcflags)"
+}
+
+create_destroy_chain()
+{
+	RET=0
+
+	tc chain add dev $h2 ingress
+	check_err $? "Failed to create default chain"
+
+	output="$(tc -j chain get dev $h2 ingress)"
+	check_err $? "Failed to get default chain"
+
+	echo $output | jq -e ".[] | select(.chain == 0)" &> /dev/null
+	check_err $? "Unexpected output for default chain"
+
+	tc chain add dev $h2 ingress chain 1
+	check_err $? "Failed to create chain 1"
+
+	output="$(tc -j chain get dev $h2 ingress chain 1)"
+	check_err $? "Failed to get chain 1"
+
+	echo $output | jq -e ".[] | select(.chain == 1)" &> /dev/null
+	check_err $? "Unexpected output for chain 1"
+
+	output="$(tc -j chain show dev $h2 ingress)"
+	check_err $? "Failed to dump chains"
+
+	echo $output | jq -e ".[] | select(.chain == 0)" &> /dev/null
+	check_err $? "Can't find default chain in dump"
+
+	echo $output | jq -e ".[] | select(.chain == 1)" &> /dev/null
+	check_err $? "Can't find chain 1 in dump"
+
+	tc chain del dev $h2 ingress
+	check_err $? "Failed to destroy default chain"
+
+	tc chain del dev $h2 ingress chain 1
+	check_err $? "Failed to destroy chain 1"
+
+	log_test "create destroy chain"
+}
+
+template_filter_fits()
+{
+	RET=0
+
+	tc chain add dev $h2 ingress protocol ip \
+		flower dst_mac 00:00:00:00:00:00/FF:FF:FF:FF:FF:FF &> /dev/null
+	tc chain add dev $h2 ingress chain 1 protocol ip \
+		flower src_mac 00:00:00:00:00:00/FF:FF:FF:FF:FF:FF &> /dev/null
+
+	tc filter add dev $h2 ingress protocol ip pref 1 handle 1101 \
+		flower dst_mac $h2mac action drop
+	check_err $? "Failed to insert filter which fits template"
+
+	tc filter add dev $h2 ingress protocol ip pref 1 handle 1102 \
+		flower src_mac $h2mac action drop &> /dev/null
+	check_fail $? "Incorrectly succeded to insert filter which does not template"
+
+	tc filter add dev $h2 ingress chain 1 protocol ip pref 1 handle 1101 \
+		flower src_mac $h2mac action drop
+	check_err $? "Failed to insert filter which fits template"
+
+	tc filter add dev $h2 ingress chain 1 protocol ip pref 1 handle 1102 \
+		flower dst_mac $h2mac action drop &> /dev/null
+	check_fail $? "Incorrectly succeded to insert filter which does not template"
+
+	tc filter del dev $h2 ingress chain 1 protocol ip pref 1 handle 1102 \
+		flower &> /dev/null
+	tc filter del dev $h2 ingress chain 1 protocol ip pref 1 handle 1101 \
+		flower &> /dev/null
+
+	tc filter del dev $h2 ingress protocol ip pref 1 handle 1102 \
+		flower &> /dev/null
+	tc filter del dev $h2 ingress protocol ip pref 1 handle 1101 \
+		flower &> /dev/null
+
+	tc chain del dev $h2 ingress chain 1
+	tc chain del dev $h2 ingress
+
+	log_test "template filter fits"
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	h2=${NETIFS[p2]}
+	h1mac=$(mac_get $h1)
+	h2mac=$(mac_get $h2)
+
+	vrf_prepare
+
+	h1_create
+	h2_create
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	h2_destroy
+	h1_destroy
+
+	vrf_cleanup
+}
+
+check_tc_chain_support
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+tc_offload_check
+if [[ $? -ne 0 ]]; then
+	log_info "Could not test offloaded functionality"
+else
+	tcflags="skip_sw"
+	tests_run
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/tc_common.sh b/tools/testing/selftests/net/forwarding/tc_common.sh
new file mode 100644
index 000000000..9d3b64a2a
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/tc_common.sh
@@ -0,0 +1,25 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+CHECK_TC="yes"
+
+tc_check_packets()
+{
+	local id=$1
+	local handle=$2
+	local count=$3
+	local ret
+
+	output="$(tc -j -s filter show $id)"
+	# workaround the jq bug which causes jq to return 0 in case input is ""
+	ret=$?
+	if [[ $ret -ne 0 ]]; then
+		return $ret
+	fi
+	echo $output | \
+		jq -e ".[] \
+		| select(.options.handle == $handle) \
+		| select(.options.actions[0].stats.packets == $count)" \
+		&> /dev/null
+	return $?
+}
diff --git a/tools/testing/selftests/net/forwarding/tc_flower.sh b/tools/testing/selftests/net/forwarding/tc_flower.sh
new file mode 100755
index 000000000..20d1077e5
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/tc_flower.sh
@@ -0,0 +1,260 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="match_dst_mac_test match_src_mac_test match_dst_ip_test \
+	match_src_ip_test match_ip_flags_test"
+NUM_NETIFS=2
+source tc_common.sh
+source lib.sh
+
+tcflags="skip_hw"
+
+h1_create()
+{
+	simple_if_init $h1 192.0.2.1/24 198.51.100.1/24
+}
+
+h1_destroy()
+{
+	simple_if_fini $h1 192.0.2.1/24 198.51.100.1/24
+}
+
+h2_create()
+{
+	simple_if_init $h2 192.0.2.2/24 198.51.100.2/24
+	tc qdisc add dev $h2 clsact
+}
+
+h2_destroy()
+{
+	tc qdisc del dev $h2 clsact
+	simple_if_fini $h2 192.0.2.2/24 198.51.100.2/24
+}
+
+match_dst_mac_test()
+{
+	local dummy_mac=de:ad:be:ef:aa:aa
+
+	RET=0
+
+	tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+		$tcflags dst_mac $dummy_mac action drop
+	tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \
+		$tcflags dst_mac $h2mac action drop
+
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip -q
+
+	tc_check_packets "dev $h2 ingress" 101 1
+	check_fail $? "Matched on a wrong filter"
+
+	tc_check_packets "dev $h2 ingress" 102 1
+	check_err $? "Did not match on correct filter"
+
+	tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+	tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
+
+	log_test "dst_mac match ($tcflags)"
+}
+
+match_src_mac_test()
+{
+	local dummy_mac=de:ad:be:ef:aa:aa
+
+	RET=0
+
+	tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+		$tcflags src_mac $dummy_mac action drop
+	tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \
+		$tcflags src_mac $h1mac action drop
+
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip -q
+
+	tc_check_packets "dev $h2 ingress" 101 1
+	check_fail $? "Matched on a wrong filter"
+
+	tc_check_packets "dev $h2 ingress" 102 1
+	check_err $? "Did not match on correct filter"
+
+	tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+	tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
+
+	log_test "src_mac match ($tcflags)"
+}
+
+match_dst_ip_test()
+{
+	RET=0
+
+	tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+		$tcflags dst_ip 198.51.100.2 action drop
+	tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \
+		$tcflags dst_ip 192.0.2.2 action drop
+	tc filter add dev $h2 ingress protocol ip pref 3 handle 103 flower \
+		$tcflags dst_ip 192.0.2.0/24 action drop
+
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip -q
+
+	tc_check_packets "dev $h2 ingress" 101 1
+	check_fail $? "Matched on a wrong filter"
+
+	tc_check_packets "dev $h2 ingress" 102 1
+	check_err $? "Did not match on correct filter"
+
+	tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
+
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip -q
+
+	tc_check_packets "dev $h2 ingress" 103 1
+	check_err $? "Did not match on correct filter with mask"
+
+	tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+	tc filter del dev $h2 ingress protocol ip pref 3 handle 103 flower
+
+	log_test "dst_ip match ($tcflags)"
+}
+
+match_src_ip_test()
+{
+	RET=0
+
+	tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+		$tcflags src_ip 198.51.100.1 action drop
+	tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \
+		$tcflags src_ip 192.0.2.1 action drop
+	tc filter add dev $h2 ingress protocol ip pref 3 handle 103 flower \
+		$tcflags src_ip 192.0.2.0/24 action drop
+
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip -q
+
+	tc_check_packets "dev $h2 ingress" 101 1
+	check_fail $? "Matched on a wrong filter"
+
+	tc_check_packets "dev $h2 ingress" 102 1
+	check_err $? "Did not match on correct filter"
+
+	tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
+
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip -q
+
+	tc_check_packets "dev $h2 ingress" 103 1
+	check_err $? "Did not match on correct filter with mask"
+
+	tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+	tc filter del dev $h2 ingress protocol ip pref 3 handle 103 flower
+
+	log_test "src_ip match ($tcflags)"
+}
+
+match_ip_flags_test()
+{
+	RET=0
+
+	tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+		$tcflags ip_flags frag action continue
+	tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \
+		$tcflags ip_flags firstfrag action continue
+	tc filter add dev $h2 ingress protocol ip pref 3 handle 103 flower \
+		$tcflags ip_flags nofirstfrag action continue
+	tc filter add dev $h2 ingress protocol ip pref 4 handle 104 flower \
+		$tcflags ip_flags nofrag action drop
+
+	$MZ $h1 -c 1 -p 1000 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip "frag=0" -q
+
+	tc_check_packets "dev $h2 ingress" 101 1
+	check_fail $? "Matched on wrong frag filter (nofrag)"
+
+	tc_check_packets "dev $h2 ingress" 102 1
+	check_fail $? "Matched on wrong firstfrag filter (nofrag)"
+
+	tc_check_packets "dev $h2 ingress" 103 1
+	check_err $? "Did not match on nofirstfrag filter (nofrag) "
+
+	tc_check_packets "dev $h2 ingress" 104 1
+	check_err $? "Did not match on nofrag filter (nofrag)"
+
+	$MZ $h1 -c 1 -p 1000 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip "frag=0,mf" -q
+
+	tc_check_packets "dev $h2 ingress" 101 1
+	check_err $? "Did not match on frag filter (1stfrag)"
+
+	tc_check_packets "dev $h2 ingress" 102 1
+	check_err $? "Did not match fistfrag filter (1stfrag)"
+
+	tc_check_packets "dev $h2 ingress" 103 1
+	check_err $? "Matched on wrong nofirstfrag filter (1stfrag)"
+
+	tc_check_packets "dev $h2 ingress" 104 1
+	check_err $? "Match on wrong nofrag filter (1stfrag)"
+
+	$MZ $h1 -c 1 -p 1000 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip "frag=256,mf" -q
+	$MZ $h1 -c 1 -p 1000 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip "frag=256" -q
+
+	tc_check_packets "dev $h2 ingress" 101 3
+	check_err $? "Did not match on frag filter (no1stfrag)"
+
+	tc_check_packets "dev $h2 ingress" 102 1
+	check_err $? "Matched on wrong firstfrag filter (no1stfrag)"
+
+	tc_check_packets "dev $h2 ingress" 103 3
+	check_err $? "Did not match on nofirstfrag filter (no1stfrag)"
+
+	tc_check_packets "dev $h2 ingress" 104 1
+	check_err $? "Matched on nofrag filter (no1stfrag)"
+
+	tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+	tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
+	tc filter del dev $h2 ingress protocol ip pref 3 handle 103 flower
+	tc filter del dev $h2 ingress protocol ip pref 4 handle 104 flower
+
+	log_test "ip_flags match ($tcflags)"
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	h2=${NETIFS[p2]}
+	h1mac=$(mac_get $h1)
+	h2mac=$(mac_get $h2)
+
+	vrf_prepare
+
+	h1_create
+	h2_create
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	h2_destroy
+	h1_destroy
+
+	vrf_cleanup
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+tc_offload_check
+if [[ $? -ne 0 ]]; then
+	log_info "Could not test offloaded functionality"
+else
+	tcflags="skip_sw"
+	tests_run
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/tc_shblocks.sh b/tools/testing/selftests/net/forwarding/tc_shblocks.sh
new file mode 100755
index 000000000..9826a446e
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/tc_shblocks.sh
@@ -0,0 +1,125 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="shared_block_test"
+NUM_NETIFS=4
+source tc_common.sh
+source lib.sh
+
+tcflags="skip_hw"
+
+h1_create()
+{
+	simple_if_init $h1 192.0.2.1/24
+}
+
+h1_destroy()
+{
+	simple_if_fini $h1 192.0.2.1/24
+}
+
+h2_create()
+{
+	simple_if_init $h2 192.0.2.1/24
+}
+
+h2_destroy()
+{
+	simple_if_fini $h2 192.0.2.1/24
+}
+
+switch_create()
+{
+	simple_if_init $swp1 192.0.2.2/24
+	tc qdisc add dev $swp1 ingress_block 22 egress_block 23 clsact
+
+	simple_if_init $swp2 192.0.2.2/24
+	tc qdisc add dev $swp2 ingress_block 22 egress_block 23 clsact
+}
+
+switch_destroy()
+{
+	tc qdisc del dev $swp2 clsact
+	simple_if_fini $swp2 192.0.2.2/24
+
+	tc qdisc del dev $swp1 clsact
+	simple_if_fini $swp1 192.0.2.2/24
+}
+
+shared_block_test()
+{
+	RET=0
+
+	tc filter add block 22 protocol ip pref 1 handle 101 flower \
+		$tcflags dst_ip 192.0.2.2 action drop
+
+	$MZ $h1 -c 1 -p 64 -a $h1mac -b $swmac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip -q
+
+	tc_check_packets "block 22" 101 1
+	check_err $? "Did not match first incoming packet on a block"
+
+	$MZ $h2 -c 1 -p 64 -a $h2mac -b $swmac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip -q
+
+	tc_check_packets "block 22" 101 2
+	check_err $? "Did not match second incoming packet on a block"
+
+	tc filter del block 22 protocol ip pref 1 handle 101 flower
+
+	log_test "shared block ($tcflags)"
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	swp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	h1mac=$(mac_get $h1)
+	h2mac=$(mac_get $h2)
+
+	swmac=$(mac_get $swp1)
+	swp2origmac=$(mac_get $swp2)
+	ip link set $swp2 address $swmac
+
+	vrf_prepare
+
+	h1_create
+	h2_create
+	switch_create
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	switch_destroy
+	h2_destroy
+	h1_destroy
+
+	vrf_cleanup
+
+	ip link set $swp2 address $swp2origmac
+}
+
+check_tc_shblock_support
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+tc_offload_check
+if [[ $? -ne 0 ]]; then
+	log_info "Could not test offloaded functionality"
+else
+	tcflags="skip_sw"
+	tests_run
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/in_netns.sh b/tools/testing/selftests/net/in_netns.sh
new file mode 100755
index 000000000..88795b510
--- /dev/null
+++ b/tools/testing/selftests/net/in_netns.sh
@@ -0,0 +1,23 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+#
+# Execute a subprocess in a network namespace
+
+set -e
+
+readonly NETNS="ns-$(mktemp -u XXXXXX)"
+
+setup() {
+	ip netns add "${NETNS}"
+	ip -netns "${NETNS}" link set lo up
+}
+
+cleanup() {
+	ip netns del "${NETNS}"
+}
+
+trap cleanup EXIT
+setup
+
+ip netns exec "${NETNS}" "$@"
+exit "$?"
diff --git a/tools/testing/selftests/net/ip6_gre_headroom.sh b/tools/testing/selftests/net/ip6_gre_headroom.sh
new file mode 100755
index 000000000..5b41e8bb6
--- /dev/null
+++ b/tools/testing/selftests/net/ip6_gre_headroom.sh
@@ -0,0 +1,65 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test that enough headroom is reserved for the first packet passing through an
+# IPv6 GRE-like netdevice.
+
+setup_prepare()
+{
+	ip link add h1 type veth peer name swp1
+	ip link add h3 type veth peer name swp3
+
+	ip link set dev h1 up
+	ip address add 192.0.2.1/28 dev h1
+
+	ip link add dev vh3 type vrf table 20
+	ip link set dev h3 master vh3
+	ip link set dev vh3 up
+	ip link set dev h3 up
+
+	ip link set dev swp3 up
+	ip address add dev swp3 2001:db8:2::1/64
+	ip address add dev swp3 2001:db8:2::3/64
+
+	ip link set dev swp1 up
+	tc qdisc add dev swp1 clsact
+
+	ip link add name er6 type ip6erspan \
+	   local 2001:db8:2::1 remote 2001:db8:2::2 oseq okey 123
+	ip link set dev er6 up
+
+	ip link add name gt6 type ip6gretap \
+	   local 2001:db8:2::3 remote 2001:db8:2::4
+	ip link set dev gt6 up
+
+	sleep 1
+}
+
+cleanup()
+{
+	ip link del dev gt6
+	ip link del dev er6
+	ip link del dev swp1
+	ip link del dev swp3
+	ip link del dev vh3
+}
+
+test_headroom()
+{
+	local type=$1; shift
+	local tundev=$1; shift
+
+	tc filter add dev swp1 ingress pref 1000 matchall skip_hw \
+		action mirred egress mirror dev $tundev
+	ping -I h1 192.0.2.2 -c 1 -w 2 &> /dev/null
+	tc filter del dev swp1 ingress pref 1000
+
+	# If it doesn't panic, it passes.
+	printf "TEST: %-60s  [PASS]\n" "$type headroom"
+}
+
+trap cleanup EXIT
+
+setup_prepare
+test_headroom ip6gretap gt6
+test_headroom ip6erspan er6
diff --git a/tools/testing/selftests/net/msg_zerocopy.c b/tools/testing/selftests/net/msg_zerocopy.c
new file mode 100644
index 000000000..c53959193
--- /dev/null
+++ b/tools/testing/selftests/net/msg_zerocopy.c
@@ -0,0 +1,810 @@
+/* Evaluate MSG_ZEROCOPY
+ *
+ * Send traffic between two processes over one of the supported
+ * protocols and modes:
+ *
+ * PF_INET/PF_INET6
+ * - SOCK_STREAM
+ * - SOCK_DGRAM
+ * - SOCK_DGRAM with UDP_CORK
+ * - SOCK_RAW
+ * - SOCK_RAW with IP_HDRINCL
+ *
+ * PF_PACKET
+ * - SOCK_DGRAM
+ * - SOCK_RAW
+ *
+ * PF_RDS
+ * - SOCK_SEQPACKET
+ *
+ * Start this program on two connected hosts, one in send mode and
+ * the other with option '-r' to put it in receiver mode.
+ *
+ * If zerocopy mode ('-z') is enabled, the sender will verify that
+ * the kernel queues completions on the error queue for all zerocopy
+ * transfers.
+ */
+
+#define _GNU_SOURCE
+
+#include <arpa/inet.h>
+#include <error.h>
+#include <errno.h>
+#include <limits.h>
+#include <linux/errqueue.h>
+#include <linux/if_packet.h>
+#include <linux/ipv6.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <net/ethernet.h>
+#include <net/if.h>
+#include <netinet/ip.h>
+#include <netinet/ip6.h>
+#include <netinet/tcp.h>
+#include <netinet/udp.h>
+#include <poll.h>
+#include <sched.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#include <linux/rds.h>
+
+#ifndef SO_EE_ORIGIN_ZEROCOPY
+#define SO_EE_ORIGIN_ZEROCOPY		5
+#endif
+
+#ifndef SO_ZEROCOPY
+#define SO_ZEROCOPY	60
+#endif
+
+#ifndef SO_EE_CODE_ZEROCOPY_COPIED
+#define SO_EE_CODE_ZEROCOPY_COPIED	1
+#endif
+
+#ifndef MSG_ZEROCOPY
+#define MSG_ZEROCOPY	0x4000000
+#endif
+
+static int  cfg_cork;
+static bool cfg_cork_mixed;
+static int  cfg_cpu		= -1;		/* default: pin to last cpu */
+static int  cfg_family		= PF_UNSPEC;
+static int  cfg_ifindex		= 1;
+static int  cfg_payload_len;
+static int  cfg_port		= 8000;
+static bool cfg_rx;
+static int  cfg_runtime_ms	= 4200;
+static int  cfg_verbose;
+static int  cfg_waittime_ms	= 500;
+static bool cfg_zerocopy;
+
+static socklen_t cfg_alen;
+static struct sockaddr_storage cfg_dst_addr;
+static struct sockaddr_storage cfg_src_addr;
+
+static char payload[IP_MAXPACKET];
+static long packets, bytes, completions, expected_completions;
+static int  zerocopied = -1;
+static uint32_t next_completion;
+
+static unsigned long gettimeofday_ms(void)
+{
+	struct timeval tv;
+
+	gettimeofday(&tv, NULL);
+	return (tv.tv_sec * 1000) + (tv.tv_usec / 1000);
+}
+
+static uint16_t get_ip_csum(const uint16_t *start, int num_words)
+{
+	unsigned long sum = 0;
+	int i;
+
+	for (i = 0; i < num_words; i++)
+		sum += start[i];
+
+	while (sum >> 16)
+		sum = (sum & 0xFFFF) + (sum >> 16);
+
+	return ~sum;
+}
+
+static int do_setcpu(int cpu)
+{
+	cpu_set_t mask;
+
+	CPU_ZERO(&mask);
+	CPU_SET(cpu, &mask);
+	if (sched_setaffinity(0, sizeof(mask), &mask))
+		fprintf(stderr, "cpu: unable to pin, may increase variance.\n");
+	else if (cfg_verbose)
+		fprintf(stderr, "cpu: %u\n", cpu);
+
+	return 0;
+}
+
+static void do_setsockopt(int fd, int level, int optname, int val)
+{
+	if (setsockopt(fd, level, optname, &val, sizeof(val)))
+		error(1, errno, "setsockopt %d.%d: %d", level, optname, val);
+}
+
+static int do_poll(int fd, int events)
+{
+	struct pollfd pfd;
+	int ret;
+
+	pfd.events = events;
+	pfd.revents = 0;
+	pfd.fd = fd;
+
+	ret = poll(&pfd, 1, cfg_waittime_ms);
+	if (ret == -1)
+		error(1, errno, "poll");
+
+	return ret && (pfd.revents & events);
+}
+
+static int do_accept(int fd)
+{
+	int fda = fd;
+
+	fd = accept(fda, NULL, NULL);
+	if (fd == -1)
+		error(1, errno, "accept");
+	if (close(fda))
+		error(1, errno, "close listen sock");
+
+	return fd;
+}
+
+static void add_zcopy_cookie(struct msghdr *msg, uint32_t cookie)
+{
+	struct cmsghdr *cm;
+
+	if (!msg->msg_control)
+		error(1, errno, "NULL cookie");
+	cm = (void *)msg->msg_control;
+	cm->cmsg_len = CMSG_LEN(sizeof(cookie));
+	cm->cmsg_level = SOL_RDS;
+	cm->cmsg_type = RDS_CMSG_ZCOPY_COOKIE;
+	memcpy(CMSG_DATA(cm), &cookie, sizeof(cookie));
+}
+
+static bool do_sendmsg(int fd, struct msghdr *msg, bool do_zerocopy, int domain)
+{
+	int ret, len, i, flags;
+	static uint32_t cookie;
+	char ckbuf[CMSG_SPACE(sizeof(cookie))];
+
+	len = 0;
+	for (i = 0; i < msg->msg_iovlen; i++)
+		len += msg->msg_iov[i].iov_len;
+
+	flags = MSG_DONTWAIT;
+	if (do_zerocopy) {
+		flags |= MSG_ZEROCOPY;
+		if (domain == PF_RDS) {
+			memset(&msg->msg_control, 0, sizeof(msg->msg_control));
+			msg->msg_controllen = CMSG_SPACE(sizeof(cookie));
+			msg->msg_control = (struct cmsghdr *)ckbuf;
+			add_zcopy_cookie(msg, ++cookie);
+		}
+	}
+
+	ret = sendmsg(fd, msg, flags);
+	if (ret == -1 && errno == EAGAIN)
+		return false;
+	if (ret == -1)
+		error(1, errno, "send");
+	if (cfg_verbose && ret != len)
+		fprintf(stderr, "send: ret=%u != %u\n", ret, len);
+
+	if (len) {
+		packets++;
+		bytes += ret;
+		if (do_zerocopy && ret)
+			expected_completions++;
+	}
+	if (do_zerocopy && domain == PF_RDS) {
+		msg->msg_control = NULL;
+		msg->msg_controllen = 0;
+	}
+
+	return true;
+}
+
+static void do_sendmsg_corked(int fd, struct msghdr *msg)
+{
+	bool do_zerocopy = cfg_zerocopy;
+	int i, payload_len, extra_len;
+
+	/* split up the packet. for non-multiple, make first buffer longer */
+	payload_len = cfg_payload_len / cfg_cork;
+	extra_len = cfg_payload_len - (cfg_cork * payload_len);
+
+	do_setsockopt(fd, IPPROTO_UDP, UDP_CORK, 1);
+
+	for (i = 0; i < cfg_cork; i++) {
+
+		/* in mixed-frags mode, alternate zerocopy and copy frags
+		 * start with non-zerocopy, to ensure attach later works
+		 */
+		if (cfg_cork_mixed)
+			do_zerocopy = (i & 1);
+
+		msg->msg_iov[0].iov_len = payload_len + extra_len;
+		extra_len = 0;
+
+		do_sendmsg(fd, msg, do_zerocopy,
+			   (cfg_dst_addr.ss_family == AF_INET ?
+			    PF_INET : PF_INET6));
+	}
+
+	do_setsockopt(fd, IPPROTO_UDP, UDP_CORK, 0);
+}
+
+static int setup_iph(struct iphdr *iph, uint16_t payload_len)
+{
+	struct sockaddr_in *daddr = (void *) &cfg_dst_addr;
+	struct sockaddr_in *saddr = (void *) &cfg_src_addr;
+
+	memset(iph, 0, sizeof(*iph));
+
+	iph->version	= 4;
+	iph->tos	= 0;
+	iph->ihl	= 5;
+	iph->ttl	= 2;
+	iph->saddr	= saddr->sin_addr.s_addr;
+	iph->daddr	= daddr->sin_addr.s_addr;
+	iph->protocol	= IPPROTO_EGP;
+	iph->tot_len	= htons(sizeof(*iph) + payload_len);
+	iph->check	= get_ip_csum((void *) iph, iph->ihl << 1);
+
+	return sizeof(*iph);
+}
+
+static int setup_ip6h(struct ipv6hdr *ip6h, uint16_t payload_len)
+{
+	struct sockaddr_in6 *daddr = (void *) &cfg_dst_addr;
+	struct sockaddr_in6 *saddr = (void *) &cfg_src_addr;
+
+	memset(ip6h, 0, sizeof(*ip6h));
+
+	ip6h->version		= 6;
+	ip6h->payload_len	= htons(payload_len);
+	ip6h->nexthdr		= IPPROTO_EGP;
+	ip6h->hop_limit		= 2;
+	ip6h->saddr		= saddr->sin6_addr;
+	ip6h->daddr		= daddr->sin6_addr;
+
+	return sizeof(*ip6h);
+}
+
+
+static void setup_sockaddr(int domain, const char *str_addr,
+			   struct sockaddr_storage *sockaddr)
+{
+	struct sockaddr_in6 *addr6 = (void *) sockaddr;
+	struct sockaddr_in *addr4 = (void *) sockaddr;
+
+	switch (domain) {
+	case PF_INET:
+		memset(addr4, 0, sizeof(*addr4));
+		addr4->sin_family = AF_INET;
+		addr4->sin_port = htons(cfg_port);
+		if (str_addr &&
+		    inet_pton(AF_INET, str_addr, &(addr4->sin_addr)) != 1)
+			error(1, 0, "ipv4 parse error: %s", str_addr);
+		break;
+	case PF_INET6:
+		memset(addr6, 0, sizeof(*addr6));
+		addr6->sin6_family = AF_INET6;
+		addr6->sin6_port = htons(cfg_port);
+		if (str_addr &&
+		    inet_pton(AF_INET6, str_addr, &(addr6->sin6_addr)) != 1)
+			error(1, 0, "ipv6 parse error: %s", str_addr);
+		break;
+	default:
+		error(1, 0, "illegal domain");
+	}
+}
+
+static int do_setup_tx(int domain, int type, int protocol)
+{
+	int fd;
+
+	fd = socket(domain, type, protocol);
+	if (fd == -1)
+		error(1, errno, "socket t");
+
+	do_setsockopt(fd, SOL_SOCKET, SO_SNDBUF, 1 << 21);
+	if (cfg_zerocopy)
+		do_setsockopt(fd, SOL_SOCKET, SO_ZEROCOPY, 1);
+
+	if (domain != PF_PACKET && domain != PF_RDS)
+		if (connect(fd, (void *) &cfg_dst_addr, cfg_alen))
+			error(1, errno, "connect");
+
+	if (domain == PF_RDS) {
+		if (bind(fd, (void *) &cfg_src_addr, cfg_alen))
+			error(1, errno, "bind");
+	}
+
+	return fd;
+}
+
+static uint32_t do_process_zerocopy_cookies(struct rds_zcopy_cookies *ck)
+{
+	int i;
+
+	if (ck->num > RDS_MAX_ZCOOKIES)
+		error(1, 0, "Returned %d cookies, max expected %d\n",
+		      ck->num, RDS_MAX_ZCOOKIES);
+	for (i = 0; i < ck->num; i++)
+		if (cfg_verbose >= 2)
+			fprintf(stderr, "%d\n", ck->cookies[i]);
+	return ck->num;
+}
+
+static bool do_recvmsg_completion(int fd)
+{
+	char cmsgbuf[CMSG_SPACE(sizeof(struct rds_zcopy_cookies))];
+	struct rds_zcopy_cookies *ck;
+	struct cmsghdr *cmsg;
+	struct msghdr msg;
+	bool ret = false;
+
+	memset(&msg, 0, sizeof(msg));
+	msg.msg_control = cmsgbuf;
+	msg.msg_controllen = sizeof(cmsgbuf);
+
+	if (recvmsg(fd, &msg, MSG_DONTWAIT))
+		return ret;
+
+	if (msg.msg_flags & MSG_CTRUNC)
+		error(1, errno, "recvmsg notification: truncated");
+
+	for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) {
+		if (cmsg->cmsg_level == SOL_RDS &&
+		    cmsg->cmsg_type == RDS_CMSG_ZCOPY_COMPLETION) {
+
+			ck = (struct rds_zcopy_cookies *)CMSG_DATA(cmsg);
+			completions += do_process_zerocopy_cookies(ck);
+			ret = true;
+			break;
+		}
+		error(0, 0, "ignoring cmsg at level %d type %d\n",
+			    cmsg->cmsg_level, cmsg->cmsg_type);
+	}
+	return ret;
+}
+
+static bool do_recv_completion(int fd, int domain)
+{
+	struct sock_extended_err *serr;
+	struct msghdr msg = {};
+	struct cmsghdr *cm;
+	uint32_t hi, lo, range;
+	int ret, zerocopy;
+	char control[100];
+
+	if (domain == PF_RDS)
+		return do_recvmsg_completion(fd);
+
+	msg.msg_control = control;
+	msg.msg_controllen = sizeof(control);
+
+	ret = recvmsg(fd, &msg, MSG_ERRQUEUE);
+	if (ret == -1 && errno == EAGAIN)
+		return false;
+	if (ret == -1)
+		error(1, errno, "recvmsg notification");
+	if (msg.msg_flags & MSG_CTRUNC)
+		error(1, errno, "recvmsg notification: truncated");
+
+	cm = CMSG_FIRSTHDR(&msg);
+	if (!cm)
+		error(1, 0, "cmsg: no cmsg");
+	if (!((cm->cmsg_level == SOL_IP && cm->cmsg_type == IP_RECVERR) ||
+	      (cm->cmsg_level == SOL_IPV6 && cm->cmsg_type == IPV6_RECVERR) ||
+	      (cm->cmsg_level == SOL_PACKET && cm->cmsg_type == PACKET_TX_TIMESTAMP)))
+		error(1, 0, "serr: wrong type: %d.%d",
+		      cm->cmsg_level, cm->cmsg_type);
+
+	serr = (void *) CMSG_DATA(cm);
+
+	if (serr->ee_origin != SO_EE_ORIGIN_ZEROCOPY)
+		error(1, 0, "serr: wrong origin: %u", serr->ee_origin);
+	if (serr->ee_errno != 0)
+		error(1, 0, "serr: wrong error code: %u", serr->ee_errno);
+
+	hi = serr->ee_data;
+	lo = serr->ee_info;
+	range = hi - lo + 1;
+
+	/* Detect notification gaps. These should not happen often, if at all.
+	 * Gaps can occur due to drops, reordering and retransmissions.
+	 */
+	if (lo != next_completion)
+		fprintf(stderr, "gap: %u..%u does not append to %u\n",
+			lo, hi, next_completion);
+	next_completion = hi + 1;
+
+	zerocopy = !(serr->ee_code & SO_EE_CODE_ZEROCOPY_COPIED);
+	if (zerocopied == -1)
+		zerocopied = zerocopy;
+	else if (zerocopied != zerocopy) {
+		fprintf(stderr, "serr: inconsistent\n");
+		zerocopied = zerocopy;
+	}
+
+	if (cfg_verbose >= 2)
+		fprintf(stderr, "completed: %u (h=%u l=%u)\n",
+			range, hi, lo);
+
+	completions += range;
+	return true;
+}
+
+/* Read all outstanding messages on the errqueue */
+static void do_recv_completions(int fd, int domain)
+{
+	while (do_recv_completion(fd, domain)) {}
+}
+
+/* Wait for all remaining completions on the errqueue */
+static void do_recv_remaining_completions(int fd, int domain)
+{
+	int64_t tstop = gettimeofday_ms() + cfg_waittime_ms;
+
+	while (completions < expected_completions &&
+	       gettimeofday_ms() < tstop) {
+		if (do_poll(fd, domain == PF_RDS ? POLLIN : POLLERR))
+			do_recv_completions(fd, domain);
+	}
+
+	if (completions < expected_completions)
+		fprintf(stderr, "missing notifications: %lu < %lu\n",
+			completions, expected_completions);
+}
+
+static void do_tx(int domain, int type, int protocol)
+{
+	struct iovec iov[3] = { {0} };
+	struct sockaddr_ll laddr;
+	struct msghdr msg = {0};
+	struct ethhdr eth;
+	union {
+		struct ipv6hdr ip6h;
+		struct iphdr iph;
+	} nh;
+	uint64_t tstop;
+	int fd;
+
+	fd = do_setup_tx(domain, type, protocol);
+
+	if (domain == PF_PACKET) {
+		uint16_t proto = cfg_family == PF_INET ? ETH_P_IP : ETH_P_IPV6;
+
+		/* sock_raw passes ll header as data */
+		if (type == SOCK_RAW) {
+			memset(eth.h_dest, 0x06, ETH_ALEN);
+			memset(eth.h_source, 0x02, ETH_ALEN);
+			eth.h_proto = htons(proto);
+			iov[0].iov_base = &eth;
+			iov[0].iov_len = sizeof(eth);
+			msg.msg_iovlen++;
+		}
+
+		/* both sock_raw and sock_dgram expect name */
+		memset(&laddr, 0, sizeof(laddr));
+		laddr.sll_family	= AF_PACKET;
+		laddr.sll_ifindex	= cfg_ifindex;
+		laddr.sll_protocol	= htons(proto);
+		laddr.sll_halen		= ETH_ALEN;
+
+		memset(laddr.sll_addr, 0x06, ETH_ALEN);
+
+		msg.msg_name		= &laddr;
+		msg.msg_namelen		= sizeof(laddr);
+	}
+
+	/* packet and raw sockets with hdrincl must pass network header */
+	if (domain == PF_PACKET || protocol == IPPROTO_RAW) {
+		if (cfg_family == PF_INET)
+			iov[1].iov_len = setup_iph(&nh.iph, cfg_payload_len);
+		else
+			iov[1].iov_len = setup_ip6h(&nh.ip6h, cfg_payload_len);
+
+		iov[1].iov_base = (void *) &nh;
+		msg.msg_iovlen++;
+	}
+
+	if (domain == PF_RDS) {
+		msg.msg_name = &cfg_dst_addr;
+		msg.msg_namelen =  (cfg_dst_addr.ss_family == AF_INET ?
+				    sizeof(struct sockaddr_in) :
+				    sizeof(struct sockaddr_in6));
+	}
+
+	iov[2].iov_base = payload;
+	iov[2].iov_len = cfg_payload_len;
+	msg.msg_iovlen++;
+	msg.msg_iov = &iov[3 - msg.msg_iovlen];
+
+	tstop = gettimeofday_ms() + cfg_runtime_ms;
+	do {
+		if (cfg_cork)
+			do_sendmsg_corked(fd, &msg);
+		else
+			do_sendmsg(fd, &msg, cfg_zerocopy, domain);
+
+		while (!do_poll(fd, POLLOUT)) {
+			if (cfg_zerocopy)
+				do_recv_completions(fd, domain);
+		}
+
+	} while (gettimeofday_ms() < tstop);
+
+	if (cfg_zerocopy)
+		do_recv_remaining_completions(fd, domain);
+
+	if (close(fd))
+		error(1, errno, "close");
+
+	fprintf(stderr, "tx=%lu (%lu MB) txc=%lu zc=%c\n",
+		packets, bytes >> 20, completions,
+		zerocopied == 1 ? 'y' : 'n');
+}
+
+static int do_setup_rx(int domain, int type, int protocol)
+{
+	int fd;
+
+	/* If tx over PF_PACKET, rx over PF_INET(6)/SOCK_RAW,
+	 * to recv the only copy of the packet, not a clone
+	 */
+	if (domain == PF_PACKET)
+		error(1, 0, "Use PF_INET/SOCK_RAW to read");
+
+	if (type == SOCK_RAW && protocol == IPPROTO_RAW)
+		error(1, 0, "IPPROTO_RAW: not supported on Rx");
+
+	fd = socket(domain, type, protocol);
+	if (fd == -1)
+		error(1, errno, "socket r");
+
+	do_setsockopt(fd, SOL_SOCKET, SO_RCVBUF, 1 << 21);
+	do_setsockopt(fd, SOL_SOCKET, SO_RCVLOWAT, 1 << 16);
+	do_setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, 1);
+
+	if (bind(fd, (void *) &cfg_dst_addr, cfg_alen))
+		error(1, errno, "bind");
+
+	if (type == SOCK_STREAM) {
+		if (listen(fd, 1))
+			error(1, errno, "listen");
+		fd = do_accept(fd);
+	}
+
+	return fd;
+}
+
+/* Flush all outstanding bytes for the tcp receive queue */
+static void do_flush_tcp(int fd)
+{
+	int ret;
+
+	/* MSG_TRUNC flushes up to len bytes */
+	ret = recv(fd, NULL, 1 << 21, MSG_TRUNC | MSG_DONTWAIT);
+	if (ret == -1 && errno == EAGAIN)
+		return;
+	if (ret == -1)
+		error(1, errno, "flush");
+	if (!ret)
+		return;
+
+	packets++;
+	bytes += ret;
+}
+
+/* Flush all outstanding datagrams. Verify first few bytes of each. */
+static void do_flush_datagram(int fd, int type)
+{
+	int ret, off = 0;
+	char buf[64];
+
+	/* MSG_TRUNC will return full datagram length */
+	ret = recv(fd, buf, sizeof(buf), MSG_DONTWAIT | MSG_TRUNC);
+	if (ret == -1 && errno == EAGAIN)
+		return;
+
+	/* raw ipv4 return with header, raw ipv6 without */
+	if (cfg_family == PF_INET && type == SOCK_RAW) {
+		off += sizeof(struct iphdr);
+		ret -= sizeof(struct iphdr);
+	}
+
+	if (ret == -1)
+		error(1, errno, "recv");
+	if (ret != cfg_payload_len)
+		error(1, 0, "recv: ret=%u != %u", ret, cfg_payload_len);
+	if (ret > sizeof(buf) - off)
+		ret = sizeof(buf) - off;
+	if (memcmp(buf + off, payload, ret))
+		error(1, 0, "recv: data mismatch");
+
+	packets++;
+	bytes += cfg_payload_len;
+}
+
+static void do_rx(int domain, int type, int protocol)
+{
+	uint64_t tstop;
+	int fd;
+
+	fd = do_setup_rx(domain, type, protocol);
+
+	tstop = gettimeofday_ms() + cfg_runtime_ms;
+	do {
+		if (type == SOCK_STREAM)
+			do_flush_tcp(fd);
+		else
+			do_flush_datagram(fd, type);
+
+		do_poll(fd, POLLIN);
+
+	} while (gettimeofday_ms() < tstop);
+
+	if (close(fd))
+		error(1, errno, "close");
+
+	fprintf(stderr, "rx=%lu (%lu MB)\n", packets, bytes >> 20);
+}
+
+static void do_test(int domain, int type, int protocol)
+{
+	int i;
+
+	if (cfg_cork && (domain == PF_PACKET || type != SOCK_DGRAM))
+		error(1, 0, "can only cork udp sockets");
+
+	do_setcpu(cfg_cpu);
+
+	for (i = 0; i < IP_MAXPACKET; i++)
+		payload[i] = 'a' + (i % 26);
+
+	if (cfg_rx)
+		do_rx(domain, type, protocol);
+	else
+		do_tx(domain, type, protocol);
+}
+
+static void usage(const char *filepath)
+{
+	error(1, 0, "Usage: %s [options] <test>", filepath);
+}
+
+static void parse_opts(int argc, char **argv)
+{
+	const int max_payload_len = sizeof(payload) -
+				    sizeof(struct ipv6hdr) -
+				    sizeof(struct tcphdr) -
+				    40 /* max tcp options */;
+	int c;
+	char *daddr = NULL, *saddr = NULL;
+	char *cfg_test;
+
+	cfg_payload_len = max_payload_len;
+
+	while ((c = getopt(argc, argv, "46c:C:D:i:mp:rs:S:t:vz")) != -1) {
+		switch (c) {
+		case '4':
+			if (cfg_family != PF_UNSPEC)
+				error(1, 0, "Pass one of -4 or -6");
+			cfg_family = PF_INET;
+			cfg_alen = sizeof(struct sockaddr_in);
+			break;
+		case '6':
+			if (cfg_family != PF_UNSPEC)
+				error(1, 0, "Pass one of -4 or -6");
+			cfg_family = PF_INET6;
+			cfg_alen = sizeof(struct sockaddr_in6);
+			break;
+		case 'c':
+			cfg_cork = strtol(optarg, NULL, 0);
+			break;
+		case 'C':
+			cfg_cpu = strtol(optarg, NULL, 0);
+			break;
+		case 'D':
+			daddr = optarg;
+			break;
+		case 'i':
+			cfg_ifindex = if_nametoindex(optarg);
+			if (cfg_ifindex == 0)
+				error(1, errno, "invalid iface: %s", optarg);
+			break;
+		case 'm':
+			cfg_cork_mixed = true;
+			break;
+		case 'p':
+			cfg_port = strtoul(optarg, NULL, 0);
+			break;
+		case 'r':
+			cfg_rx = true;
+			break;
+		case 's':
+			cfg_payload_len = strtoul(optarg, NULL, 0);
+			break;
+		case 'S':
+			saddr = optarg;
+			break;
+		case 't':
+			cfg_runtime_ms = 200 + strtoul(optarg, NULL, 10) * 1000;
+			break;
+		case 'v':
+			cfg_verbose++;
+			break;
+		case 'z':
+			cfg_zerocopy = true;
+			break;
+		}
+	}
+
+	cfg_test = argv[argc - 1];
+	if (strcmp(cfg_test, "rds") == 0) {
+		if (!daddr)
+			error(1, 0, "-D <server addr> required for PF_RDS\n");
+		if (!cfg_rx && !saddr)
+			error(1, 0, "-S <client addr> required for PF_RDS\n");
+	}
+	setup_sockaddr(cfg_family, daddr, &cfg_dst_addr);
+	setup_sockaddr(cfg_family, saddr, &cfg_src_addr);
+
+	if (cfg_payload_len > max_payload_len)
+		error(1, 0, "-s: payload exceeds max (%d)", max_payload_len);
+	if (cfg_cork_mixed && (!cfg_zerocopy || !cfg_cork))
+		error(1, 0, "-m: cork_mixed requires corking and zerocopy");
+
+	if (optind != argc - 1)
+		usage(argv[0]);
+}
+
+int main(int argc, char **argv)
+{
+	const char *cfg_test;
+
+	parse_opts(argc, argv);
+
+	cfg_test = argv[argc - 1];
+
+	if (!strcmp(cfg_test, "packet"))
+		do_test(PF_PACKET, SOCK_RAW, 0);
+	else if (!strcmp(cfg_test, "packet_dgram"))
+		do_test(PF_PACKET, SOCK_DGRAM, 0);
+	else if (!strcmp(cfg_test, "raw"))
+		do_test(cfg_family, SOCK_RAW, IPPROTO_EGP);
+	else if (!strcmp(cfg_test, "raw_hdrincl"))
+		do_test(cfg_family, SOCK_RAW, IPPROTO_RAW);
+	else if (!strcmp(cfg_test, "tcp"))
+		do_test(cfg_family, SOCK_STREAM, 0);
+	else if (!strcmp(cfg_test, "udp"))
+		do_test(cfg_family, SOCK_DGRAM, 0);
+	else if (!strcmp(cfg_test, "rds"))
+		do_test(PF_RDS, SOCK_SEQPACKET, 0);
+	else
+		error(1, 0, "unknown cfg_test %s", cfg_test);
+
+	return 0;
+}
diff --git a/tools/testing/selftests/net/msg_zerocopy.sh b/tools/testing/selftests/net/msg_zerocopy.sh
new file mode 100755
index 000000000..c43c6debd
--- /dev/null
+++ b/tools/testing/selftests/net/msg_zerocopy.sh
@@ -0,0 +1,120 @@
+#!/bin/bash
+#
+# Send data between two processes across namespaces
+# Run twice: once without and once with zerocopy
+
+set -e
+
+readonly DEV="veth0"
+readonly DEV_MTU=65535
+readonly BIN="./msg_zerocopy"
+
+readonly RAND="$(mktemp -u XXXXXX)"
+readonly NSPREFIX="ns-${RAND}"
+readonly NS1="${NSPREFIX}1"
+readonly NS2="${NSPREFIX}2"
+
+readonly SADDR4='192.168.1.1'
+readonly DADDR4='192.168.1.2'
+readonly SADDR6='fd::1'
+readonly DADDR6='fd::2'
+
+readonly path_sysctl_mem="net.core.optmem_max"
+
+# No arguments: automated test
+if [[ "$#" -eq "0" ]]; then
+	$0 4 tcp -t 1
+	$0 6 tcp -t 1
+	echo "OK. All tests passed"
+	exit 0
+fi
+
+# Argument parsing
+if [[ "$#" -lt "2" ]]; then
+	echo "Usage: $0 [4|6] [tcp|udp|raw|raw_hdrincl|packet|packet_dgram] <args>"
+	exit 1
+fi
+
+readonly IP="$1"
+shift
+readonly TXMODE="$1"
+shift
+readonly EXTRA_ARGS="$@"
+
+# Argument parsing: configure addresses
+if [[ "${IP}" == "4" ]]; then
+	readonly SADDR="${SADDR4}"
+	readonly DADDR="${DADDR4}"
+elif [[ "${IP}" == "6" ]]; then
+	readonly SADDR="${SADDR6}"
+	readonly DADDR="${DADDR6}"
+else
+	echo "Invalid IP version ${IP}"
+	exit 1
+fi
+
+# Argument parsing: select receive mode
+#
+# This differs from send mode for
+# - packet:	use raw recv, because packet receives skb clones
+# - raw_hdrinc: use raw recv, because hdrincl is a tx-only option
+case "${TXMODE}" in
+'packet' | 'packet_dgram' | 'raw_hdrincl')
+	RXMODE='raw'
+	;;
+*)
+	RXMODE="${TXMODE}"
+	;;
+esac
+
+# Start of state changes: install cleanup handler
+save_sysctl_mem="$(sysctl -n ${path_sysctl_mem})"
+
+cleanup() {
+	ip netns del "${NS2}"
+	ip netns del "${NS1}"
+	sysctl -w -q "${path_sysctl_mem}=${save_sysctl_mem}"
+}
+
+trap cleanup EXIT
+
+# Configure system settings
+sysctl -w -q "${path_sysctl_mem}=1000000"
+
+# Create virtual ethernet pair between network namespaces
+ip netns add "${NS1}"
+ip netns add "${NS2}"
+
+ip link add "${DEV}" mtu "${DEV_MTU}" netns "${NS1}" type veth \
+  peer name "${DEV}" mtu "${DEV_MTU}" netns "${NS2}"
+
+# Bring the devices up
+ip -netns "${NS1}" link set "${DEV}" up
+ip -netns "${NS2}" link set "${DEV}" up
+
+# Set fixed MAC addresses on the devices
+ip -netns "${NS1}" link set dev "${DEV}" address 02:02:02:02:02:02
+ip -netns "${NS2}" link set dev "${DEV}" address 06:06:06:06:06:06
+
+# Add fixed IP addresses to the devices
+ip -netns "${NS1}" addr add 192.168.1.1/24 dev "${DEV}"
+ip -netns "${NS2}" addr add 192.168.1.2/24 dev "${DEV}"
+ip -netns "${NS1}" addr add       fd::1/64 dev "${DEV}" nodad
+ip -netns "${NS2}" addr add       fd::2/64 dev "${DEV}" nodad
+
+# Optionally disable sg or csum offload to test edge cases
+# ip netns exec "${NS1}" ethtool -K "${DEV}" sg off
+
+do_test() {
+	local readonly ARGS="$1"
+
+	echo "ipv${IP} ${TXMODE} ${ARGS}"
+	ip netns exec "${NS2}" "${BIN}" "-${IP}" -i "${DEV}" -t 2 -C 2 -S "${SADDR}" -D "${DADDR}" ${ARGS} -r "${RXMODE}" &
+	sleep 0.2
+	ip netns exec "${NS1}" "${BIN}" "-${IP}" -i "${DEV}" -t 1 -C 3 -S "${SADDR}" -D "${DADDR}" ${ARGS} "${TXMODE}"
+	wait
+}
+
+do_test "${EXTRA_ARGS}"
+do_test "-z ${EXTRA_ARGS}"
+echo ok
diff --git a/tools/testing/selftests/net/netdevice.sh b/tools/testing/selftests/net/netdevice.sh
new file mode 100755
index 000000000..e3afcb424
--- /dev/null
+++ b/tools/testing/selftests/net/netdevice.sh
@@ -0,0 +1,205 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+#
+# This test is for checking network interface
+# For the moment it tests only ethernet interface (but wifi could be easily added)
+#
+# We assume that all network driver are loaded
+# if not they probably have failed earlier in the boot process and their logged error will be catched by another test
+#
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+# this function will try to up the interface
+# if already up, nothing done
+# arg1: network interface name
+kci_net_start()
+{
+	netdev=$1
+
+	ip link show "$netdev" |grep -q UP
+	if [ $? -eq 0 ];then
+		echo "SKIP: $netdev: interface already up"
+		return $ksft_skip
+	fi
+
+	ip link set "$netdev" up
+	if [ $? -ne 0 ];then
+		echo "FAIL: $netdev: Fail to up interface"
+		return 1
+	else
+		echo "PASS: $netdev: set interface up"
+		NETDEV_STARTED=1
+	fi
+	return 0
+}
+
+# this function will try to setup an IP and MAC address on a network interface
+# Doing nothing if the interface was already up
+# arg1: network interface name
+kci_net_setup()
+{
+	netdev=$1
+
+	# do nothing if the interface was already up
+	if [ $NETDEV_STARTED -eq 0 ];then
+		return 0
+	fi
+
+	MACADDR='02:03:04:05:06:07'
+	ip link set dev $netdev address "$MACADDR"
+	if [ $? -ne 0 ];then
+		echo "FAIL: $netdev: Cannot set MAC address"
+	else
+		ip link show $netdev |grep -q "$MACADDR"
+		if [ $? -eq 0 ];then
+			echo "PASS: $netdev: set MAC address"
+		else
+			echo "FAIL: $netdev: Cannot set MAC address"
+		fi
+	fi
+
+	#check that the interface did not already have an IP
+	ip address show "$netdev" |grep '^[[:space:]]*inet'
+	if [ $? -eq 0 ];then
+		echo "SKIP: $netdev: already have an IP"
+		return $ksft_skip
+	fi
+
+	# TODO what ipaddr to set ? DHCP ?
+	echo "SKIP: $netdev: set IP address"
+	return $ksft_skip
+}
+
+# test an ethtool command
+# arg1: return code for not supported (see ethtool code source)
+# arg2: summary of the command
+# arg3: command to execute
+kci_netdev_ethtool_test()
+{
+	if [ $# -le 2 ];then
+		echo "SKIP: $netdev: ethtool: invalid number of arguments"
+		return 1
+	fi
+	$3 >/dev/null
+	ret=$?
+	if [ $ret -ne 0 ];then
+		if [ $ret -eq "$1" ];then
+			echo "SKIP: $netdev: ethtool $2 not supported"
+			return $ksft_skip
+		else
+			echo "FAIL: $netdev: ethtool $2"
+			return 1
+		fi
+	else
+		echo "PASS: $netdev: ethtool $2"
+	fi
+	return 0
+}
+
+# test ethtool commands
+# arg1: network interface name
+kci_netdev_ethtool()
+{
+	netdev=$1
+
+	#check presence of ethtool
+	ethtool --version 2>/dev/null >/dev/null
+	if [ $? -ne 0 ];then
+		echo "SKIP: ethtool not present"
+		return $ksft_skip
+	fi
+
+	TMP_ETHTOOL_FEATURES="$(mktemp)"
+	if [ ! -e "$TMP_ETHTOOL_FEATURES" ];then
+		echo "SKIP: Cannot create a tmp file"
+		return 1
+	fi
+
+	ethtool -k "$netdev" > "$TMP_ETHTOOL_FEATURES"
+	if [ $? -ne 0 ];then
+		echo "FAIL: $netdev: ethtool list features"
+		rm "$TMP_ETHTOOL_FEATURES"
+		return 1
+	fi
+	echo "PASS: $netdev: ethtool list features"
+	#TODO for each non fixed features, try to turn them on/off
+	rm "$TMP_ETHTOOL_FEATURES"
+
+	kci_netdev_ethtool_test 74 'dump' "ethtool -d $netdev"
+	kci_netdev_ethtool_test 94 'stats' "ethtool -S $netdev"
+	return 0
+}
+
+# stop a netdev
+# arg1: network interface name
+kci_netdev_stop()
+{
+	netdev=$1
+
+	if [ $NETDEV_STARTED -eq 0 ];then
+		echo "SKIP: $netdev: interface kept up"
+		return 0
+	fi
+
+	ip link set "$netdev" down
+	if [ $? -ne 0 ];then
+		echo "FAIL: $netdev: stop interface"
+		return 1
+	fi
+	echo "PASS: $netdev: stop interface"
+	return 0
+}
+
+# run all test on a netdev
+# arg1: network interface name
+kci_test_netdev()
+{
+	NETDEV_STARTED=0
+	IFACE_TO_UPDOWN="$1"
+	IFACE_TO_TEST="$1"
+	#check for VLAN interface
+	MASTER_IFACE="$(echo $1 | cut -d@ -f2)"
+	if [ ! -z "$MASTER_IFACE" ];then
+		IFACE_TO_UPDOWN="$MASTER_IFACE"
+		IFACE_TO_TEST="$(echo $1 | cut -d@ -f1)"
+	fi
+
+	NETDEV_STARTED=0
+	kci_net_start "$IFACE_TO_UPDOWN"
+
+	kci_net_setup "$IFACE_TO_TEST"
+
+	kci_netdev_ethtool "$IFACE_TO_TEST"
+
+	kci_netdev_stop "$IFACE_TO_UPDOWN"
+	return 0
+}
+
+#check for needed privileges
+if [ "$(id -u)" -ne 0 ];then
+	echo "SKIP: Need root privileges"
+	exit $ksft_skip
+fi
+
+ip link show 2>/dev/null >/dev/null
+if [ $? -ne 0 ];then
+	echo "SKIP: Could not run test without the ip tool"
+	exit $ksft_skip
+fi
+
+TMP_LIST_NETDEV="$(mktemp)"
+if [ ! -e "$TMP_LIST_NETDEV" ];then
+	echo "FAIL: Cannot create a tmp file"
+	exit 1
+fi
+
+ip link show |grep '^[0-9]' | grep -oE '[[:space:]].*eth[0-9]*:|[[:space:]].*enp[0-9]s[0-9]:' | cut -d\  -f2 | cut -d: -f1> "$TMP_LIST_NETDEV"
+while read netdev
+do
+	kci_test_netdev "$netdev"
+done < "$TMP_LIST_NETDEV"
+
+rm "$TMP_LIST_NETDEV"
+exit 0
diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh
new file mode 100755
index 000000000..0ab9423d0
--- /dev/null
+++ b/tools/testing/selftests/net/pmtu.sh
@@ -0,0 +1,477 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+#
+# Check that route PMTU values match expectations, and that initial device MTU
+# values are assigned correctly
+#
+# Tests currently implemented:
+#
+# - pmtu_vti4_exception
+#	Set up vti tunnel on top of veth, with xfrm states and policies, in two
+#	namespaces with matching endpoints. Check that route exception is not
+#	created if link layer MTU is not exceeded, then exceed it and check that
+#	exception is created with the expected PMTU. The approach described
+#	below for IPv6 doesn't apply here, because, on IPv4, administrative MTU
+#	changes alone won't affect PMTU
+#
+# - pmtu_vti6_exception
+#	Set up vti6 tunnel on top of veth, with xfrm states and policies, in two
+#	namespaces with matching endpoints. Check that route exception is
+#	created by exceeding link layer MTU with ping to other endpoint. Then
+#	decrease and increase MTU of tunnel, checking that route exception PMTU
+#	changes accordingly
+#
+# - pmtu_vti4_default_mtu
+#	Set up vti4 tunnel on top of veth, in two namespaces with matching
+#	endpoints. Check that MTU assigned to vti interface is the MTU of the
+#	lower layer (veth) minus additional lower layer headers (zero, for veth)
+#	minus IPv4 header length
+#
+# - pmtu_vti6_default_mtu
+#	Same as above, for IPv6
+#
+# - pmtu_vti4_link_add_mtu
+#	Set up vti4 interface passing MTU value at link creation, check MTU is
+#	configured, and that link is not created with invalid MTU values
+#
+# - pmtu_vti6_link_add_mtu
+#	Same as above, for IPv6
+#
+# - pmtu_vti6_link_change_mtu
+#	Set up two dummy interfaces with different MTUs, create a vti6 tunnel
+#	and check that configured MTU is used on link creation and changes, and
+#	that MTU is properly calculated instead when MTU is not configured from
+#	userspace
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+# Some systems don't have a ping6 binary anymore
+which ping6 > /dev/null 2>&1 && ping6=$(which ping6) || ping6=$(which ping)
+
+tests="
+	pmtu_vti6_exception		vti6: PMTU exceptions
+	pmtu_vti4_exception		vti4: PMTU exceptions
+	pmtu_vti4_default_mtu		vti4: default MTU assignment
+	pmtu_vti6_default_mtu		vti6: default MTU assignment
+	pmtu_vti4_link_add_mtu		vti4: MTU setting on link creation
+	pmtu_vti6_link_add_mtu		vti6: MTU setting on link creation
+	pmtu_vti6_link_change_mtu	vti6: MTU changes on link changes"
+
+NS_A="ns-$(mktemp -u XXXXXX)"
+NS_B="ns-$(mktemp -u XXXXXX)"
+ns_a="ip netns exec ${NS_A}"
+ns_b="ip netns exec ${NS_B}"
+
+veth4_a_addr="192.168.1.1"
+veth4_b_addr="192.168.1.2"
+veth4_mask="24"
+veth6_a_addr="fd00:1::a"
+veth6_b_addr="fd00:1::b"
+veth6_mask="64"
+
+vti4_a_addr="192.168.2.1"
+vti4_b_addr="192.168.2.2"
+vti4_mask="24"
+vti6_a_addr="fd00:2::a"
+vti6_b_addr="fd00:2::b"
+vti6_mask="64"
+
+dummy6_0_addr="fc00:1000::0"
+dummy6_1_addr="fc00:1001::0"
+dummy6_mask="64"
+
+cleanup_done=1
+err_buf=
+
+err() {
+	err_buf="${err_buf}${1}
+"
+}
+
+err_flush() {
+	echo -n "${err_buf}"
+	err_buf=
+}
+
+setup_namespaces() {
+	ip netns add ${NS_A} || return 1
+	ip netns add ${NS_B}
+}
+
+setup_veth() {
+	${ns_a} ip link add veth_a type veth peer name veth_b || return 1
+	${ns_a} ip link set veth_b netns ${NS_B}
+
+	${ns_a} ip addr add ${veth4_a_addr}/${veth4_mask} dev veth_a
+	${ns_b} ip addr add ${veth4_b_addr}/${veth4_mask} dev veth_b
+
+	${ns_a} ip addr add ${veth6_a_addr}/${veth6_mask} dev veth_a
+	${ns_b} ip addr add ${veth6_b_addr}/${veth6_mask} dev veth_b
+
+	${ns_a} ip link set veth_a up
+	${ns_b} ip link set veth_b up
+}
+
+setup_vti() {
+	proto=${1}
+	veth_a_addr="${2}"
+	veth_b_addr="${3}"
+	vti_a_addr="${4}"
+	vti_b_addr="${5}"
+	vti_mask=${6}
+
+	[ ${proto} -eq 6 ] && vti_type="vti6" || vti_type="vti"
+
+	${ns_a} ip link add vti${proto}_a type ${vti_type} local ${veth_a_addr} remote ${veth_b_addr} key 10 || return 1
+	${ns_b} ip link add vti${proto}_b type ${vti_type} local ${veth_b_addr} remote ${veth_a_addr} key 10
+
+	${ns_a} ip addr add ${vti_a_addr}/${vti_mask} dev vti${proto}_a
+	${ns_b} ip addr add ${vti_b_addr}/${vti_mask} dev vti${proto}_b
+
+	${ns_a} ip link set vti${proto}_a up
+	${ns_b} ip link set vti${proto}_b up
+
+	sleep 1
+}
+
+setup_vti4() {
+	setup_vti 4 ${veth4_a_addr} ${veth4_b_addr} ${vti4_a_addr} ${vti4_b_addr} ${vti4_mask}
+}
+
+setup_vti6() {
+	setup_vti 6 ${veth6_a_addr} ${veth6_b_addr} ${vti6_a_addr} ${vti6_b_addr} ${vti6_mask}
+}
+
+setup_xfrm() {
+	proto=${1}
+	veth_a_addr="${2}"
+	veth_b_addr="${3}"
+
+	${ns_a} ip -${proto} xfrm state add src ${veth_a_addr} dst ${veth_b_addr} spi 0x1000 proto esp aead "rfc4106(gcm(aes))" 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel || return 1
+	${ns_a} ip -${proto} xfrm state add src ${veth_b_addr} dst ${veth_a_addr} spi 0x1001 proto esp aead "rfc4106(gcm(aes))" 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel
+	${ns_a} ip -${proto} xfrm policy add dir out mark 10 tmpl src ${veth_a_addr} dst ${veth_b_addr} proto esp mode tunnel
+	${ns_a} ip -${proto} xfrm policy add dir in mark 10 tmpl src ${veth_b_addr} dst ${veth_a_addr} proto esp mode tunnel
+
+	${ns_b} ip -${proto} xfrm state add src ${veth_a_addr} dst ${veth_b_addr} spi 0x1000 proto esp aead "rfc4106(gcm(aes))" 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel
+	${ns_b} ip -${proto} xfrm state add src ${veth_b_addr} dst ${veth_a_addr} spi 0x1001 proto esp aead "rfc4106(gcm(aes))" 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel
+	${ns_b} ip -${proto} xfrm policy add dir out mark 10 tmpl src ${veth_b_addr} dst ${veth_a_addr} proto esp mode tunnel
+	${ns_b} ip -${proto} xfrm policy add dir in mark 10 tmpl src ${veth_a_addr} dst ${veth_b_addr} proto esp mode tunnel
+}
+
+setup_xfrm4() {
+	setup_xfrm 4 ${veth4_a_addr} ${veth4_b_addr}
+}
+
+setup_xfrm6() {
+	setup_xfrm 6 ${veth6_a_addr} ${veth6_b_addr}
+}
+
+setup() {
+	[ "$(id -u)" -ne 0 ] && echo "  need to run as root" && return $ksft_skip
+
+	cleanup_done=0
+	for arg do
+		eval setup_${arg} || { echo "  ${arg} not supported"; return 1; }
+	done
+}
+
+cleanup() {
+	[ ${cleanup_done} -eq 1 ] && return
+	ip netns del ${NS_A} 2> /dev/null
+	ip netns del ${NS_B} 2> /dev/null
+	cleanup_done=1
+}
+
+mtu() {
+	ns_cmd="${1}"
+	dev="${2}"
+	mtu="${3}"
+
+	${ns_cmd} ip link set dev ${dev} mtu ${mtu}
+}
+
+mtu_parse() {
+	input="${1}"
+
+	next=0
+	for i in ${input}; do
+		[ ${next} -eq 1 ] && echo "${i}" && return
+		[ "${i}" = "mtu" ] && next=1
+	done
+}
+
+link_get() {
+	ns_cmd="${1}"
+	name="${2}"
+
+	${ns_cmd} ip link show dev "${name}"
+}
+
+link_get_mtu() {
+	ns_cmd="${1}"
+	name="${2}"
+
+	mtu_parse "$(link_get "${ns_cmd}" ${name})"
+}
+
+route_get_dst_exception() {
+	ns_cmd="${1}"
+	dst="${2}"
+
+	${ns_cmd} ip route get "${dst}"
+}
+
+route_get_dst_pmtu_from_exception() {
+	ns_cmd="${1}"
+	dst="${2}"
+
+	mtu_parse "$(route_get_dst_exception "${ns_cmd}" ${dst})"
+}
+
+test_pmtu_vti4_exception() {
+	setup namespaces veth vti4 xfrm4 || return 2
+
+	veth_mtu=1500
+	vti_mtu=$((veth_mtu - 20))
+
+	#                                SPI   SN   IV  ICV   pad length   next header
+	esp_payload_rfc4106=$((vti_mtu - 4   - 4  - 8 - 16  - 1          - 1))
+	ping_payload=$((esp_payload_rfc4106 - 28))
+
+	mtu "${ns_a}" veth_a ${veth_mtu}
+	mtu "${ns_b}" veth_b ${veth_mtu}
+	mtu "${ns_a}" vti4_a ${vti_mtu}
+	mtu "${ns_b}" vti4_b ${vti_mtu}
+
+	# Send DF packet without exceeding link layer MTU, check that no
+	# exception is created
+	${ns_a} ping -q -M want -i 0.1 -w 2 -s ${ping_payload} ${vti4_b_addr} > /dev/null
+	pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${vti4_b_addr})"
+	if [ "${pmtu}" != "" ]; then
+		err "  unexpected exception created with PMTU ${pmtu} for IP payload length ${esp_payload_rfc4106}"
+		return 1
+	fi
+
+	# Now exceed link layer MTU by one byte, check that exception is created
+	${ns_a} ping -q -M want -i 0.1 -w 2 -s $((ping_payload + 1)) ${vti4_b_addr} > /dev/null
+	pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${vti4_b_addr})"
+	if [ "${pmtu}" = "" ]; then
+		err "  exception not created for IP payload length $((esp_payload_rfc4106 + 1))"
+		return 1
+	fi
+
+	# ...with the right PMTU value
+	if [ ${pmtu} -ne ${esp_payload_rfc4106} ]; then
+		err "  wrong PMTU ${pmtu} in exception, expected: ${esp_payload_rfc4106}"
+		return 1
+	fi
+}
+
+test_pmtu_vti6_exception() {
+	setup namespaces veth vti6 xfrm6 || return 2
+	fail=0
+
+	# Create route exception by exceeding link layer MTU
+	mtu "${ns_a}" veth_a 4000
+	mtu "${ns_b}" veth_b 4000
+	mtu "${ns_a}" vti6_a 5000
+	mtu "${ns_b}" vti6_b 5000
+	${ns_a} ${ping6} -q -i 0.1 -w 2 -s 60000 ${vti6_b_addr} > /dev/null
+
+	# Check that exception was created
+	if [ "$(route_get_dst_pmtu_from_exception "${ns_a}" ${vti6_b_addr})" = "" ]; then
+		err "  tunnel exceeding link layer MTU didn't create route exception"
+		return 1
+	fi
+
+	# Decrease tunnel MTU, check for PMTU decrease in route exception
+	mtu "${ns_a}" vti6_a 3000
+
+	if [ "$(route_get_dst_pmtu_from_exception "${ns_a}" ${vti6_b_addr})" -ne 3000 ]; then
+		err "  decreasing tunnel MTU didn't decrease route exception PMTU"
+		fail=1
+	fi
+
+	# Increase tunnel MTU, check for PMTU increase in route exception
+	mtu "${ns_a}" vti6_a 9000
+	if [ "$(route_get_dst_pmtu_from_exception "${ns_a}" ${vti6_b_addr})" -ne 9000 ]; then
+		err "  increasing tunnel MTU didn't increase route exception PMTU"
+		fail=1
+	fi
+
+	return ${fail}
+}
+
+test_pmtu_vti4_default_mtu() {
+	setup namespaces veth vti4 || return 2
+
+	# Check that MTU of vti device is MTU of veth minus IPv4 header length
+	veth_mtu="$(link_get_mtu "${ns_a}" veth_a)"
+	vti4_mtu="$(link_get_mtu "${ns_a}" vti4_a)"
+	if [ $((veth_mtu - vti4_mtu)) -ne 20 ]; then
+		err "  vti MTU ${vti4_mtu} is not veth MTU ${veth_mtu} minus IPv4 header length"
+		return 1
+	fi
+}
+
+test_pmtu_vti6_default_mtu() {
+	setup namespaces veth vti6 || return 2
+
+	# Check that MTU of vti device is MTU of veth minus IPv6 header length
+	veth_mtu="$(link_get_mtu "${ns_a}" veth_a)"
+	vti6_mtu="$(link_get_mtu "${ns_a}" vti6_a)"
+	if [ $((veth_mtu - vti6_mtu)) -ne 40 ]; then
+		err "  vti MTU ${vti6_mtu} is not veth MTU ${veth_mtu} minus IPv6 header length"
+		return 1
+	fi
+}
+
+test_pmtu_vti4_link_add_mtu() {
+	setup namespaces || return 2
+
+	${ns_a} ip link add vti4_a type vti local ${veth4_a_addr} remote ${veth4_b_addr} key 10
+	[ $? -ne 0 ] && err "  vti not supported" && return 2
+	${ns_a} ip link del vti4_a
+
+	fail=0
+
+	min=68
+	max=$((65535 - 20))
+	# Check invalid values first
+	for v in $((min - 1)) $((max + 1)); do
+		${ns_a} ip link add vti4_a mtu ${v} type vti local ${veth4_a_addr} remote ${veth4_b_addr} key 10 2>/dev/null
+		# This can fail, or MTU can be adjusted to a proper value
+		[ $? -ne 0 ] && continue
+		mtu="$(link_get_mtu "${ns_a}" vti4_a)"
+		if [ ${mtu} -lt ${min} -o ${mtu} -gt ${max} ]; then
+			err "  vti tunnel created with invalid MTU ${mtu}"
+			fail=1
+		fi
+		${ns_a} ip link del vti4_a
+	done
+
+	# Now check valid values
+	for v in ${min} 1300 ${max}; do
+		${ns_a} ip link add vti4_a mtu ${v} type vti local ${veth4_a_addr} remote ${veth4_b_addr} key 10
+		mtu="$(link_get_mtu "${ns_a}" vti4_a)"
+		${ns_a} ip link del vti4_a
+		if [ "${mtu}" != "${v}" ]; then
+			err "  vti MTU ${mtu} doesn't match configured value ${v}"
+			fail=1
+		fi
+	done
+
+	return ${fail}
+}
+
+test_pmtu_vti6_link_add_mtu() {
+	setup namespaces || return 2
+
+	${ns_a} ip link add vti6_a type vti6 local ${veth6_a_addr} remote ${veth6_b_addr} key 10
+	[ $? -ne 0 ] && err "  vti6 not supported" && return 2
+	${ns_a} ip link del vti6_a
+
+	fail=0
+
+	min=68			# vti6 can carry IPv4 packets too
+	max=$((65535 - 40))
+	# Check invalid values first
+	for v in $((min - 1)) $((max + 1)); do
+		${ns_a} ip link add vti6_a mtu ${v} type vti6 local ${veth6_a_addr} remote ${veth6_b_addr} key 10 2>/dev/null
+		# This can fail, or MTU can be adjusted to a proper value
+		[ $? -ne 0 ] && continue
+		mtu="$(link_get_mtu "${ns_a}" vti6_a)"
+		if [ ${mtu} -lt ${min} -o ${mtu} -gt ${max} ]; then
+			err "  vti6 tunnel created with invalid MTU ${v}"
+			fail=1
+		fi
+		${ns_a} ip link del vti6_a
+	done
+
+	# Now check valid values
+	for v in 68 1280 1300 $((65535 - 40)); do
+		${ns_a} ip link add vti6_a mtu ${v} type vti6 local ${veth6_a_addr} remote ${veth6_b_addr} key 10
+		mtu="$(link_get_mtu "${ns_a}" vti6_a)"
+		${ns_a} ip link del vti6_a
+		if [ "${mtu}" != "${v}" ]; then
+			err "  vti6 MTU ${mtu} doesn't match configured value ${v}"
+			fail=1
+		fi
+	done
+
+	return ${fail}
+}
+
+test_pmtu_vti6_link_change_mtu() {
+	setup namespaces || return 2
+
+	${ns_a} ip link add dummy0 mtu 1500 type dummy
+	[ $? -ne 0 ] && err "  dummy not supported" && return 2
+	${ns_a} ip link add dummy1 mtu 3000 type dummy
+	${ns_a} ip link set dummy0 up
+	${ns_a} ip link set dummy1 up
+
+	${ns_a} ip addr add ${dummy6_0_addr}/${dummy6_mask} dev dummy0
+	${ns_a} ip addr add ${dummy6_1_addr}/${dummy6_mask} dev dummy1
+
+	fail=0
+
+	# Create vti6 interface bound to device, passing MTU, check it
+	${ns_a} ip link add vti6_a mtu 1300 type vti6 remote ${dummy6_0_addr} local ${dummy6_0_addr}
+	mtu="$(link_get_mtu "${ns_a}" vti6_a)"
+	if [ ${mtu} -ne 1300 ]; then
+		err "  vti6 MTU ${mtu} doesn't match configured value 1300"
+		fail=1
+	fi
+
+	# Move to another device with different MTU, without passing MTU, check
+	# MTU is adjusted
+	${ns_a} ip link set vti6_a type vti6 remote ${dummy6_1_addr} local ${dummy6_1_addr}
+	mtu="$(link_get_mtu "${ns_a}" vti6_a)"
+	if [ ${mtu} -ne $((3000 - 40)) ]; then
+		err "  vti MTU ${mtu} is not dummy MTU 3000 minus IPv6 header length"
+		fail=1
+	fi
+
+	# Move it back, passing MTU, check MTU is not overridden
+	${ns_a} ip link set vti6_a mtu 1280 type vti6 remote ${dummy6_0_addr} local ${dummy6_0_addr}
+	mtu="$(link_get_mtu "${ns_a}" vti6_a)"
+	if [ ${mtu} -ne 1280 ]; then
+		err "  vti6 MTU ${mtu} doesn't match configured value 1280"
+		fail=1
+	fi
+
+	return ${fail}
+}
+
+trap cleanup EXIT
+
+exitcode=0
+desc=0
+IFS="	
+"
+for t in ${tests}; do
+	[ $desc -eq 0 ] && name="${t}" && desc=1 && continue || desc=0
+
+	(
+		unset IFS
+		eval test_${name}
+		ret=$?
+		cleanup
+
+		if [ $ret -eq 0 ]; then
+			printf "TEST: %-60s  [ OK ]\n" "${t}"
+		elif [ $ret -eq 1 ]; then
+			printf "TEST: %-60s  [FAIL]\n" "${t}"
+			err_flush
+			exit 1
+		elif [ $ret -eq 2 ]; then
+			printf "TEST: %-60s  [SKIP]\n" "${t}"
+			err_flush
+		fi
+	)
+	[ $? -ne 0 ] && exitcode=1
+done
+
+exit ${exitcode}
diff --git a/tools/testing/selftests/net/psock_fanout.c b/tools/testing/selftests/net/psock_fanout.c
new file mode 100644
index 000000000..f496ba3b1
--- /dev/null
+++ b/tools/testing/selftests/net/psock_fanout.c
@@ -0,0 +1,486 @@
+/*
+ * Copyright 2013 Google Inc.
+ * Author: Willem de Bruijn (willemb@google.com)
+ *
+ * A basic test of packet socket fanout behavior.
+ *
+ * Control:
+ * - create fanout fails as expected with illegal flag combinations
+ * - join   fanout fails as expected with diverging types or flags
+ *
+ * Datapath:
+ *   Open a pair of packet sockets and a pair of INET sockets, send a known
+ *   number of packets across the two INET sockets and count the number of
+ *   packets enqueued onto the two packet sockets.
+ *
+ *   The test currently runs for
+ *   - PACKET_FANOUT_HASH
+ *   - PACKET_FANOUT_HASH with PACKET_FANOUT_FLAG_ROLLOVER
+ *   - PACKET_FANOUT_LB
+ *   - PACKET_FANOUT_CPU
+ *   - PACKET_FANOUT_ROLLOVER
+ *   - PACKET_FANOUT_CBPF
+ *   - PACKET_FANOUT_EBPF
+ *
+ * Todo:
+ * - functionality: PACKET_FANOUT_FLAG_DEFRAG
+ *
+ * License (GPLv2):
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. * See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#define _GNU_SOURCE		/* for sched_setaffinity */
+
+#include <arpa/inet.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/unistd.h>	/* for __NR_bpf */
+#include <linux/filter.h>
+#include <linux/bpf.h>
+#include <linux/if_packet.h>
+#include <net/if.h>
+#include <net/ethernet.h>
+#include <netinet/ip.h>
+#include <netinet/udp.h>
+#include <poll.h>
+#include <sched.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "psock_lib.h"
+
+#define RING_NUM_FRAMES			20
+
+/* Open a socket in a given fanout mode.
+ * @return -1 if mode is bad, a valid socket otherwise */
+static int sock_fanout_open(uint16_t typeflags, uint16_t group_id)
+{
+	struct sockaddr_ll addr = {0};
+	int fd, val;
+
+	fd = socket(PF_PACKET, SOCK_RAW, 0);
+	if (fd < 0) {
+		perror("socket packet");
+		exit(1);
+	}
+
+	pair_udp_setfilter(fd);
+
+	addr.sll_family = AF_PACKET;
+	addr.sll_protocol = htons(ETH_P_IP);
+	addr.sll_ifindex = if_nametoindex("lo");
+	if (addr.sll_ifindex == 0) {
+		perror("if_nametoindex");
+		exit(1);
+	}
+	if (bind(fd, (void *) &addr, sizeof(addr))) {
+		perror("bind packet");
+		exit(1);
+	}
+
+	val = (((int) typeflags) << 16) | group_id;
+	if (setsockopt(fd, SOL_PACKET, PACKET_FANOUT, &val, sizeof(val))) {
+		if (close(fd)) {
+			perror("close packet");
+			exit(1);
+		}
+		return -1;
+	}
+
+	return fd;
+}
+
+static void sock_fanout_set_cbpf(int fd)
+{
+	struct sock_filter bpf_filter[] = {
+		BPF_STMT(BPF_LD+BPF_B+BPF_ABS, 80),	      /* ldb [80] */
+		BPF_STMT(BPF_RET+BPF_A, 0),		      /* ret A */
+	};
+	struct sock_fprog bpf_prog;
+
+	bpf_prog.filter = bpf_filter;
+	bpf_prog.len = sizeof(bpf_filter) / sizeof(struct sock_filter);
+
+	if (setsockopt(fd, SOL_PACKET, PACKET_FANOUT_DATA, &bpf_prog,
+		       sizeof(bpf_prog))) {
+		perror("fanout data cbpf");
+		exit(1);
+	}
+}
+
+static void sock_fanout_getopts(int fd, uint16_t *typeflags, uint16_t *group_id)
+{
+	int sockopt;
+	socklen_t sockopt_len = sizeof(sockopt);
+
+	if (getsockopt(fd, SOL_PACKET, PACKET_FANOUT,
+		       &sockopt, &sockopt_len)) {
+		perror("failed to getsockopt");
+		exit(1);
+	}
+	*typeflags = sockopt >> 16;
+	*group_id = sockopt & 0xfffff;
+}
+
+static void sock_fanout_set_ebpf(int fd)
+{
+	static char log_buf[65536];
+
+	const int len_off = __builtin_offsetof(struct __sk_buff, len);
+	struct bpf_insn prog[] = {
+		{ BPF_ALU64 | BPF_MOV | BPF_X,   6, 1, 0, 0 },
+		{ BPF_LDX   | BPF_W   | BPF_MEM, 0, 6, len_off, 0 },
+		{ BPF_JMP   | BPF_JGE | BPF_K,   0, 0, 1, DATA_LEN },
+		{ BPF_JMP   | BPF_JA  | BPF_K,   0, 0, 4, 0 },
+		{ BPF_LD    | BPF_B   | BPF_ABS, 0, 0, 0, 0x50 },
+		{ BPF_JMP   | BPF_JEQ | BPF_K,   0, 0, 2, DATA_CHAR },
+		{ BPF_JMP   | BPF_JEQ | BPF_K,   0, 0, 1, DATA_CHAR_1 },
+		{ BPF_ALU   | BPF_MOV | BPF_K,   0, 0, 0, 0 },
+		{ BPF_JMP   | BPF_EXIT,          0, 0, 0, 0 }
+	};
+	union bpf_attr attr;
+	int pfd;
+
+	memset(&attr, 0, sizeof(attr));
+	attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
+	attr.insns = (unsigned long) prog;
+	attr.insn_cnt = sizeof(prog) / sizeof(prog[0]);
+	attr.license = (unsigned long) "GPL";
+	attr.log_buf = (unsigned long) log_buf,
+	attr.log_size = sizeof(log_buf),
+	attr.log_level = 1,
+
+	pfd = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
+	if (pfd < 0) {
+		perror("bpf");
+		fprintf(stderr, "bpf verifier:\n%s\n", log_buf);
+		exit(1);
+	}
+
+	if (setsockopt(fd, SOL_PACKET, PACKET_FANOUT_DATA, &pfd, sizeof(pfd))) {
+		perror("fanout data ebpf");
+		exit(1);
+	}
+
+	if (close(pfd)) {
+		perror("close ebpf");
+		exit(1);
+	}
+}
+
+static char *sock_fanout_open_ring(int fd)
+{
+	struct tpacket_req req = {
+		.tp_block_size = getpagesize(),
+		.tp_frame_size = getpagesize(),
+		.tp_block_nr   = RING_NUM_FRAMES,
+		.tp_frame_nr   = RING_NUM_FRAMES,
+	};
+	char *ring;
+	int val = TPACKET_V2;
+
+	if (setsockopt(fd, SOL_PACKET, PACKET_VERSION, (void *) &val,
+		       sizeof(val))) {
+		perror("packetsock ring setsockopt version");
+		exit(1);
+	}
+	if (setsockopt(fd, SOL_PACKET, PACKET_RX_RING, (void *) &req,
+		       sizeof(req))) {
+		perror("packetsock ring setsockopt");
+		exit(1);
+	}
+
+	ring = mmap(0, req.tp_block_size * req.tp_block_nr,
+		    PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+	if (ring == MAP_FAILED) {
+		perror("packetsock ring mmap");
+		exit(1);
+	}
+
+	return ring;
+}
+
+static int sock_fanout_read_ring(int fd, void *ring)
+{
+	struct tpacket2_hdr *header = ring;
+	int count = 0;
+
+	while (count < RING_NUM_FRAMES && header->tp_status & TP_STATUS_USER) {
+		count++;
+		header = ring + (count * getpagesize());
+	}
+
+	return count;
+}
+
+static int sock_fanout_read(int fds[], char *rings[], const int expect[])
+{
+	int ret[2];
+
+	ret[0] = sock_fanout_read_ring(fds[0], rings[0]);
+	ret[1] = sock_fanout_read_ring(fds[1], rings[1]);
+
+	fprintf(stderr, "info: count=%d,%d, expect=%d,%d\n",
+			ret[0], ret[1], expect[0], expect[1]);
+
+	if ((!(ret[0] == expect[0] && ret[1] == expect[1])) &&
+	    (!(ret[0] == expect[1] && ret[1] == expect[0]))) {
+		fprintf(stderr, "warning: incorrect queue lengths\n");
+		return 1;
+	}
+
+	return 0;
+}
+
+/* Test illegal mode + flag combination */
+static void test_control_single(void)
+{
+	fprintf(stderr, "test: control single socket\n");
+
+	if (sock_fanout_open(PACKET_FANOUT_ROLLOVER |
+			       PACKET_FANOUT_FLAG_ROLLOVER, 0) != -1) {
+		fprintf(stderr, "ERROR: opened socket with dual rollover\n");
+		exit(1);
+	}
+}
+
+/* Test illegal group with different modes or flags */
+static void test_control_group(void)
+{
+	int fds[2];
+
+	fprintf(stderr, "test: control multiple sockets\n");
+
+	fds[0] = sock_fanout_open(PACKET_FANOUT_HASH, 0);
+	if (fds[0] == -1) {
+		fprintf(stderr, "ERROR: failed to open HASH socket\n");
+		exit(1);
+	}
+	if (sock_fanout_open(PACKET_FANOUT_HASH |
+			       PACKET_FANOUT_FLAG_DEFRAG, 0) != -1) {
+		fprintf(stderr, "ERROR: joined group with wrong flag defrag\n");
+		exit(1);
+	}
+	if (sock_fanout_open(PACKET_FANOUT_HASH |
+			       PACKET_FANOUT_FLAG_ROLLOVER, 0) != -1) {
+		fprintf(stderr, "ERROR: joined group with wrong flag ro\n");
+		exit(1);
+	}
+	if (sock_fanout_open(PACKET_FANOUT_CPU, 0) != -1) {
+		fprintf(stderr, "ERROR: joined group with wrong mode\n");
+		exit(1);
+	}
+	fds[1] = sock_fanout_open(PACKET_FANOUT_HASH, 0);
+	if (fds[1] == -1) {
+		fprintf(stderr, "ERROR: failed to join group\n");
+		exit(1);
+	}
+	if (close(fds[1]) || close(fds[0])) {
+		fprintf(stderr, "ERROR: closing sockets\n");
+		exit(1);
+	}
+}
+
+/* Test creating a unique fanout group ids */
+static void test_unique_fanout_group_ids(void)
+{
+	int fds[3];
+	uint16_t typeflags, first_group_id, second_group_id;
+
+	fprintf(stderr, "test: unique ids\n");
+
+	fds[0] = sock_fanout_open(PACKET_FANOUT_HASH |
+				  PACKET_FANOUT_FLAG_UNIQUEID, 0);
+	if (fds[0] == -1) {
+		fprintf(stderr, "ERROR: failed to create a unique id group.\n");
+		exit(1);
+	}
+
+	sock_fanout_getopts(fds[0], &typeflags, &first_group_id);
+	if (typeflags != PACKET_FANOUT_HASH) {
+		fprintf(stderr, "ERROR: unexpected typeflags %x\n", typeflags);
+		exit(1);
+	}
+
+	if (sock_fanout_open(PACKET_FANOUT_CPU, first_group_id) != -1) {
+		fprintf(stderr, "ERROR: joined group with wrong type.\n");
+		exit(1);
+	}
+
+	fds[1] = sock_fanout_open(PACKET_FANOUT_HASH, first_group_id);
+	if (fds[1] == -1) {
+		fprintf(stderr,
+			"ERROR: failed to join previously created group.\n");
+		exit(1);
+	}
+
+	fds[2] = sock_fanout_open(PACKET_FANOUT_HASH |
+				  PACKET_FANOUT_FLAG_UNIQUEID, 0);
+	if (fds[2] == -1) {
+		fprintf(stderr,
+			"ERROR: failed to create a second unique id group.\n");
+		exit(1);
+	}
+
+	sock_fanout_getopts(fds[2], &typeflags, &second_group_id);
+	if (sock_fanout_open(PACKET_FANOUT_HASH | PACKET_FANOUT_FLAG_UNIQUEID,
+			     second_group_id) != -1) {
+		fprintf(stderr,
+			"ERROR: specified a group id when requesting unique id\n");
+		exit(1);
+	}
+
+	if (close(fds[0]) || close(fds[1]) || close(fds[2])) {
+		fprintf(stderr, "ERROR: closing sockets\n");
+		exit(1);
+	}
+}
+
+static int test_datapath(uint16_t typeflags, int port_off,
+			 const int expect1[], const int expect2[])
+{
+	const int expect0[] = { 0, 0 };
+	char *rings[2];
+	uint8_t type = typeflags & 0xFF;
+	int fds[2], fds_udp[2][2], ret;
+
+	fprintf(stderr, "\ntest: datapath 0x%hx ports %hu,%hu\n",
+		typeflags, (uint16_t)PORT_BASE,
+		(uint16_t)(PORT_BASE + port_off));
+
+	fds[0] = sock_fanout_open(typeflags, 0);
+	fds[1] = sock_fanout_open(typeflags, 0);
+	if (fds[0] == -1 || fds[1] == -1) {
+		fprintf(stderr, "ERROR: failed open\n");
+		exit(1);
+	}
+	if (type == PACKET_FANOUT_CBPF)
+		sock_fanout_set_cbpf(fds[0]);
+	else if (type == PACKET_FANOUT_EBPF)
+		sock_fanout_set_ebpf(fds[0]);
+
+	rings[0] = sock_fanout_open_ring(fds[0]);
+	rings[1] = sock_fanout_open_ring(fds[1]);
+	pair_udp_open(fds_udp[0], PORT_BASE);
+	pair_udp_open(fds_udp[1], PORT_BASE + port_off);
+	sock_fanout_read(fds, rings, expect0);
+
+	/* Send data, but not enough to overflow a queue */
+	pair_udp_send(fds_udp[0], 15);
+	pair_udp_send_char(fds_udp[1], 5, DATA_CHAR_1);
+	ret = sock_fanout_read(fds, rings, expect1);
+
+	/* Send more data, overflow the queue */
+	pair_udp_send_char(fds_udp[0], 15, DATA_CHAR_1);
+	/* TODO: ensure consistent order between expect1 and expect2 */
+	ret |= sock_fanout_read(fds, rings, expect2);
+
+	if (munmap(rings[1], RING_NUM_FRAMES * getpagesize()) ||
+	    munmap(rings[0], RING_NUM_FRAMES * getpagesize())) {
+		fprintf(stderr, "close rings\n");
+		exit(1);
+	}
+	if (close(fds_udp[1][1]) || close(fds_udp[1][0]) ||
+	    close(fds_udp[0][1]) || close(fds_udp[0][0]) ||
+	    close(fds[1]) || close(fds[0])) {
+		fprintf(stderr, "close datapath\n");
+		exit(1);
+	}
+
+	return ret;
+}
+
+static int set_cpuaffinity(int cpuid)
+{
+	cpu_set_t mask;
+
+	CPU_ZERO(&mask);
+	CPU_SET(cpuid, &mask);
+	if (sched_setaffinity(0, sizeof(mask), &mask)) {
+		if (errno != EINVAL) {
+			fprintf(stderr, "setaffinity %d\n", cpuid);
+			exit(1);
+		}
+		return 1;
+	}
+
+	return 0;
+}
+
+int main(int argc, char **argv)
+{
+	const int expect_hash[2][2]	= { { 15, 5 },  { 20, 5 } };
+	const int expect_hash_rb[2][2]	= { { 15, 5 },  { 20, 15 } };
+	const int expect_lb[2][2]	= { { 10, 10 }, { 18, 17 } };
+	const int expect_rb[2][2]	= { { 15, 5 },  { 20, 15 } };
+	const int expect_cpu0[2][2]	= { { 20, 0 },  { 20, 0 } };
+	const int expect_cpu1[2][2]	= { { 0, 20 },  { 0, 20 } };
+	const int expect_bpf[2][2]	= { { 15, 5 },  { 15, 20 } };
+	const int expect_uniqueid[2][2] = { { 20, 20},  { 20, 20 } };
+	int port_off = 2, tries = 20, ret;
+
+	test_control_single();
+	test_control_group();
+	test_unique_fanout_group_ids();
+
+	/* find a set of ports that do not collide onto the same socket */
+	ret = test_datapath(PACKET_FANOUT_HASH, port_off,
+			    expect_hash[0], expect_hash[1]);
+	while (ret) {
+		fprintf(stderr, "info: trying alternate ports (%d)\n", tries);
+		ret = test_datapath(PACKET_FANOUT_HASH, ++port_off,
+				    expect_hash[0], expect_hash[1]);
+		if (!--tries) {
+			fprintf(stderr, "too many collisions\n");
+			return 1;
+		}
+	}
+
+	ret |= test_datapath(PACKET_FANOUT_HASH | PACKET_FANOUT_FLAG_ROLLOVER,
+			     port_off, expect_hash_rb[0], expect_hash_rb[1]);
+	ret |= test_datapath(PACKET_FANOUT_LB,
+			     port_off, expect_lb[0], expect_lb[1]);
+	ret |= test_datapath(PACKET_FANOUT_ROLLOVER,
+			     port_off, expect_rb[0], expect_rb[1]);
+
+	ret |= test_datapath(PACKET_FANOUT_CBPF,
+			     port_off, expect_bpf[0], expect_bpf[1]);
+	ret |= test_datapath(PACKET_FANOUT_EBPF,
+			     port_off, expect_bpf[0], expect_bpf[1]);
+
+	set_cpuaffinity(0);
+	ret |= test_datapath(PACKET_FANOUT_CPU, port_off,
+			     expect_cpu0[0], expect_cpu0[1]);
+	if (!set_cpuaffinity(1))
+		/* TODO: test that choice alternates with previous */
+		ret |= test_datapath(PACKET_FANOUT_CPU, port_off,
+				     expect_cpu1[0], expect_cpu1[1]);
+
+	ret |= test_datapath(PACKET_FANOUT_FLAG_UNIQUEID, port_off,
+			     expect_uniqueid[0], expect_uniqueid[1]);
+
+	if (ret)
+		return 1;
+
+	printf("OK. All tests passed\n");
+	return 0;
+}
diff --git a/tools/testing/selftests/net/psock_lib.h b/tools/testing/selftests/net/psock_lib.h
new file mode 100644
index 000000000..7d990d6c8
--- /dev/null
+++ b/tools/testing/selftests/net/psock_lib.h
@@ -0,0 +1,158 @@
+/*
+ * Copyright 2013 Google Inc.
+ * Author: Willem de Bruijn <willemb@google.com>
+ *         Daniel Borkmann <dborkman@redhat.com>
+ *
+ * License (GPLv2):
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. * See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#ifndef PSOCK_LIB_H
+#define PSOCK_LIB_H
+
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <string.h>
+#include <arpa/inet.h>
+#include <unistd.h>
+
+#define DATA_LEN			100
+#define DATA_CHAR			'a'
+#define DATA_CHAR_1			'b'
+
+#define PORT_BASE			8000
+
+#ifndef __maybe_unused
+# define __maybe_unused		__attribute__ ((__unused__))
+#endif
+
+static __maybe_unused void pair_udp_setfilter(int fd)
+{
+	/* the filter below checks for all of the following conditions that
+	 * are based on the contents of create_payload()
+	 *  ether type 0x800 and
+	 *  ip proto udp     and
+	 *  skb->len == DATA_LEN and
+	 *  udp[38] == 'a' or udp[38] == 'b'
+	 * It can be generated from the following bpf_asm input:
+	 *	ldh [12]
+	 *	jne #0x800, drop	; ETH_P_IP
+	 *	ldb [23]
+	 *	jneq #17, drop		; IPPROTO_UDP
+	 *	ld len			; ld skb->len
+	 *	jlt #100, drop		; DATA_LEN
+	 *	ldb [80]
+	 *	jeq #97, pass		; DATA_CHAR
+	 *	jne #98, drop		; DATA_CHAR_1
+	 *	pass:
+	 *	  ret #-1
+	 *	drop:
+	 *	  ret #0
+	 */
+	struct sock_filter bpf_filter[] = {
+		{ 0x28,  0,  0, 0x0000000c },
+		{ 0x15,  0,  8, 0x00000800 },
+		{ 0x30,  0,  0, 0x00000017 },
+		{ 0x15,  0,  6, 0x00000011 },
+		{ 0x80,  0,  0, 0000000000 },
+		{ 0x35,  0,  4, 0x00000064 },
+		{ 0x30,  0,  0, 0x00000050 },
+		{ 0x15,  1,  0, 0x00000061 },
+		{ 0x15,  0,  1, 0x00000062 },
+		{ 0x06,  0,  0, 0xffffffff },
+		{ 0x06,  0,  0, 0000000000 },
+	};
+	struct sock_fprog bpf_prog;
+
+	bpf_prog.filter = bpf_filter;
+	bpf_prog.len = sizeof(bpf_filter) / sizeof(struct sock_filter);
+
+	if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &bpf_prog,
+		       sizeof(bpf_prog))) {
+		perror("setsockopt SO_ATTACH_FILTER");
+		exit(1);
+	}
+}
+
+static __maybe_unused void pair_udp_open(int fds[], uint16_t port)
+{
+	struct sockaddr_in saddr, daddr;
+
+	fds[0] = socket(PF_INET, SOCK_DGRAM, 0);
+	fds[1] = socket(PF_INET, SOCK_DGRAM, 0);
+	if (fds[0] == -1 || fds[1] == -1) {
+		fprintf(stderr, "ERROR: socket dgram\n");
+		exit(1);
+	}
+
+	memset(&saddr, 0, sizeof(saddr));
+	saddr.sin_family = AF_INET;
+	saddr.sin_port = htons(port);
+	saddr.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+
+	memset(&daddr, 0, sizeof(daddr));
+	daddr.sin_family = AF_INET;
+	daddr.sin_port = htons(port + 1);
+	daddr.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+
+	/* must bind both to get consistent hash result */
+	if (bind(fds[1], (void *) &daddr, sizeof(daddr))) {
+		perror("bind");
+		exit(1);
+	}
+	if (bind(fds[0], (void *) &saddr, sizeof(saddr))) {
+		perror("bind");
+		exit(1);
+	}
+	if (connect(fds[0], (void *) &daddr, sizeof(daddr))) {
+		perror("connect");
+		exit(1);
+	}
+}
+
+static __maybe_unused void pair_udp_send_char(int fds[], int num, char payload)
+{
+	char buf[DATA_LEN], rbuf[DATA_LEN];
+
+	memset(buf, payload, sizeof(buf));
+	while (num--) {
+		/* Should really handle EINTR and EAGAIN */
+		if (write(fds[0], buf, sizeof(buf)) != sizeof(buf)) {
+			fprintf(stderr, "ERROR: send failed left=%d\n", num);
+			exit(1);
+		}
+		if (read(fds[1], rbuf, sizeof(rbuf)) != sizeof(rbuf)) {
+			fprintf(stderr, "ERROR: recv failed left=%d\n", num);
+			exit(1);
+		}
+		if (memcmp(buf, rbuf, sizeof(buf))) {
+			fprintf(stderr, "ERROR: data failed left=%d\n", num);
+			exit(1);
+		}
+	}
+}
+
+static __maybe_unused void pair_udp_send(int fds[], int num)
+{
+	return pair_udp_send_char(fds, num, DATA_CHAR);
+}
+
+static __maybe_unused void pair_udp_close(int fds[])
+{
+	close(fds[0]);
+	close(fds[1]);
+}
+
+#endif /* PSOCK_LIB_H */
diff --git a/tools/testing/selftests/net/psock_snd.c b/tools/testing/selftests/net/psock_snd.c
new file mode 100644
index 000000000..7d15e10a9
--- /dev/null
+++ b/tools/testing/selftests/net/psock_snd.c
@@ -0,0 +1,397 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+
+#include <arpa/inet.h>
+#include <errno.h>
+#include <error.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <linux/filter.h>
+#include <linux/bpf.h>
+#include <linux/if_packet.h>
+#include <linux/if_vlan.h>
+#include <linux/virtio_net.h>
+#include <net/if.h>
+#include <net/ethernet.h>
+#include <netinet/ip.h>
+#include <netinet/udp.h>
+#include <poll.h>
+#include <sched.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "psock_lib.h"
+
+static bool	cfg_use_bind;
+static bool	cfg_use_csum_off;
+static bool	cfg_use_csum_off_bad;
+static bool	cfg_use_dgram;
+static bool	cfg_use_gso;
+static bool	cfg_use_qdisc_bypass;
+static bool	cfg_use_vlan;
+static bool	cfg_use_vnet;
+
+static char	*cfg_ifname = "lo";
+static int	cfg_mtu	= 1500;
+static int	cfg_payload_len = DATA_LEN;
+static int	cfg_truncate_len = INT_MAX;
+static uint16_t	cfg_port = 8000;
+
+/* test sending up to max mtu + 1 */
+#define TEST_SZ	(sizeof(struct virtio_net_hdr) + ETH_HLEN + ETH_MAX_MTU + 1)
+
+static char tbuf[TEST_SZ], rbuf[TEST_SZ];
+
+static unsigned long add_csum_hword(const uint16_t *start, int num_u16)
+{
+	unsigned long sum = 0;
+	int i;
+
+	for (i = 0; i < num_u16; i++)
+		sum += start[i];
+
+	return sum;
+}
+
+static uint16_t build_ip_csum(const uint16_t *start, int num_u16,
+			      unsigned long sum)
+{
+	sum += add_csum_hword(start, num_u16);
+
+	while (sum >> 16)
+		sum = (sum & 0xffff) + (sum >> 16);
+
+	return ~sum;
+}
+
+static int build_vnet_header(void *header)
+{
+	struct virtio_net_hdr *vh = header;
+
+	vh->hdr_len = ETH_HLEN + sizeof(struct iphdr) + sizeof(struct udphdr);
+
+	if (cfg_use_csum_off) {
+		vh->flags |= VIRTIO_NET_HDR_F_NEEDS_CSUM;
+		vh->csum_start = ETH_HLEN + sizeof(struct iphdr);
+		vh->csum_offset = __builtin_offsetof(struct udphdr, check);
+
+		/* position check field exactly one byte beyond end of packet */
+		if (cfg_use_csum_off_bad)
+			vh->csum_start += sizeof(struct udphdr) + cfg_payload_len -
+					  vh->csum_offset - 1;
+	}
+
+	if (cfg_use_gso) {
+		vh->gso_type = VIRTIO_NET_HDR_GSO_UDP;
+		vh->gso_size = cfg_mtu - sizeof(struct iphdr);
+	}
+
+	return sizeof(*vh);
+}
+
+static int build_eth_header(void *header)
+{
+	struct ethhdr *eth = header;
+
+	if (cfg_use_vlan) {
+		uint16_t *tag = header + ETH_HLEN;
+
+		eth->h_proto = htons(ETH_P_8021Q);
+		tag[1] = htons(ETH_P_IP);
+		return ETH_HLEN + 4;
+	}
+
+	eth->h_proto = htons(ETH_P_IP);
+	return ETH_HLEN;
+}
+
+static int build_ipv4_header(void *header, int payload_len)
+{
+	struct iphdr *iph = header;
+
+	iph->ihl = 5;
+	iph->version = 4;
+	iph->ttl = 8;
+	iph->tot_len = htons(sizeof(*iph) + sizeof(struct udphdr) + payload_len);
+	iph->id = htons(1337);
+	iph->protocol = IPPROTO_UDP;
+	iph->saddr = htonl((172 << 24) | (17 << 16) | 2);
+	iph->daddr = htonl((172 << 24) | (17 << 16) | 1);
+	iph->check = build_ip_csum((void *) iph, iph->ihl << 1, 0);
+
+	return iph->ihl << 2;
+}
+
+static int build_udp_header(void *header, int payload_len)
+{
+	const int alen = sizeof(uint32_t);
+	struct udphdr *udph = header;
+	int len = sizeof(*udph) + payload_len;
+
+	udph->source = htons(9);
+	udph->dest = htons(cfg_port);
+	udph->len = htons(len);
+
+	if (cfg_use_csum_off)
+		udph->check = build_ip_csum(header - (2 * alen), alen,
+					    htons(IPPROTO_UDP) + udph->len);
+	else
+		udph->check = 0;
+
+	return sizeof(*udph);
+}
+
+static int build_packet(int payload_len)
+{
+	int off = 0;
+
+	off += build_vnet_header(tbuf);
+	off += build_eth_header(tbuf + off);
+	off += build_ipv4_header(tbuf + off, payload_len);
+	off += build_udp_header(tbuf + off, payload_len);
+
+	if (off + payload_len > sizeof(tbuf))
+		error(1, 0, "payload length exceeds max");
+
+	memset(tbuf + off, DATA_CHAR, payload_len);
+
+	return off + payload_len;
+}
+
+static void do_bind(int fd)
+{
+	struct sockaddr_ll laddr = {0};
+
+	laddr.sll_family = AF_PACKET;
+	laddr.sll_protocol = htons(ETH_P_IP);
+	laddr.sll_ifindex = if_nametoindex(cfg_ifname);
+	if (!laddr.sll_ifindex)
+		error(1, errno, "if_nametoindex");
+
+	if (bind(fd, (void *)&laddr, sizeof(laddr)))
+		error(1, errno, "bind");
+}
+
+static void do_send(int fd, char *buf, int len)
+{
+	int ret;
+
+	if (!cfg_use_vnet) {
+		buf += sizeof(struct virtio_net_hdr);
+		len -= sizeof(struct virtio_net_hdr);
+	}
+	if (cfg_use_dgram) {
+		buf += ETH_HLEN;
+		len -= ETH_HLEN;
+	}
+
+	if (cfg_use_bind) {
+		ret = write(fd, buf, len);
+	} else {
+		struct sockaddr_ll laddr = {0};
+
+		laddr.sll_protocol = htons(ETH_P_IP);
+		laddr.sll_ifindex = if_nametoindex(cfg_ifname);
+		if (!laddr.sll_ifindex)
+			error(1, errno, "if_nametoindex");
+
+		ret = sendto(fd, buf, len, 0, (void *)&laddr, sizeof(laddr));
+	}
+
+	if (ret == -1)
+		error(1, errno, "write");
+	if (ret != len)
+		error(1, 0, "write: %u %u", ret, len);
+
+	fprintf(stderr, "tx: %u\n", ret);
+}
+
+static int do_tx(void)
+{
+	const int one = 1;
+	int fd, len;
+
+	fd = socket(PF_PACKET, cfg_use_dgram ? SOCK_DGRAM : SOCK_RAW, 0);
+	if (fd == -1)
+		error(1, errno, "socket t");
+
+	if (cfg_use_bind)
+		do_bind(fd);
+
+	if (cfg_use_qdisc_bypass &&
+	    setsockopt(fd, SOL_PACKET, PACKET_QDISC_BYPASS, &one, sizeof(one)))
+		error(1, errno, "setsockopt qdisc bypass");
+
+	if (cfg_use_vnet &&
+	    setsockopt(fd, SOL_PACKET, PACKET_VNET_HDR, &one, sizeof(one)))
+		error(1, errno, "setsockopt vnet");
+
+	len = build_packet(cfg_payload_len);
+
+	if (cfg_truncate_len < len)
+		len = cfg_truncate_len;
+
+	do_send(fd, tbuf, len);
+
+	if (close(fd))
+		error(1, errno, "close t");
+
+	return len;
+}
+
+static int setup_rx(void)
+{
+	struct timeval tv = { .tv_usec = 100 * 1000 };
+	struct sockaddr_in raddr = {0};
+	int fd;
+
+	fd = socket(PF_INET, SOCK_DGRAM, 0);
+	if (fd == -1)
+		error(1, errno, "socket r");
+
+	if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)))
+		error(1, errno, "setsockopt rcv timeout");
+
+	raddr.sin_family = AF_INET;
+	raddr.sin_port = htons(cfg_port);
+	raddr.sin_addr.s_addr = htonl(INADDR_ANY);
+
+	if (bind(fd, (void *)&raddr, sizeof(raddr)))
+		error(1, errno, "bind r");
+
+	return fd;
+}
+
+static void do_rx(int fd, int expected_len, char *expected)
+{
+	int ret;
+
+	ret = recv(fd, rbuf, sizeof(rbuf), 0);
+	if (ret == -1)
+		error(1, errno, "recv");
+	if (ret != expected_len)
+		error(1, 0, "recv: %u != %u", ret, expected_len);
+
+	if (memcmp(rbuf, expected, ret))
+		error(1, 0, "recv: data mismatch");
+
+	fprintf(stderr, "rx: %u\n", ret);
+}
+
+static int setup_sniffer(void)
+{
+	struct timeval tv = { .tv_usec = 100 * 1000 };
+	int fd;
+
+	fd = socket(PF_PACKET, SOCK_RAW, 0);
+	if (fd == -1)
+		error(1, errno, "socket p");
+
+	if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)))
+		error(1, errno, "setsockopt rcv timeout");
+
+	pair_udp_setfilter(fd);
+	do_bind(fd);
+
+	return fd;
+}
+
+static void parse_opts(int argc, char **argv)
+{
+	int c;
+
+	while ((c = getopt(argc, argv, "bcCdgl:qt:vV")) != -1) {
+		switch (c) {
+		case 'b':
+			cfg_use_bind = true;
+			break;
+		case 'c':
+			cfg_use_csum_off = true;
+			break;
+		case 'C':
+			cfg_use_csum_off_bad = true;
+			break;
+		case 'd':
+			cfg_use_dgram = true;
+			break;
+		case 'g':
+			cfg_use_gso = true;
+			break;
+		case 'l':
+			cfg_payload_len = strtoul(optarg, NULL, 0);
+			break;
+		case 'q':
+			cfg_use_qdisc_bypass = true;
+			break;
+		case 't':
+			cfg_truncate_len = strtoul(optarg, NULL, 0);
+			break;
+		case 'v':
+			cfg_use_vnet = true;
+			break;
+		case 'V':
+			cfg_use_vlan = true;
+			break;
+		default:
+			error(1, 0, "%s: parse error", argv[0]);
+		}
+	}
+
+	if (cfg_use_vlan && cfg_use_dgram)
+		error(1, 0, "option vlan (-V) conflicts with dgram (-d)");
+
+	if (cfg_use_csum_off && !cfg_use_vnet)
+		error(1, 0, "option csum offload (-c) requires vnet (-v)");
+
+	if (cfg_use_csum_off_bad && !cfg_use_csum_off)
+		error(1, 0, "option csum bad (-C) requires csum offload (-c)");
+
+	if (cfg_use_gso && !cfg_use_csum_off)
+		error(1, 0, "option gso (-g) requires csum offload (-c)");
+}
+
+static void run_test(void)
+{
+	int fdr, fds, total_len;
+
+	fdr = setup_rx();
+	fds = setup_sniffer();
+
+	total_len = do_tx();
+
+	/* BPF filter accepts only this length, vlan changes MAC */
+	if (cfg_payload_len == DATA_LEN && !cfg_use_vlan)
+		do_rx(fds, total_len - sizeof(struct virtio_net_hdr),
+		      tbuf + sizeof(struct virtio_net_hdr));
+
+	do_rx(fdr, cfg_payload_len, tbuf + total_len - cfg_payload_len);
+
+	if (close(fds))
+		error(1, errno, "close s");
+	if (close(fdr))
+		error(1, errno, "close r");
+}
+
+int main(int argc, char **argv)
+{
+	parse_opts(argc, argv);
+
+	if (system("ip link set dev lo mtu 1500"))
+		error(1, errno, "ip link set mtu");
+	if (system("ip addr add dev lo 172.17.0.1/24"))
+		error(1, errno, "ip addr add");
+
+	run_test();
+
+	fprintf(stderr, "OK\n\n");
+	return 0;
+}
diff --git a/tools/testing/selftests/net/psock_snd.sh b/tools/testing/selftests/net/psock_snd.sh
new file mode 100755
index 000000000..6331d91b8
--- /dev/null
+++ b/tools/testing/selftests/net/psock_snd.sh
@@ -0,0 +1,98 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Run a series of packet socket send regression tests
+
+set -e
+
+readonly mtu=1500
+readonly iphlen=20
+readonly udphlen=8
+
+readonly vnet_hlen=10
+readonly eth_hlen=14
+
+readonly mss="$((${mtu} - ${iphlen} - ${udphlen}))"
+readonly mss_exceeds="$((${mss} + 1))"
+
+readonly max_mtu=65535
+readonly max_mss="$((${max_mtu} - ${iphlen} - ${udphlen}))"
+readonly max_mss_exceeds="$((${max_mss} + 1))"
+
+# functional checks (not a full cross-product)
+
+echo "dgram"
+./in_netns.sh ./psock_snd -d
+
+echo "dgram bind"
+./in_netns.sh ./psock_snd -d -b
+
+echo "raw"
+./in_netns.sh ./psock_snd
+
+echo "raw bind"
+./in_netns.sh ./psock_snd -b
+
+echo "raw qdisc bypass"
+./in_netns.sh ./psock_snd -q
+
+echo "raw vlan"
+./in_netns.sh ./psock_snd -V
+
+echo "raw vnet hdr"
+./in_netns.sh ./psock_snd -v
+
+echo "raw csum_off"
+./in_netns.sh ./psock_snd -v -c
+
+echo "raw csum_off with bad offset (fails)"
+(! ./in_netns.sh ./psock_snd -v -c -C)
+
+
+# bounds check: send {max, max + 1, min, min - 1} lengths
+
+echo "raw min size"
+./in_netns.sh ./psock_snd -l 0
+
+echo "raw mtu size"
+./in_netns.sh ./psock_snd -l "${mss}"
+
+echo "raw mtu size + 1 (fails)"
+(! ./in_netns.sh ./psock_snd -l "${mss_exceeds}")
+
+# fails due to ARPHRD_ETHER check in packet_extra_vlan_len_allowed
+#
+# echo "raw vlan mtu size"
+# ./in_netns.sh ./psock_snd -V -l "${mss}"
+
+echo "raw vlan mtu size + 1 (fails)"
+(! ./in_netns.sh ./psock_snd -V -l "${mss_exceeds}")
+
+echo "dgram mtu size"
+./in_netns.sh ./psock_snd -d -l "${mss}"
+
+echo "dgram mtu size + 1 (fails)"
+(! ./in_netns.sh ./psock_snd -d -l "${mss_exceeds}")
+
+echo "raw truncate hlen (fails: does not arrive)"
+(! ./in_netns.sh ./psock_snd -t "$((${vnet_hlen} + ${eth_hlen}))")
+
+echo "raw truncate hlen - 1 (fails: EINVAL)"
+(! ./in_netns.sh ./psock_snd -t "$((${vnet_hlen} + ${eth_hlen} - 1))")
+
+
+# gso checks: implies -l, because with gso len must exceed gso_size
+
+echo "raw gso min size"
+./in_netns.sh ./psock_snd -v -c -g -l "${mss_exceeds}"
+
+echo "raw gso min size - 1 (fails)"
+(! ./in_netns.sh ./psock_snd -v -c -g -l "${mss}")
+
+echo "raw gso max size"
+./in_netns.sh ./psock_snd -v -c -g -l "${max_mss}"
+
+echo "raw gso max size + 1 (fails)"
+(! ./in_netns.sh ./psock_snd -v -c -g -l "${max_mss_exceeds}")
+
+echo "OK. All tests passed"
diff --git a/tools/testing/selftests/net/psock_tpacket.c b/tools/testing/selftests/net/psock_tpacket.c
new file mode 100644
index 000000000..7ec4fa4d5
--- /dev/null
+++ b/tools/testing/selftests/net/psock_tpacket.c
@@ -0,0 +1,864 @@
+/*
+ * Copyright 2013 Red Hat, Inc.
+ * Author: Daniel Borkmann <dborkman@redhat.com>
+ *         Chetan Loke <loke.chetan@gmail.com> (TPACKET_V3 usage example)
+ *
+ * A basic test of packet socket's TPACKET_V1/TPACKET_V2/TPACKET_V3 behavior.
+ *
+ * Control:
+ *   Test the setup of the TPACKET socket with different patterns that are
+ *   known to fail (TODO) resp. succeed (OK).
+ *
+ * Datapath:
+ *   Open a pair of packet sockets and send resp. receive an a priori known
+ *   packet pattern accross the sockets and check if it was received resp.
+ *   sent correctly. Fanout in combination with RX_RING is currently not
+ *   tested here.
+ *
+ *   The test currently runs for
+ *   - TPACKET_V1: RX_RING, TX_RING
+ *   - TPACKET_V2: RX_RING, TX_RING
+ *   - TPACKET_V3: RX_RING
+ *
+ * License (GPLv2):
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. * See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/socket.h>
+#include <sys/mman.h>
+#include <linux/if_packet.h>
+#include <linux/filter.h>
+#include <ctype.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <bits/wordsize.h>
+#include <net/ethernet.h>
+#include <netinet/ip.h>
+#include <arpa/inet.h>
+#include <stdint.h>
+#include <string.h>
+#include <assert.h>
+#include <net/if.h>
+#include <inttypes.h>
+#include <poll.h>
+
+#include "psock_lib.h"
+
+#include "../kselftest.h"
+
+#ifndef bug_on
+# define bug_on(cond)		assert(!(cond))
+#endif
+
+#ifndef __aligned_tpacket
+# define __aligned_tpacket	__attribute__((aligned(TPACKET_ALIGNMENT)))
+#endif
+
+#ifndef __align_tpacket
+# define __align_tpacket(x)	__attribute__((aligned(TPACKET_ALIGN(x))))
+#endif
+
+#define NUM_PACKETS		100
+#define ALIGN_8(x)		(((x) + 8 - 1) & ~(8 - 1))
+
+struct ring {
+	struct iovec *rd;
+	uint8_t *mm_space;
+	size_t mm_len, rd_len;
+	struct sockaddr_ll ll;
+	void (*walk)(int sock, struct ring *ring);
+	int type, rd_num, flen, version;
+	union {
+		struct tpacket_req  req;
+		struct tpacket_req3 req3;
+	};
+};
+
+struct block_desc {
+	uint32_t version;
+	uint32_t offset_to_priv;
+	struct tpacket_hdr_v1 h1;
+};
+
+union frame_map {
+	struct {
+		struct tpacket_hdr tp_h __aligned_tpacket;
+		struct sockaddr_ll s_ll __align_tpacket(sizeof(struct tpacket_hdr));
+	} *v1;
+	struct {
+		struct tpacket2_hdr tp_h __aligned_tpacket;
+		struct sockaddr_ll s_ll __align_tpacket(sizeof(struct tpacket2_hdr));
+	} *v2;
+	void *raw;
+};
+
+static unsigned int total_packets, total_bytes;
+
+static int pfsocket(int ver)
+{
+	int ret, sock = socket(PF_PACKET, SOCK_RAW, 0);
+	if (sock == -1) {
+		perror("socket");
+		exit(1);
+	}
+
+	ret = setsockopt(sock, SOL_PACKET, PACKET_VERSION, &ver, sizeof(ver));
+	if (ret == -1) {
+		perror("setsockopt");
+		exit(1);
+	}
+
+	return sock;
+}
+
+static void status_bar_update(void)
+{
+	if (total_packets % 10 == 0) {
+		fprintf(stderr, ".");
+		fflush(stderr);
+	}
+}
+
+static void test_payload(void *pay, size_t len)
+{
+	struct ethhdr *eth = pay;
+
+	if (len < sizeof(struct ethhdr)) {
+		fprintf(stderr, "test_payload: packet too "
+			"small: %zu bytes!\n", len);
+		exit(1);
+	}
+
+	if (eth->h_proto != htons(ETH_P_IP)) {
+		fprintf(stderr, "test_payload: wrong ethernet "
+			"type: 0x%x!\n", ntohs(eth->h_proto));
+		exit(1);
+	}
+}
+
+static void create_payload(void *pay, size_t *len)
+{
+	int i;
+	struct ethhdr *eth = pay;
+	struct iphdr *ip = pay + sizeof(*eth);
+
+	/* Lets create some broken crap, that still passes
+	 * our BPF filter.
+	 */
+
+	*len = DATA_LEN + 42;
+
+	memset(pay, 0xff, ETH_ALEN * 2);
+	eth->h_proto = htons(ETH_P_IP);
+
+	for (i = 0; i < sizeof(*ip); ++i)
+		((uint8_t *) pay)[i + sizeof(*eth)] = (uint8_t) rand();
+
+	ip->ihl = 5;
+	ip->version = 4;
+	ip->protocol = 0x11;
+	ip->frag_off = 0;
+	ip->ttl = 64;
+	ip->tot_len = htons((uint16_t) *len - sizeof(*eth));
+
+	ip->saddr = htonl(INADDR_LOOPBACK);
+	ip->daddr = htonl(INADDR_LOOPBACK);
+
+	memset(pay + sizeof(*eth) + sizeof(*ip),
+	       DATA_CHAR, DATA_LEN);
+}
+
+static inline int __v1_rx_kernel_ready(struct tpacket_hdr *hdr)
+{
+	return ((hdr->tp_status & TP_STATUS_USER) == TP_STATUS_USER);
+}
+
+static inline void __v1_rx_user_ready(struct tpacket_hdr *hdr)
+{
+	hdr->tp_status = TP_STATUS_KERNEL;
+	__sync_synchronize();
+}
+
+static inline int __v2_rx_kernel_ready(struct tpacket2_hdr *hdr)
+{
+	return ((hdr->tp_status & TP_STATUS_USER) == TP_STATUS_USER);
+}
+
+static inline void __v2_rx_user_ready(struct tpacket2_hdr *hdr)
+{
+	hdr->tp_status = TP_STATUS_KERNEL;
+	__sync_synchronize();
+}
+
+static inline int __v1_v2_rx_kernel_ready(void *base, int version)
+{
+	switch (version) {
+	case TPACKET_V1:
+		return __v1_rx_kernel_ready(base);
+	case TPACKET_V2:
+		return __v2_rx_kernel_ready(base);
+	default:
+		bug_on(1);
+		return 0;
+	}
+}
+
+static inline void __v1_v2_rx_user_ready(void *base, int version)
+{
+	switch (version) {
+	case TPACKET_V1:
+		__v1_rx_user_ready(base);
+		break;
+	case TPACKET_V2:
+		__v2_rx_user_ready(base);
+		break;
+	}
+}
+
+static void walk_v1_v2_rx(int sock, struct ring *ring)
+{
+	struct pollfd pfd;
+	int udp_sock[2];
+	union frame_map ppd;
+	unsigned int frame_num = 0;
+
+	bug_on(ring->type != PACKET_RX_RING);
+
+	pair_udp_open(udp_sock, PORT_BASE);
+
+	memset(&pfd, 0, sizeof(pfd));
+	pfd.fd = sock;
+	pfd.events = POLLIN | POLLERR;
+	pfd.revents = 0;
+
+	pair_udp_send(udp_sock, NUM_PACKETS);
+
+	while (total_packets < NUM_PACKETS * 2) {
+		while (__v1_v2_rx_kernel_ready(ring->rd[frame_num].iov_base,
+					       ring->version)) {
+			ppd.raw = ring->rd[frame_num].iov_base;
+
+			switch (ring->version) {
+			case TPACKET_V1:
+				test_payload((uint8_t *) ppd.raw + ppd.v1->tp_h.tp_mac,
+					     ppd.v1->tp_h.tp_snaplen);
+				total_bytes += ppd.v1->tp_h.tp_snaplen;
+				break;
+
+			case TPACKET_V2:
+				test_payload((uint8_t *) ppd.raw + ppd.v2->tp_h.tp_mac,
+					     ppd.v2->tp_h.tp_snaplen);
+				total_bytes += ppd.v2->tp_h.tp_snaplen;
+				break;
+			}
+
+			status_bar_update();
+			total_packets++;
+
+			__v1_v2_rx_user_ready(ppd.raw, ring->version);
+
+			frame_num = (frame_num + 1) % ring->rd_num;
+		}
+
+		poll(&pfd, 1, 1);
+	}
+
+	pair_udp_close(udp_sock);
+
+	if (total_packets != 2 * NUM_PACKETS) {
+		fprintf(stderr, "walk_v%d_rx: received %u out of %u pkts\n",
+			ring->version, total_packets, NUM_PACKETS);
+		exit(1);
+	}
+
+	fprintf(stderr, " %u pkts (%u bytes)", NUM_PACKETS, total_bytes >> 1);
+}
+
+static inline int __v1_tx_kernel_ready(struct tpacket_hdr *hdr)
+{
+	return !(hdr->tp_status & (TP_STATUS_SEND_REQUEST | TP_STATUS_SENDING));
+}
+
+static inline void __v1_tx_user_ready(struct tpacket_hdr *hdr)
+{
+	hdr->tp_status = TP_STATUS_SEND_REQUEST;
+	__sync_synchronize();
+}
+
+static inline int __v2_tx_kernel_ready(struct tpacket2_hdr *hdr)
+{
+	return !(hdr->tp_status & (TP_STATUS_SEND_REQUEST | TP_STATUS_SENDING));
+}
+
+static inline void __v2_tx_user_ready(struct tpacket2_hdr *hdr)
+{
+	hdr->tp_status = TP_STATUS_SEND_REQUEST;
+	__sync_synchronize();
+}
+
+static inline int __v3_tx_kernel_ready(struct tpacket3_hdr *hdr)
+{
+	return !(hdr->tp_status & (TP_STATUS_SEND_REQUEST | TP_STATUS_SENDING));
+}
+
+static inline void __v3_tx_user_ready(struct tpacket3_hdr *hdr)
+{
+	hdr->tp_status = TP_STATUS_SEND_REQUEST;
+	__sync_synchronize();
+}
+
+static inline int __tx_kernel_ready(void *base, int version)
+{
+	switch (version) {
+	case TPACKET_V1:
+		return __v1_tx_kernel_ready(base);
+	case TPACKET_V2:
+		return __v2_tx_kernel_ready(base);
+	case TPACKET_V3:
+		return __v3_tx_kernel_ready(base);
+	default:
+		bug_on(1);
+		return 0;
+	}
+}
+
+static inline void __tx_user_ready(void *base, int version)
+{
+	switch (version) {
+	case TPACKET_V1:
+		__v1_tx_user_ready(base);
+		break;
+	case TPACKET_V2:
+		__v2_tx_user_ready(base);
+		break;
+	case TPACKET_V3:
+		__v3_tx_user_ready(base);
+		break;
+	}
+}
+
+static void __v1_v2_set_packet_loss_discard(int sock)
+{
+	int ret, discard = 1;
+
+	ret = setsockopt(sock, SOL_PACKET, PACKET_LOSS, (void *) &discard,
+			 sizeof(discard));
+	if (ret == -1) {
+		perror("setsockopt");
+		exit(1);
+	}
+}
+
+static inline void *get_next_frame(struct ring *ring, int n)
+{
+	uint8_t *f0 = ring->rd[0].iov_base;
+
+	switch (ring->version) {
+	case TPACKET_V1:
+	case TPACKET_V2:
+		return ring->rd[n].iov_base;
+	case TPACKET_V3:
+		return f0 + (n * ring->req3.tp_frame_size);
+	default:
+		bug_on(1);
+	}
+}
+
+static void walk_tx(int sock, struct ring *ring)
+{
+	struct pollfd pfd;
+	int rcv_sock, ret;
+	size_t packet_len;
+	union frame_map ppd;
+	char packet[1024];
+	unsigned int frame_num = 0, got = 0;
+	struct sockaddr_ll ll = {
+		.sll_family = PF_PACKET,
+		.sll_halen = ETH_ALEN,
+	};
+	int nframes;
+
+	/* TPACKET_V{1,2} sets up the ring->rd* related variables based
+	 * on frames (e.g., rd_num is tp_frame_nr) whereas V3 sets these
+	 * up based on blocks (e.g, rd_num is  tp_block_nr)
+	 */
+	if (ring->version <= TPACKET_V2)
+		nframes = ring->rd_num;
+	else
+		nframes = ring->req3.tp_frame_nr;
+
+	bug_on(ring->type != PACKET_TX_RING);
+	bug_on(nframes < NUM_PACKETS);
+
+	rcv_sock = socket(PF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
+	if (rcv_sock == -1) {
+		perror("socket");
+		exit(1);
+	}
+
+	pair_udp_setfilter(rcv_sock);
+
+	ll.sll_ifindex = if_nametoindex("lo");
+	ret = bind(rcv_sock, (struct sockaddr *) &ll, sizeof(ll));
+	if (ret == -1) {
+		perror("bind");
+		exit(1);
+	}
+
+	memset(&pfd, 0, sizeof(pfd));
+	pfd.fd = sock;
+	pfd.events = POLLOUT | POLLERR;
+	pfd.revents = 0;
+
+	total_packets = NUM_PACKETS;
+	create_payload(packet, &packet_len);
+
+	while (total_packets > 0) {
+		void *next = get_next_frame(ring, frame_num);
+
+		while (__tx_kernel_ready(next, ring->version) &&
+		       total_packets > 0) {
+			ppd.raw = next;
+
+			switch (ring->version) {
+			case TPACKET_V1:
+				ppd.v1->tp_h.tp_snaplen = packet_len;
+				ppd.v1->tp_h.tp_len = packet_len;
+
+				memcpy((uint8_t *) ppd.raw + TPACKET_HDRLEN -
+				       sizeof(struct sockaddr_ll), packet,
+				       packet_len);
+				total_bytes += ppd.v1->tp_h.tp_snaplen;
+				break;
+
+			case TPACKET_V2:
+				ppd.v2->tp_h.tp_snaplen = packet_len;
+				ppd.v2->tp_h.tp_len = packet_len;
+
+				memcpy((uint8_t *) ppd.raw + TPACKET2_HDRLEN -
+				       sizeof(struct sockaddr_ll), packet,
+				       packet_len);
+				total_bytes += ppd.v2->tp_h.tp_snaplen;
+				break;
+			case TPACKET_V3: {
+				struct tpacket3_hdr *tx = next;
+
+				tx->tp_snaplen = packet_len;
+				tx->tp_len = packet_len;
+				tx->tp_next_offset = 0;
+
+				memcpy((uint8_t *)tx + TPACKET3_HDRLEN -
+				       sizeof(struct sockaddr_ll), packet,
+				       packet_len);
+				total_bytes += tx->tp_snaplen;
+				break;
+			}
+			}
+
+			status_bar_update();
+			total_packets--;
+
+			__tx_user_ready(next, ring->version);
+
+			frame_num = (frame_num + 1) % nframes;
+		}
+
+		poll(&pfd, 1, 1);
+	}
+
+	bug_on(total_packets != 0);
+
+	ret = sendto(sock, NULL, 0, 0, NULL, 0);
+	if (ret == -1) {
+		perror("sendto");
+		exit(1);
+	}
+
+	while ((ret = recvfrom(rcv_sock, packet, sizeof(packet),
+			       0, NULL, NULL)) > 0 &&
+	       total_packets < NUM_PACKETS) {
+		got += ret;
+		test_payload(packet, ret);
+
+		status_bar_update();
+		total_packets++;
+	}
+
+	close(rcv_sock);
+
+	if (total_packets != NUM_PACKETS) {
+		fprintf(stderr, "walk_v%d_rx: received %u out of %u pkts\n",
+			ring->version, total_packets, NUM_PACKETS);
+		exit(1);
+	}
+
+	fprintf(stderr, " %u pkts (%u bytes)", NUM_PACKETS, got);
+}
+
+static void walk_v1_v2(int sock, struct ring *ring)
+{
+	if (ring->type == PACKET_RX_RING)
+		walk_v1_v2_rx(sock, ring);
+	else
+		walk_tx(sock, ring);
+}
+
+static uint64_t __v3_prev_block_seq_num = 0;
+
+void __v3_test_block_seq_num(struct block_desc *pbd)
+{
+	if (__v3_prev_block_seq_num + 1 != pbd->h1.seq_num) {
+		fprintf(stderr, "\nprev_block_seq_num:%"PRIu64", expected "
+			"seq:%"PRIu64" != actual seq:%"PRIu64"\n",
+			__v3_prev_block_seq_num, __v3_prev_block_seq_num + 1,
+			(uint64_t) pbd->h1.seq_num);
+		exit(1);
+	}
+
+	__v3_prev_block_seq_num = pbd->h1.seq_num;
+}
+
+static void __v3_test_block_len(struct block_desc *pbd, uint32_t bytes, int block_num)
+{
+	if (pbd->h1.num_pkts && bytes != pbd->h1.blk_len) {
+		fprintf(stderr, "\nblock:%u with %upackets, expected "
+			"len:%u != actual len:%u\n", block_num,
+			pbd->h1.num_pkts, bytes, pbd->h1.blk_len);
+		exit(1);
+	}
+}
+
+static void __v3_test_block_header(struct block_desc *pbd, const int block_num)
+{
+	if ((pbd->h1.block_status & TP_STATUS_USER) == 0) {
+		fprintf(stderr, "\nblock %u: not in TP_STATUS_USER\n", block_num);
+		exit(1);
+	}
+
+	__v3_test_block_seq_num(pbd);
+}
+
+static void __v3_walk_block(struct block_desc *pbd, const int block_num)
+{
+	int num_pkts = pbd->h1.num_pkts, i;
+	unsigned long bytes = 0, bytes_with_padding = ALIGN_8(sizeof(*pbd));
+	struct tpacket3_hdr *ppd;
+
+	__v3_test_block_header(pbd, block_num);
+
+	ppd = (struct tpacket3_hdr *) ((uint8_t *) pbd +
+				       pbd->h1.offset_to_first_pkt);
+
+	for (i = 0; i < num_pkts; ++i) {
+		bytes += ppd->tp_snaplen;
+
+		if (ppd->tp_next_offset)
+			bytes_with_padding += ppd->tp_next_offset;
+		else
+			bytes_with_padding += ALIGN_8(ppd->tp_snaplen + ppd->tp_mac);
+
+		test_payload((uint8_t *) ppd + ppd->tp_mac, ppd->tp_snaplen);
+
+		status_bar_update();
+		total_packets++;
+
+		ppd = (struct tpacket3_hdr *) ((uint8_t *) ppd + ppd->tp_next_offset);
+		__sync_synchronize();
+	}
+
+	__v3_test_block_len(pbd, bytes_with_padding, block_num);
+	total_bytes += bytes;
+}
+
+void __v3_flush_block(struct block_desc *pbd)
+{
+	pbd->h1.block_status = TP_STATUS_KERNEL;
+	__sync_synchronize();
+}
+
+static void walk_v3_rx(int sock, struct ring *ring)
+{
+	unsigned int block_num = 0;
+	struct pollfd pfd;
+	struct block_desc *pbd;
+	int udp_sock[2];
+
+	bug_on(ring->type != PACKET_RX_RING);
+
+	pair_udp_open(udp_sock, PORT_BASE);
+
+	memset(&pfd, 0, sizeof(pfd));
+	pfd.fd = sock;
+	pfd.events = POLLIN | POLLERR;
+	pfd.revents = 0;
+
+	pair_udp_send(udp_sock, NUM_PACKETS);
+
+	while (total_packets < NUM_PACKETS * 2) {
+		pbd = (struct block_desc *) ring->rd[block_num].iov_base;
+
+		while ((pbd->h1.block_status & TP_STATUS_USER) == 0)
+			poll(&pfd, 1, 1);
+
+		__v3_walk_block(pbd, block_num);
+		__v3_flush_block(pbd);
+
+		block_num = (block_num + 1) % ring->rd_num;
+	}
+
+	pair_udp_close(udp_sock);
+
+	if (total_packets != 2 * NUM_PACKETS) {
+		fprintf(stderr, "walk_v3_rx: received %u out of %u pkts\n",
+			total_packets, NUM_PACKETS);
+		exit(1);
+	}
+
+	fprintf(stderr, " %u pkts (%u bytes)", NUM_PACKETS, total_bytes >> 1);
+}
+
+static void walk_v3(int sock, struct ring *ring)
+{
+	if (ring->type == PACKET_RX_RING)
+		walk_v3_rx(sock, ring);
+	else
+		walk_tx(sock, ring);
+}
+
+static void __v1_v2_fill(struct ring *ring, unsigned int blocks)
+{
+	ring->req.tp_block_size = getpagesize() << 2;
+	ring->req.tp_frame_size = TPACKET_ALIGNMENT << 7;
+	ring->req.tp_block_nr = blocks;
+
+	ring->req.tp_frame_nr = ring->req.tp_block_size /
+				ring->req.tp_frame_size *
+				ring->req.tp_block_nr;
+
+	ring->mm_len = ring->req.tp_block_size * ring->req.tp_block_nr;
+	ring->walk = walk_v1_v2;
+	ring->rd_num = ring->req.tp_frame_nr;
+	ring->flen = ring->req.tp_frame_size;
+}
+
+static void __v3_fill(struct ring *ring, unsigned int blocks, int type)
+{
+	if (type == PACKET_RX_RING) {
+		ring->req3.tp_retire_blk_tov = 64;
+		ring->req3.tp_sizeof_priv = 0;
+		ring->req3.tp_feature_req_word = TP_FT_REQ_FILL_RXHASH;
+	}
+	ring->req3.tp_block_size = getpagesize() << 2;
+	ring->req3.tp_frame_size = TPACKET_ALIGNMENT << 7;
+	ring->req3.tp_block_nr = blocks;
+
+	ring->req3.tp_frame_nr = ring->req3.tp_block_size /
+				 ring->req3.tp_frame_size *
+				 ring->req3.tp_block_nr;
+
+	ring->mm_len = ring->req3.tp_block_size * ring->req3.tp_block_nr;
+	ring->walk = walk_v3;
+	ring->rd_num = ring->req3.tp_block_nr;
+	ring->flen = ring->req3.tp_block_size;
+}
+
+static void setup_ring(int sock, struct ring *ring, int version, int type)
+{
+	int ret = 0;
+	unsigned int blocks = 256;
+
+	ring->type = type;
+	ring->version = version;
+
+	switch (version) {
+	case TPACKET_V1:
+	case TPACKET_V2:
+		if (type == PACKET_TX_RING)
+			__v1_v2_set_packet_loss_discard(sock);
+		__v1_v2_fill(ring, blocks);
+		ret = setsockopt(sock, SOL_PACKET, type, &ring->req,
+				 sizeof(ring->req));
+		break;
+
+	case TPACKET_V3:
+		__v3_fill(ring, blocks, type);
+		ret = setsockopt(sock, SOL_PACKET, type, &ring->req3,
+				 sizeof(ring->req3));
+		break;
+	}
+
+	if (ret == -1) {
+		perror("setsockopt");
+		exit(1);
+	}
+
+	ring->rd_len = ring->rd_num * sizeof(*ring->rd);
+	ring->rd = malloc(ring->rd_len);
+	if (ring->rd == NULL) {
+		perror("malloc");
+		exit(1);
+	}
+
+	total_packets = 0;
+	total_bytes = 0;
+}
+
+static void mmap_ring(int sock, struct ring *ring)
+{
+	int i;
+
+	ring->mm_space = mmap(0, ring->mm_len, PROT_READ | PROT_WRITE,
+			      MAP_SHARED | MAP_LOCKED | MAP_POPULATE, sock, 0);
+	if (ring->mm_space == MAP_FAILED) {
+		perror("mmap");
+		exit(1);
+	}
+
+	memset(ring->rd, 0, ring->rd_len);
+	for (i = 0; i < ring->rd_num; ++i) {
+		ring->rd[i].iov_base = ring->mm_space + (i * ring->flen);
+		ring->rd[i].iov_len = ring->flen;
+	}
+}
+
+static void bind_ring(int sock, struct ring *ring)
+{
+	int ret;
+
+	pair_udp_setfilter(sock);
+
+	ring->ll.sll_family = PF_PACKET;
+	ring->ll.sll_protocol = htons(ETH_P_ALL);
+	ring->ll.sll_ifindex = if_nametoindex("lo");
+	ring->ll.sll_hatype = 0;
+	ring->ll.sll_pkttype = 0;
+	ring->ll.sll_halen = 0;
+
+	ret = bind(sock, (struct sockaddr *) &ring->ll, sizeof(ring->ll));
+	if (ret == -1) {
+		perror("bind");
+		exit(1);
+	}
+}
+
+static void walk_ring(int sock, struct ring *ring)
+{
+	ring->walk(sock, ring);
+}
+
+static void unmap_ring(int sock, struct ring *ring)
+{
+	munmap(ring->mm_space, ring->mm_len);
+	free(ring->rd);
+}
+
+static int test_kernel_bit_width(void)
+{
+	char in[512], *ptr;
+	int num = 0, fd;
+	ssize_t ret;
+
+	fd = open("/proc/kallsyms", O_RDONLY);
+	if (fd == -1) {
+		perror("open");
+		exit(1);
+	}
+
+	ret = read(fd, in, sizeof(in));
+	if (ret <= 0) {
+		perror("read");
+		exit(1);
+	}
+
+	close(fd);
+
+	ptr = in;
+	while(!isspace(*ptr)) {
+		num++;
+		ptr++;
+	}
+
+	return num * 4;
+}
+
+static int test_user_bit_width(void)
+{
+	return __WORDSIZE;
+}
+
+static const char *tpacket_str[] = {
+	[TPACKET_V1] = "TPACKET_V1",
+	[TPACKET_V2] = "TPACKET_V2",
+	[TPACKET_V3] = "TPACKET_V3",
+};
+
+static const char *type_str[] = {
+	[PACKET_RX_RING] = "PACKET_RX_RING",
+	[PACKET_TX_RING] = "PACKET_TX_RING",
+};
+
+static int test_tpacket(int version, int type)
+{
+	int sock;
+	struct ring ring;
+
+	fprintf(stderr, "test: %s with %s ", tpacket_str[version],
+		type_str[type]);
+	fflush(stderr);
+
+	if (version == TPACKET_V1 &&
+	    test_kernel_bit_width() != test_user_bit_width()) {
+		fprintf(stderr, "test: skip %s %s since user and kernel "
+			"space have different bit width\n",
+			tpacket_str[version], type_str[type]);
+		return KSFT_SKIP;
+	}
+
+	sock = pfsocket(version);
+	memset(&ring, 0, sizeof(ring));
+	setup_ring(sock, &ring, version, type);
+	mmap_ring(sock, &ring);
+	bind_ring(sock, &ring);
+	walk_ring(sock, &ring);
+	unmap_ring(sock, &ring);
+	close(sock);
+
+	fprintf(stderr, "\n");
+	return 0;
+}
+
+int main(void)
+{
+	int ret = 0;
+
+	ret |= test_tpacket(TPACKET_V1, PACKET_RX_RING);
+	ret |= test_tpacket(TPACKET_V1, PACKET_TX_RING);
+
+	ret |= test_tpacket(TPACKET_V2, PACKET_RX_RING);
+	ret |= test_tpacket(TPACKET_V2, PACKET_TX_RING);
+
+	ret |= test_tpacket(TPACKET_V3, PACKET_RX_RING);
+	ret |= test_tpacket(TPACKET_V3, PACKET_TX_RING);
+
+	if (ret)
+		return 1;
+
+	printf("OK. All tests passed\n");
+	return 0;
+}
diff --git a/tools/testing/selftests/net/reuseaddr_conflict.c b/tools/testing/selftests/net/reuseaddr_conflict.c
new file mode 100644
index 000000000..7c5b12664
--- /dev/null
+++ b/tools/testing/selftests/net/reuseaddr_conflict.c
@@ -0,0 +1,114 @@
+/*
+ * Test for the regression introduced by
+ *
+ * b9470c27607b ("inet: kill smallest_size and smallest_port")
+ *
+ * If we open an ipv4 socket on a port with reuseaddr we shouldn't reset the tb
+ * when we open the ipv6 conterpart, which is what was happening previously.
+ */
+#include <errno.h>
+#include <error.h>
+#include <arpa/inet.h>
+#include <netinet/in.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#define PORT 9999
+
+int open_port(int ipv6, int any)
+{
+	int fd = -1;
+	int reuseaddr = 1;
+	int v6only = 1;
+	int addrlen;
+	int ret = -1;
+	struct sockaddr *addr;
+	int family = ipv6 ? AF_INET6 : AF_INET;
+
+	struct sockaddr_in6 addr6 = {
+		.sin6_family = AF_INET6,
+		.sin6_port = htons(PORT),
+		.sin6_addr = in6addr_any
+	};
+	struct sockaddr_in addr4 = {
+		.sin_family = AF_INET,
+		.sin_port = htons(PORT),
+		.sin_addr.s_addr = any ? htonl(INADDR_ANY) : inet_addr("127.0.0.1"),
+	};
+
+
+	if (ipv6) {
+		addr = (struct sockaddr*)&addr6;
+		addrlen = sizeof(addr6);
+	} else {
+		addr = (struct sockaddr*)&addr4;
+		addrlen = sizeof(addr4);
+	}
+
+	if ((fd = socket(family, SOCK_STREAM, IPPROTO_TCP)) < 0) {
+		perror("socket");
+		goto out;
+	}
+
+	if (ipv6 && setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, (void*)&v6only,
+			       sizeof(v6only)) < 0) {
+		perror("setsockopt IPV6_V6ONLY");
+		goto out;
+	}
+
+	if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &reuseaddr,
+		       sizeof(reuseaddr)) < 0) {
+		perror("setsockopt SO_REUSEADDR");
+		goto out;
+	}
+
+	if (bind(fd, addr, addrlen) < 0) {
+		perror("bind");
+		goto out;
+	}
+
+	if (any)
+		return fd;
+
+	if (listen(fd, 1) < 0) {
+		perror("listen");
+		goto out;
+	}
+	return fd;
+out:
+	close(fd);
+	return ret;
+}
+
+int main(void)
+{
+	int listenfd;
+	int fd1, fd2;
+
+	fprintf(stderr, "Opening 127.0.0.1:%d\n", PORT);
+	listenfd = open_port(0, 0);
+	if (listenfd < 0)
+		error(1, errno, "Couldn't open listen socket");
+	fprintf(stderr, "Opening INADDR_ANY:%d\n", PORT);
+	fd1 = open_port(0, 1);
+	if (fd1 >= 0)
+		error(1, 0, "Was allowed to create an ipv4 reuseport on a already bound non-reuseport socket");
+	fprintf(stderr, "Opening in6addr_any:%d\n", PORT);
+	fd1 = open_port(1, 1);
+	if (fd1 < 0)
+		error(1, errno, "Couldn't open ipv6 reuseport");
+	fprintf(stderr, "Opening INADDR_ANY:%d\n", PORT);
+	fd2 = open_port(0, 1);
+	if (fd2 >= 0)
+		error(1, 0, "Was allowed to create an ipv4 reuseport on a already bound non-reuseport socket");
+	close(fd1);
+	fprintf(stderr, "Opening INADDR_ANY:%d after closing ipv6 socket\n", PORT);
+	fd1 = open_port(0, 1);
+	if (fd1 >= 0)
+		error(1, 0, "Was allowed to create an ipv4 reuseport on an already bound non-reuseport socket with no ipv6");
+	fprintf(stderr, "Success");
+	return 0;
+}
diff --git a/tools/testing/selftests/net/reuseport_bpf.c b/tools/testing/selftests/net/reuseport_bpf.c
new file mode 100644
index 000000000..b5277106d
--- /dev/null
+++ b/tools/testing/selftests/net/reuseport_bpf.c
@@ -0,0 +1,641 @@
+/*
+ * Test functionality of BPF filters for SO_REUSEPORT.  The tests below will use
+ * a BPF program (both classic and extended) to read the first word from an
+ * incoming packet (expected to be in network byte-order), calculate a modulus
+ * of that number, and then dispatch the packet to the Nth socket using the
+ * result.  These tests are run for each supported address family and protocol.
+ * Additionally, a few edge cases in the implementation are tested.
+ */
+
+#include <errno.h>
+#include <error.h>
+#include <fcntl.h>
+#include <linux/bpf.h>
+#include <linux/filter.h>
+#include <linux/unistd.h>
+#include <netinet/in.h>
+#include <netinet/tcp.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/epoll.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/resource.h>
+#include <unistd.h>
+
+#ifndef ARRAY_SIZE
+#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
+#endif
+
+struct test_params {
+	int recv_family;
+	int send_family;
+	int protocol;
+	size_t recv_socks;
+	uint16_t recv_port;
+	uint16_t send_port_min;
+};
+
+static size_t sockaddr_size(void)
+{
+	return sizeof(struct sockaddr_storage);
+}
+
+static struct sockaddr *new_any_sockaddr(int family, uint16_t port)
+{
+	struct sockaddr_storage *addr;
+	struct sockaddr_in *addr4;
+	struct sockaddr_in6 *addr6;
+
+	addr = malloc(sizeof(struct sockaddr_storage));
+	memset(addr, 0, sizeof(struct sockaddr_storage));
+
+	switch (family) {
+	case AF_INET:
+		addr4 = (struct sockaddr_in *)addr;
+		addr4->sin_family = AF_INET;
+		addr4->sin_addr.s_addr = htonl(INADDR_ANY);
+		addr4->sin_port = htons(port);
+		break;
+	case AF_INET6:
+		addr6 = (struct sockaddr_in6 *)addr;
+		addr6->sin6_family = AF_INET6;
+		addr6->sin6_addr = in6addr_any;
+		addr6->sin6_port = htons(port);
+		break;
+	default:
+		error(1, 0, "Unsupported family %d", family);
+	}
+	return (struct sockaddr *)addr;
+}
+
+static struct sockaddr *new_loopback_sockaddr(int family, uint16_t port)
+{
+	struct sockaddr *addr = new_any_sockaddr(family, port);
+	struct sockaddr_in *addr4;
+	struct sockaddr_in6 *addr6;
+
+	switch (family) {
+	case AF_INET:
+		addr4 = (struct sockaddr_in *)addr;
+		addr4->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+		break;
+	case AF_INET6:
+		addr6 = (struct sockaddr_in6 *)addr;
+		addr6->sin6_addr = in6addr_loopback;
+		break;
+	default:
+		error(1, 0, "Unsupported family %d", family);
+	}
+	return addr;
+}
+
+static void attach_ebpf(int fd, uint16_t mod)
+{
+	static char bpf_log_buf[65536];
+	static const char bpf_license[] = "GPL";
+
+	int bpf_fd;
+	const struct bpf_insn prog[] = {
+		/* BPF_MOV64_REG(BPF_REG_6, BPF_REG_1) */
+		{ BPF_ALU64 | BPF_MOV | BPF_X, BPF_REG_6, BPF_REG_1, 0, 0 },
+		/* BPF_LD_ABS(BPF_W, 0) R0 = (uint32_t)skb[0] */
+		{ BPF_LD | BPF_ABS | BPF_W, 0, 0, 0, 0 },
+		/* BPF_ALU64_IMM(BPF_MOD, BPF_REG_0, mod) */
+		{ BPF_ALU64 | BPF_MOD | BPF_K, BPF_REG_0, 0, 0, mod },
+		/* BPF_EXIT_INSN() */
+		{ BPF_JMP | BPF_EXIT, 0, 0, 0, 0 }
+	};
+	union bpf_attr attr;
+
+	memset(&attr, 0, sizeof(attr));
+	attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
+	attr.insn_cnt = ARRAY_SIZE(prog);
+	attr.insns = (unsigned long) &prog;
+	attr.license = (unsigned long) &bpf_license;
+	attr.log_buf = (unsigned long) &bpf_log_buf;
+	attr.log_size = sizeof(bpf_log_buf);
+	attr.log_level = 1;
+	attr.kern_version = 0;
+
+	bpf_fd = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
+	if (bpf_fd < 0)
+		error(1, errno, "ebpf error. log:\n%s\n", bpf_log_buf);
+
+	if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_REUSEPORT_EBPF, &bpf_fd,
+			sizeof(bpf_fd)))
+		error(1, errno, "failed to set SO_ATTACH_REUSEPORT_EBPF");
+
+	close(bpf_fd);
+}
+
+static void attach_cbpf(int fd, uint16_t mod)
+{
+	struct sock_filter code[] = {
+		/* A = (uint32_t)skb[0] */
+		{ BPF_LD  | BPF_W | BPF_ABS, 0, 0, 0 },
+		/* A = A % mod */
+		{ BPF_ALU | BPF_MOD, 0, 0, mod },
+		/* return A */
+		{ BPF_RET | BPF_A, 0, 0, 0 },
+	};
+	struct sock_fprog p = {
+		.len = ARRAY_SIZE(code),
+		.filter = code,
+	};
+
+	if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_REUSEPORT_CBPF, &p, sizeof(p)))
+		error(1, errno, "failed to set SO_ATTACH_REUSEPORT_CBPF");
+}
+
+static void build_recv_group(const struct test_params p, int fd[], uint16_t mod,
+			     void (*attach_bpf)(int, uint16_t))
+{
+	struct sockaddr * const addr =
+		new_any_sockaddr(p.recv_family, p.recv_port);
+	int i, opt;
+
+	for (i = 0; i < p.recv_socks; ++i) {
+		fd[i] = socket(p.recv_family, p.protocol, 0);
+		if (fd[i] < 0)
+			error(1, errno, "failed to create recv %d", i);
+
+		opt = 1;
+		if (setsockopt(fd[i], SOL_SOCKET, SO_REUSEPORT, &opt,
+			       sizeof(opt)))
+			error(1, errno, "failed to set SO_REUSEPORT on %d", i);
+
+		if (i == 0)
+			attach_bpf(fd[i], mod);
+
+		if (bind(fd[i], addr, sockaddr_size()))
+			error(1, errno, "failed to bind recv socket %d", i);
+
+		if (p.protocol == SOCK_STREAM) {
+			opt = 4;
+			if (setsockopt(fd[i], SOL_TCP, TCP_FASTOPEN, &opt,
+				       sizeof(opt)))
+				error(1, errno,
+				      "failed to set TCP_FASTOPEN on %d", i);
+			if (listen(fd[i], p.recv_socks * 10))
+				error(1, errno, "failed to listen on socket");
+		}
+	}
+	free(addr);
+}
+
+static void send_from(struct test_params p, uint16_t sport, char *buf,
+		      size_t len)
+{
+	struct sockaddr * const saddr = new_any_sockaddr(p.send_family, sport);
+	struct sockaddr * const daddr =
+		new_loopback_sockaddr(p.send_family, p.recv_port);
+	const int fd = socket(p.send_family, p.protocol, 0), one = 1;
+
+	if (fd < 0)
+		error(1, errno, "failed to create send socket");
+
+	if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one)))
+		error(1, errno, "failed to set reuseaddr");
+
+	if (bind(fd, saddr, sockaddr_size()))
+		error(1, errno, "failed to bind send socket");
+
+	if (sendto(fd, buf, len, MSG_FASTOPEN, daddr, sockaddr_size()) < 0)
+		error(1, errno, "failed to send message");
+
+	close(fd);
+	free(saddr);
+	free(daddr);
+}
+
+static void test_recv_order(const struct test_params p, int fd[], int mod)
+{
+	char recv_buf[8], send_buf[8];
+	struct msghdr msg;
+	struct iovec recv_io = { recv_buf, 8 };
+	struct epoll_event ev;
+	int epfd, conn, i, sport, expected;
+	uint32_t data, ndata;
+
+	epfd = epoll_create(1);
+	if (epfd < 0)
+		error(1, errno, "failed to create epoll");
+	for (i = 0; i < p.recv_socks; ++i) {
+		ev.events = EPOLLIN;
+		ev.data.fd = fd[i];
+		if (epoll_ctl(epfd, EPOLL_CTL_ADD, fd[i], &ev))
+			error(1, errno, "failed to register sock %d epoll", i);
+	}
+
+	memset(&msg, 0, sizeof(msg));
+	msg.msg_iov = &recv_io;
+	msg.msg_iovlen = 1;
+
+	for (data = 0; data < p.recv_socks * 2; ++data) {
+		sport = p.send_port_min + data;
+		ndata = htonl(data);
+		memcpy(send_buf, &ndata, sizeof(ndata));
+		send_from(p, sport, send_buf, sizeof(ndata));
+
+		i = epoll_wait(epfd, &ev, 1, -1);
+		if (i < 0)
+			error(1, errno, "epoll wait failed");
+
+		if (p.protocol == SOCK_STREAM) {
+			conn = accept(ev.data.fd, NULL, NULL);
+			if (conn < 0)
+				error(1, errno, "error accepting");
+			i = recvmsg(conn, &msg, 0);
+			close(conn);
+		} else {
+			i = recvmsg(ev.data.fd, &msg, 0);
+		}
+		if (i < 0)
+			error(1, errno, "recvmsg error");
+		if (i != sizeof(ndata))
+			error(1, 0, "expected size %zd got %d",
+			      sizeof(ndata), i);
+
+		for (i = 0; i < p.recv_socks; ++i)
+			if (ev.data.fd == fd[i])
+				break;
+		memcpy(&ndata, recv_buf, sizeof(ndata));
+		fprintf(stderr, "Socket %d: %d\n", i, ntohl(ndata));
+
+		expected = (sport % mod);
+		if (i != expected)
+			error(1, 0, "expected socket %d", expected);
+	}
+}
+
+static void test_reuseport_ebpf(struct test_params p)
+{
+	int i, fd[p.recv_socks];
+
+	fprintf(stderr, "Testing EBPF mod %zd...\n", p.recv_socks);
+	build_recv_group(p, fd, p.recv_socks, attach_ebpf);
+	test_recv_order(p, fd, p.recv_socks);
+
+	p.send_port_min += p.recv_socks * 2;
+	fprintf(stderr, "Reprograming, testing mod %zd...\n", p.recv_socks / 2);
+	attach_ebpf(fd[0], p.recv_socks / 2);
+	test_recv_order(p, fd, p.recv_socks / 2);
+
+	for (i = 0; i < p.recv_socks; ++i)
+		close(fd[i]);
+}
+
+static void test_reuseport_cbpf(struct test_params p)
+{
+	int i, fd[p.recv_socks];
+
+	fprintf(stderr, "Testing CBPF mod %zd...\n", p.recv_socks);
+	build_recv_group(p, fd, p.recv_socks, attach_cbpf);
+	test_recv_order(p, fd, p.recv_socks);
+
+	p.send_port_min += p.recv_socks * 2;
+	fprintf(stderr, "Reprograming, testing mod %zd...\n", p.recv_socks / 2);
+	attach_cbpf(fd[0], p.recv_socks / 2);
+	test_recv_order(p, fd, p.recv_socks / 2);
+
+	for (i = 0; i < p.recv_socks; ++i)
+		close(fd[i]);
+}
+
+static void test_extra_filter(const struct test_params p)
+{
+	struct sockaddr * const addr =
+		new_any_sockaddr(p.recv_family, p.recv_port);
+	int fd1, fd2, opt;
+
+	fprintf(stderr, "Testing too many filters...\n");
+	fd1 = socket(p.recv_family, p.protocol, 0);
+	if (fd1 < 0)
+		error(1, errno, "failed to create socket 1");
+	fd2 = socket(p.recv_family, p.protocol, 0);
+	if (fd2 < 0)
+		error(1, errno, "failed to create socket 2");
+
+	opt = 1;
+	if (setsockopt(fd1, SOL_SOCKET, SO_REUSEPORT, &opt, sizeof(opt)))
+		error(1, errno, "failed to set SO_REUSEPORT on socket 1");
+	if (setsockopt(fd2, SOL_SOCKET, SO_REUSEPORT, &opt, sizeof(opt)))
+		error(1, errno, "failed to set SO_REUSEPORT on socket 2");
+
+	attach_ebpf(fd1, 10);
+	attach_ebpf(fd2, 10);
+
+	if (bind(fd1, addr, sockaddr_size()))
+		error(1, errno, "failed to bind recv socket 1");
+
+	if (!bind(fd2, addr, sockaddr_size()) && errno != EADDRINUSE)
+		error(1, errno, "bind socket 2 should fail with EADDRINUSE");
+
+	free(addr);
+}
+
+static void test_filter_no_reuseport(const struct test_params p)
+{
+	struct sockaddr * const addr =
+		new_any_sockaddr(p.recv_family, p.recv_port);
+	const char bpf_license[] = "GPL";
+	struct bpf_insn ecode[] = {
+		{ BPF_ALU64 | BPF_MOV | BPF_K, BPF_REG_0, 0, 0, 10 },
+		{ BPF_JMP | BPF_EXIT, 0, 0, 0, 0 }
+	};
+	struct sock_filter ccode[] = {{ BPF_RET | BPF_A, 0, 0, 0 }};
+	union bpf_attr eprog;
+	struct sock_fprog cprog;
+	int fd, bpf_fd;
+
+	fprintf(stderr, "Testing filters on non-SO_REUSEPORT socket...\n");
+
+	memset(&eprog, 0, sizeof(eprog));
+	eprog.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
+	eprog.insn_cnt = ARRAY_SIZE(ecode);
+	eprog.insns = (unsigned long) &ecode;
+	eprog.license = (unsigned long) &bpf_license;
+	eprog.kern_version = 0;
+
+	memset(&cprog, 0, sizeof(cprog));
+	cprog.len = ARRAY_SIZE(ccode);
+	cprog.filter = ccode;
+
+
+	bpf_fd = syscall(__NR_bpf, BPF_PROG_LOAD, &eprog, sizeof(eprog));
+	if (bpf_fd < 0)
+		error(1, errno, "ebpf error");
+	fd = socket(p.recv_family, p.protocol, 0);
+	if (fd < 0)
+		error(1, errno, "failed to create socket 1");
+
+	if (bind(fd, addr, sockaddr_size()))
+		error(1, errno, "failed to bind recv socket 1");
+
+	errno = 0;
+	if (!setsockopt(fd, SOL_SOCKET, SO_ATTACH_REUSEPORT_EBPF, &bpf_fd,
+			sizeof(bpf_fd)) || errno != EINVAL)
+		error(1, errno, "setsockopt should have returned EINVAL");
+
+	errno = 0;
+	if (!setsockopt(fd, SOL_SOCKET, SO_ATTACH_REUSEPORT_CBPF, &cprog,
+		       sizeof(cprog)) || errno != EINVAL)
+		error(1, errno, "setsockopt should have returned EINVAL");
+
+	free(addr);
+}
+
+static void test_filter_without_bind(void)
+{
+	int fd1, fd2, opt = 1;
+
+	fprintf(stderr, "Testing filter add without bind...\n");
+	fd1 = socket(AF_INET, SOCK_DGRAM, 0);
+	if (fd1 < 0)
+		error(1, errno, "failed to create socket 1");
+	fd2 = socket(AF_INET, SOCK_DGRAM, 0);
+	if (fd2 < 0)
+		error(1, errno, "failed to create socket 2");
+	if (setsockopt(fd1, SOL_SOCKET, SO_REUSEPORT, &opt, sizeof(opt)))
+		error(1, errno, "failed to set SO_REUSEPORT on socket 1");
+	if (setsockopt(fd2, SOL_SOCKET, SO_REUSEPORT, &opt, sizeof(opt)))
+		error(1, errno, "failed to set SO_REUSEPORT on socket 2");
+
+	attach_ebpf(fd1, 10);
+	attach_cbpf(fd2, 10);
+
+	close(fd1);
+	close(fd2);
+}
+
+void enable_fastopen(void)
+{
+	int fd = open("/proc/sys/net/ipv4/tcp_fastopen", 0);
+	int rw_mask = 3;  /* bit 1: client side; bit-2 server side */
+	int val, size;
+	char buf[16];
+
+	if (fd < 0)
+		error(1, errno, "Unable to open tcp_fastopen sysctl");
+	if (read(fd, buf, sizeof(buf)) <= 0)
+		error(1, errno, "Unable to read tcp_fastopen sysctl");
+	val = atoi(buf);
+	close(fd);
+
+	if ((val & rw_mask) != rw_mask) {
+		fd = open("/proc/sys/net/ipv4/tcp_fastopen", O_RDWR);
+		if (fd < 0)
+			error(1, errno,
+			      "Unable to open tcp_fastopen sysctl for writing");
+		val |= rw_mask;
+		size = snprintf(buf, 16, "%d", val);
+		if (write(fd, buf, size) <= 0)
+			error(1, errno, "Unable to write tcp_fastopen sysctl");
+		close(fd);
+	}
+}
+
+static struct rlimit rlim_old;
+
+static  __attribute__((constructor)) void main_ctor(void)
+{
+	getrlimit(RLIMIT_MEMLOCK, &rlim_old);
+
+	if (rlim_old.rlim_cur != RLIM_INFINITY) {
+		struct rlimit rlim_new;
+
+		rlim_new.rlim_cur = rlim_old.rlim_cur + (1UL << 20);
+		rlim_new.rlim_max = rlim_old.rlim_max + (1UL << 20);
+		setrlimit(RLIMIT_MEMLOCK, &rlim_new);
+	}
+}
+
+static __attribute__((destructor)) void main_dtor(void)
+{
+	setrlimit(RLIMIT_MEMLOCK, &rlim_old);
+}
+
+int main(void)
+{
+	fprintf(stderr, "---- IPv4 UDP ----\n");
+	/* NOTE: UDP socket lookups traverse a different code path when there
+	 * are > 10 sockets in a group.  Run the bpf test through both paths.
+	 */
+	test_reuseport_ebpf((struct test_params) {
+		.recv_family = AF_INET,
+		.send_family = AF_INET,
+		.protocol = SOCK_DGRAM,
+		.recv_socks = 10,
+		.recv_port = 8000,
+		.send_port_min = 9000});
+	test_reuseport_ebpf((struct test_params) {
+		.recv_family = AF_INET,
+		.send_family = AF_INET,
+		.protocol = SOCK_DGRAM,
+		.recv_socks = 20,
+		.recv_port = 8000,
+		.send_port_min = 9000});
+	test_reuseport_cbpf((struct test_params) {
+		.recv_family = AF_INET,
+		.send_family = AF_INET,
+		.protocol = SOCK_DGRAM,
+		.recv_socks = 10,
+		.recv_port = 8001,
+		.send_port_min = 9020});
+	test_reuseport_cbpf((struct test_params) {
+		.recv_family = AF_INET,
+		.send_family = AF_INET,
+		.protocol = SOCK_DGRAM,
+		.recv_socks = 20,
+		.recv_port = 8001,
+		.send_port_min = 9020});
+	test_extra_filter((struct test_params) {
+		.recv_family = AF_INET,
+		.protocol = SOCK_DGRAM,
+		.recv_port = 8002});
+	test_filter_no_reuseport((struct test_params) {
+		.recv_family = AF_INET,
+		.protocol = SOCK_DGRAM,
+		.recv_port = 8008});
+
+	fprintf(stderr, "---- IPv6 UDP ----\n");
+	test_reuseport_ebpf((struct test_params) {
+		.recv_family = AF_INET6,
+		.send_family = AF_INET6,
+		.protocol = SOCK_DGRAM,
+		.recv_socks = 10,
+		.recv_port = 8003,
+		.send_port_min = 9040});
+	test_reuseport_ebpf((struct test_params) {
+		.recv_family = AF_INET6,
+		.send_family = AF_INET6,
+		.protocol = SOCK_DGRAM,
+		.recv_socks = 20,
+		.recv_port = 8003,
+		.send_port_min = 9040});
+	test_reuseport_cbpf((struct test_params) {
+		.recv_family = AF_INET6,
+		.send_family = AF_INET6,
+		.protocol = SOCK_DGRAM,
+		.recv_socks = 10,
+		.recv_port = 8004,
+		.send_port_min = 9060});
+	test_reuseport_cbpf((struct test_params) {
+		.recv_family = AF_INET6,
+		.send_family = AF_INET6,
+		.protocol = SOCK_DGRAM,
+		.recv_socks = 20,
+		.recv_port = 8004,
+		.send_port_min = 9060});
+	test_extra_filter((struct test_params) {
+		.recv_family = AF_INET6,
+		.protocol = SOCK_DGRAM,
+		.recv_port = 8005});
+	test_filter_no_reuseport((struct test_params) {
+		.recv_family = AF_INET6,
+		.protocol = SOCK_DGRAM,
+		.recv_port = 8009});
+
+	fprintf(stderr, "---- IPv6 UDP w/ mapped IPv4 ----\n");
+	test_reuseport_ebpf((struct test_params) {
+		.recv_family = AF_INET6,
+		.send_family = AF_INET,
+		.protocol = SOCK_DGRAM,
+		.recv_socks = 20,
+		.recv_port = 8006,
+		.send_port_min = 9080});
+	test_reuseport_ebpf((struct test_params) {
+		.recv_family = AF_INET6,
+		.send_family = AF_INET,
+		.protocol = SOCK_DGRAM,
+		.recv_socks = 10,
+		.recv_port = 8006,
+		.send_port_min = 9080});
+	test_reuseport_cbpf((struct test_params) {
+		.recv_family = AF_INET6,
+		.send_family = AF_INET,
+		.protocol = SOCK_DGRAM,
+		.recv_socks = 10,
+		.recv_port = 8007,
+		.send_port_min = 9100});
+	test_reuseport_cbpf((struct test_params) {
+		.recv_family = AF_INET6,
+		.send_family = AF_INET,
+		.protocol = SOCK_DGRAM,
+		.recv_socks = 20,
+		.recv_port = 8007,
+		.send_port_min = 9100});
+
+	/* TCP fastopen is required for the TCP tests */
+	enable_fastopen();
+	fprintf(stderr, "---- IPv4 TCP ----\n");
+	test_reuseport_ebpf((struct test_params) {
+		.recv_family = AF_INET,
+		.send_family = AF_INET,
+		.protocol = SOCK_STREAM,
+		.recv_socks = 10,
+		.recv_port = 8008,
+		.send_port_min = 9120});
+	test_reuseport_cbpf((struct test_params) {
+		.recv_family = AF_INET,
+		.send_family = AF_INET,
+		.protocol = SOCK_STREAM,
+		.recv_socks = 10,
+		.recv_port = 8009,
+		.send_port_min = 9160});
+	test_extra_filter((struct test_params) {
+		.recv_family = AF_INET,
+		.protocol = SOCK_STREAM,
+		.recv_port = 8010});
+	test_filter_no_reuseport((struct test_params) {
+		.recv_family = AF_INET,
+		.protocol = SOCK_STREAM,
+		.recv_port = 8011});
+
+	fprintf(stderr, "---- IPv6 TCP ----\n");
+	test_reuseport_ebpf((struct test_params) {
+		.recv_family = AF_INET6,
+		.send_family = AF_INET6,
+		.protocol = SOCK_STREAM,
+		.recv_socks = 10,
+		.recv_port = 8012,
+		.send_port_min = 9200});
+	test_reuseport_cbpf((struct test_params) {
+		.recv_family = AF_INET6,
+		.send_family = AF_INET6,
+		.protocol = SOCK_STREAM,
+		.recv_socks = 10,
+		.recv_port = 8013,
+		.send_port_min = 9240});
+	test_extra_filter((struct test_params) {
+		.recv_family = AF_INET6,
+		.protocol = SOCK_STREAM,
+		.recv_port = 8014});
+	test_filter_no_reuseport((struct test_params) {
+		.recv_family = AF_INET6,
+		.protocol = SOCK_STREAM,
+		.recv_port = 8015});
+
+	fprintf(stderr, "---- IPv6 TCP w/ mapped IPv4 ----\n");
+	test_reuseport_ebpf((struct test_params) {
+		.recv_family = AF_INET6,
+		.send_family = AF_INET,
+		.protocol = SOCK_STREAM,
+		.recv_socks = 10,
+		.recv_port = 8016,
+		.send_port_min = 9320});
+	test_reuseport_cbpf((struct test_params) {
+		.recv_family = AF_INET6,
+		.send_family = AF_INET,
+		.protocol = SOCK_STREAM,
+		.recv_socks = 10,
+		.recv_port = 8017,
+		.send_port_min = 9360});
+
+	test_filter_without_bind();
+
+	fprintf(stderr, "SUCCESS\n");
+	return 0;
+}
diff --git a/tools/testing/selftests/net/reuseport_bpf_cpu.c b/tools/testing/selftests/net/reuseport_bpf_cpu.c
new file mode 100644
index 000000000..2d6461747
--- /dev/null
+++ b/tools/testing/selftests/net/reuseport_bpf_cpu.c
@@ -0,0 +1,259 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Test functionality of BPF filters with SO_REUSEPORT.  This program creates
+ * an SO_REUSEPORT receiver group containing one socket per CPU core. It then
+ * creates a BPF program that will select a socket from this group based
+ * on the core id that receives the packet.  The sending code artificially
+ * moves itself to run on different core ids and sends one message from
+ * each core.  Since these packets are delivered over loopback, they should
+ * arrive on the same core that sent them.  The receiving code then ensures
+ * that the packet was received on the socket for the corresponding core id.
+ * This entire process is done for several different core id permutations
+ * and for each IPv4/IPv6 and TCP/UDP combination.
+ */
+
+#define _GNU_SOURCE
+
+#include <arpa/inet.h>
+#include <errno.h>
+#include <error.h>
+#include <linux/filter.h>
+#include <linux/in.h>
+#include <linux/unistd.h>
+#include <sched.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/epoll.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <unistd.h>
+
+static const int PORT = 8888;
+
+static void build_rcv_group(int *rcv_fd, size_t len, int family, int proto)
+{
+	struct sockaddr_storage addr;
+	struct sockaddr_in  *addr4;
+	struct sockaddr_in6 *addr6;
+	size_t i;
+	int opt;
+
+	switch (family) {
+	case AF_INET:
+		addr4 = (struct sockaddr_in *)&addr;
+		addr4->sin_family = AF_INET;
+		addr4->sin_addr.s_addr = htonl(INADDR_ANY);
+		addr4->sin_port = htons(PORT);
+		break;
+	case AF_INET6:
+		addr6 = (struct sockaddr_in6 *)&addr;
+		addr6->sin6_family = AF_INET6;
+		addr6->sin6_addr = in6addr_any;
+		addr6->sin6_port = htons(PORT);
+		break;
+	default:
+		error(1, 0, "Unsupported family %d", family);
+	}
+
+	for (i = 0; i < len; ++i) {
+		rcv_fd[i] = socket(family, proto, 0);
+		if (rcv_fd[i] < 0)
+			error(1, errno, "failed to create receive socket");
+
+		opt = 1;
+		if (setsockopt(rcv_fd[i], SOL_SOCKET, SO_REUSEPORT, &opt,
+			       sizeof(opt)))
+			error(1, errno, "failed to set SO_REUSEPORT");
+
+		if (bind(rcv_fd[i], (struct sockaddr *)&addr, sizeof(addr)))
+			error(1, errno, "failed to bind receive socket");
+
+		if (proto == SOCK_STREAM && listen(rcv_fd[i], len * 10))
+			error(1, errno, "failed to listen on receive port");
+	}
+}
+
+static void attach_bpf(int fd)
+{
+	struct sock_filter code[] = {
+		/* A = raw_smp_processor_id() */
+		{ BPF_LD  | BPF_W | BPF_ABS, 0, 0, SKF_AD_OFF + SKF_AD_CPU },
+		/* return A */
+		{ BPF_RET | BPF_A, 0, 0, 0 },
+	};
+	struct sock_fprog p = {
+		.len = 2,
+		.filter = code,
+	};
+
+	if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_REUSEPORT_CBPF, &p, sizeof(p)))
+		error(1, errno, "failed to set SO_ATTACH_REUSEPORT_CBPF");
+}
+
+static void send_from_cpu(int cpu_id, int family, int proto)
+{
+	struct sockaddr_storage saddr, daddr;
+	struct sockaddr_in  *saddr4, *daddr4;
+	struct sockaddr_in6 *saddr6, *daddr6;
+	cpu_set_t cpu_set;
+	int fd;
+
+	switch (family) {
+	case AF_INET:
+		saddr4 = (struct sockaddr_in *)&saddr;
+		saddr4->sin_family = AF_INET;
+		saddr4->sin_addr.s_addr = htonl(INADDR_ANY);
+		saddr4->sin_port = 0;
+
+		daddr4 = (struct sockaddr_in *)&daddr;
+		daddr4->sin_family = AF_INET;
+		daddr4->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+		daddr4->sin_port = htons(PORT);
+		break;
+	case AF_INET6:
+		saddr6 = (struct sockaddr_in6 *)&saddr;
+		saddr6->sin6_family = AF_INET6;
+		saddr6->sin6_addr = in6addr_any;
+		saddr6->sin6_port = 0;
+
+		daddr6 = (struct sockaddr_in6 *)&daddr;
+		daddr6->sin6_family = AF_INET6;
+		daddr6->sin6_addr = in6addr_loopback;
+		daddr6->sin6_port = htons(PORT);
+		break;
+	default:
+		error(1, 0, "Unsupported family %d", family);
+	}
+
+	memset(&cpu_set, 0, sizeof(cpu_set));
+	CPU_SET(cpu_id, &cpu_set);
+	if (sched_setaffinity(0, sizeof(cpu_set), &cpu_set) < 0)
+		error(1, errno, "failed to pin to cpu");
+
+	fd = socket(family, proto, 0);
+	if (fd < 0)
+		error(1, errno, "failed to create send socket");
+
+	if (bind(fd, (struct sockaddr *)&saddr, sizeof(saddr)))
+		error(1, errno, "failed to bind send socket");
+
+	if (connect(fd, (struct sockaddr *)&daddr, sizeof(daddr)))
+		error(1, errno, "failed to connect send socket");
+
+	if (send(fd, "a", 1, 0) < 0)
+		error(1, errno, "failed to send message");
+
+	close(fd);
+}
+
+static
+void receive_on_cpu(int *rcv_fd, int len, int epfd, int cpu_id, int proto)
+{
+	struct epoll_event ev;
+	int i, fd;
+	char buf[8];
+
+	i = epoll_wait(epfd, &ev, 1, -1);
+	if (i < 0)
+		error(1, errno, "epoll_wait failed");
+
+	if (proto == SOCK_STREAM) {
+		fd = accept(ev.data.fd, NULL, NULL);
+		if (fd < 0)
+			error(1, errno, "failed to accept");
+		i = recv(fd, buf, sizeof(buf), 0);
+		close(fd);
+	} else {
+		i = recv(ev.data.fd, buf, sizeof(buf), 0);
+	}
+
+	if (i < 0)
+		error(1, errno, "failed to recv");
+
+	for (i = 0; i < len; ++i)
+		if (ev.data.fd == rcv_fd[i])
+			break;
+	if (i == len)
+		error(1, 0, "failed to find socket");
+	fprintf(stderr, "send cpu %d, receive socket %d\n", cpu_id, i);
+	if (cpu_id != i)
+		error(1, 0, "cpu id/receive socket mismatch");
+}
+
+static void test(int *rcv_fd, int len, int family, int proto)
+{
+	struct epoll_event ev;
+	int epfd, cpu;
+
+	build_rcv_group(rcv_fd, len, family, proto);
+	attach_bpf(rcv_fd[0]);
+
+	epfd = epoll_create(1);
+	if (epfd < 0)
+		error(1, errno, "failed to create epoll");
+	for (cpu = 0; cpu < len; ++cpu) {
+		ev.events = EPOLLIN;
+		ev.data.fd = rcv_fd[cpu];
+		if (epoll_ctl(epfd, EPOLL_CTL_ADD, rcv_fd[cpu], &ev))
+			error(1, errno, "failed to register sock epoll");
+	}
+
+	/* Forward iterate */
+	for (cpu = 0; cpu < len; ++cpu) {
+		send_from_cpu(cpu, family, proto);
+		receive_on_cpu(rcv_fd, len, epfd, cpu, proto);
+	}
+
+	/* Reverse iterate */
+	for (cpu = len - 1; cpu >= 0; --cpu) {
+		send_from_cpu(cpu, family, proto);
+		receive_on_cpu(rcv_fd, len, epfd, cpu, proto);
+	}
+
+	/* Even cores */
+	for (cpu = 0; cpu < len; cpu += 2) {
+		send_from_cpu(cpu, family, proto);
+		receive_on_cpu(rcv_fd, len, epfd, cpu, proto);
+	}
+
+	/* Odd cores */
+	for (cpu = 1; cpu < len; cpu += 2) {
+		send_from_cpu(cpu, family, proto);
+		receive_on_cpu(rcv_fd, len, epfd, cpu, proto);
+	}
+
+	close(epfd);
+	for (cpu = 0; cpu < len; ++cpu)
+		close(rcv_fd[cpu]);
+}
+
+int main(void)
+{
+	int *rcv_fd, cpus;
+
+	cpus = sysconf(_SC_NPROCESSORS_ONLN);
+	if (cpus <= 0)
+		error(1, errno, "failed counting cpus");
+
+	rcv_fd = calloc(cpus, sizeof(int));
+	if (!rcv_fd)
+		error(1, 0, "failed to allocate array");
+
+	fprintf(stderr, "---- IPv4 UDP ----\n");
+	test(rcv_fd, cpus, AF_INET, SOCK_DGRAM);
+
+	fprintf(stderr, "---- IPv6 UDP ----\n");
+	test(rcv_fd, cpus, AF_INET6, SOCK_DGRAM);
+
+	fprintf(stderr, "---- IPv4 TCP ----\n");
+	test(rcv_fd, cpus, AF_INET, SOCK_STREAM);
+
+	fprintf(stderr, "---- IPv6 TCP ----\n");
+	test(rcv_fd, cpus, AF_INET6, SOCK_STREAM);
+
+	free(rcv_fd);
+
+	fprintf(stderr, "SUCCESS\n");
+	return 0;
+}
diff --git a/tools/testing/selftests/net/reuseport_bpf_numa.c b/tools/testing/selftests/net/reuseport_bpf_numa.c
new file mode 100644
index 000000000..c9f478b40
--- /dev/null
+++ b/tools/testing/selftests/net/reuseport_bpf_numa.c
@@ -0,0 +1,258 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Test functionality of BPF filters with SO_REUSEPORT. Same test as
+ * in reuseport_bpf_cpu, only as one socket per NUMA node.
+ */
+
+#define _GNU_SOURCE
+
+#include <arpa/inet.h>
+#include <errno.h>
+#include <error.h>
+#include <linux/filter.h>
+#include <linux/bpf.h>
+#include <linux/in.h>
+#include <linux/unistd.h>
+#include <sched.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/epoll.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <unistd.h>
+#include <numa.h>
+
+#include "../kselftest.h"
+
+static const int PORT = 8888;
+
+static void build_rcv_group(int *rcv_fd, size_t len, int family, int proto)
+{
+	struct sockaddr_storage addr;
+	struct sockaddr_in  *addr4;
+	struct sockaddr_in6 *addr6;
+	size_t i;
+	int opt;
+
+	switch (family) {
+	case AF_INET:
+		addr4 = (struct sockaddr_in *)&addr;
+		addr4->sin_family = AF_INET;
+		addr4->sin_addr.s_addr = htonl(INADDR_ANY);
+		addr4->sin_port = htons(PORT);
+		break;
+	case AF_INET6:
+		addr6 = (struct sockaddr_in6 *)&addr;
+		addr6->sin6_family = AF_INET6;
+		addr6->sin6_addr = in6addr_any;
+		addr6->sin6_port = htons(PORT);
+		break;
+	default:
+		error(1, 0, "Unsupported family %d", family);
+	}
+
+	for (i = 0; i < len; ++i) {
+		rcv_fd[i] = socket(family, proto, 0);
+		if (rcv_fd[i] < 0)
+			error(1, errno, "failed to create receive socket");
+
+		opt = 1;
+		if (setsockopt(rcv_fd[i], SOL_SOCKET, SO_REUSEPORT, &opt,
+			       sizeof(opt)))
+			error(1, errno, "failed to set SO_REUSEPORT");
+
+		if (bind(rcv_fd[i], (struct sockaddr *)&addr, sizeof(addr)))
+			error(1, errno, "failed to bind receive socket");
+
+		if (proto == SOCK_STREAM && listen(rcv_fd[i], len * 10))
+			error(1, errno, "failed to listen on receive port");
+	}
+}
+
+static void attach_bpf(int fd)
+{
+	static char bpf_log_buf[65536];
+	static const char bpf_license[] = "";
+
+	int bpf_fd;
+	const struct bpf_insn prog[] = {
+		/* R0 = bpf_get_numa_node_id() */
+		{ BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_numa_node_id },
+		/* return R0 */
+		{ BPF_JMP | BPF_EXIT, 0, 0, 0, 0 }
+	};
+	union bpf_attr attr;
+
+	memset(&attr, 0, sizeof(attr));
+	attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
+	attr.insn_cnt = sizeof(prog) / sizeof(prog[0]);
+	attr.insns = (unsigned long) &prog;
+	attr.license = (unsigned long) &bpf_license;
+	attr.log_buf = (unsigned long) &bpf_log_buf;
+	attr.log_size = sizeof(bpf_log_buf);
+	attr.log_level = 1;
+
+	bpf_fd = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
+	if (bpf_fd < 0)
+		error(1, errno, "ebpf error. log:\n%s\n", bpf_log_buf);
+
+	if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_REUSEPORT_EBPF, &bpf_fd,
+			sizeof(bpf_fd)))
+		error(1, errno, "failed to set SO_ATTACH_REUSEPORT_EBPF");
+
+	close(bpf_fd);
+}
+
+static void send_from_node(int node_id, int family, int proto)
+{
+	struct sockaddr_storage saddr, daddr;
+	struct sockaddr_in  *saddr4, *daddr4;
+	struct sockaddr_in6 *saddr6, *daddr6;
+	int fd;
+
+	switch (family) {
+	case AF_INET:
+		saddr4 = (struct sockaddr_in *)&saddr;
+		saddr4->sin_family = AF_INET;
+		saddr4->sin_addr.s_addr = htonl(INADDR_ANY);
+		saddr4->sin_port = 0;
+
+		daddr4 = (struct sockaddr_in *)&daddr;
+		daddr4->sin_family = AF_INET;
+		daddr4->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+		daddr4->sin_port = htons(PORT);
+		break;
+	case AF_INET6:
+		saddr6 = (struct sockaddr_in6 *)&saddr;
+		saddr6->sin6_family = AF_INET6;
+		saddr6->sin6_addr = in6addr_any;
+		saddr6->sin6_port = 0;
+
+		daddr6 = (struct sockaddr_in6 *)&daddr;
+		daddr6->sin6_family = AF_INET6;
+		daddr6->sin6_addr = in6addr_loopback;
+		daddr6->sin6_port = htons(PORT);
+		break;
+	default:
+		error(1, 0, "Unsupported family %d", family);
+	}
+
+	if (numa_run_on_node(node_id) < 0)
+		error(1, errno, "failed to pin to node");
+
+	fd = socket(family, proto, 0);
+	if (fd < 0)
+		error(1, errno, "failed to create send socket");
+
+	if (bind(fd, (struct sockaddr *)&saddr, sizeof(saddr)))
+		error(1, errno, "failed to bind send socket");
+
+	if (connect(fd, (struct sockaddr *)&daddr, sizeof(daddr)))
+		error(1, errno, "failed to connect send socket");
+
+	if (send(fd, "a", 1, 0) < 0)
+		error(1, errno, "failed to send message");
+
+	close(fd);
+}
+
+static
+void receive_on_node(int *rcv_fd, int len, int epfd, int node_id, int proto)
+{
+	struct epoll_event ev;
+	int i, fd;
+	char buf[8];
+
+	i = epoll_wait(epfd, &ev, 1, -1);
+	if (i < 0)
+		error(1, errno, "epoll_wait failed");
+
+	if (proto == SOCK_STREAM) {
+		fd = accept(ev.data.fd, NULL, NULL);
+		if (fd < 0)
+			error(1, errno, "failed to accept");
+		i = recv(fd, buf, sizeof(buf), 0);
+		close(fd);
+	} else {
+		i = recv(ev.data.fd, buf, sizeof(buf), 0);
+	}
+
+	if (i < 0)
+		error(1, errno, "failed to recv");
+
+	for (i = 0; i < len; ++i)
+		if (ev.data.fd == rcv_fd[i])
+			break;
+	if (i == len)
+		error(1, 0, "failed to find socket");
+	fprintf(stderr, "send node %d, receive socket %d\n", node_id, i);
+	if (node_id != i)
+		error(1, 0, "node id/receive socket mismatch");
+}
+
+static void test(int *rcv_fd, int len, int family, int proto)
+{
+	struct epoll_event ev;
+	int epfd, node;
+
+	build_rcv_group(rcv_fd, len, family, proto);
+	attach_bpf(rcv_fd[0]);
+
+	epfd = epoll_create(1);
+	if (epfd < 0)
+		error(1, errno, "failed to create epoll");
+	for (node = 0; node < len; ++node) {
+		ev.events = EPOLLIN;
+		ev.data.fd = rcv_fd[node];
+		if (epoll_ctl(epfd, EPOLL_CTL_ADD, rcv_fd[node], &ev))
+			error(1, errno, "failed to register sock epoll");
+	}
+
+	/* Forward iterate */
+	for (node = 0; node < len; ++node) {
+		send_from_node(node, family, proto);
+		receive_on_node(rcv_fd, len, epfd, node, proto);
+	}
+
+	/* Reverse iterate */
+	for (node = len - 1; node >= 0; --node) {
+		send_from_node(node, family, proto);
+		receive_on_node(rcv_fd, len, epfd, node, proto);
+	}
+
+	close(epfd);
+	for (node = 0; node < len; ++node)
+		close(rcv_fd[node]);
+}
+
+int main(void)
+{
+	int *rcv_fd, nodes;
+
+	if (numa_available() < 0)
+		ksft_exit_skip("no numa api support\n");
+
+	nodes = numa_max_node() + 1;
+
+	rcv_fd = calloc(nodes, sizeof(int));
+	if (!rcv_fd)
+		error(1, 0, "failed to allocate array");
+
+	fprintf(stderr, "---- IPv4 UDP ----\n");
+	test(rcv_fd, nodes, AF_INET, SOCK_DGRAM);
+
+	fprintf(stderr, "---- IPv6 UDP ----\n");
+	test(rcv_fd, nodes, AF_INET6, SOCK_DGRAM);
+
+	fprintf(stderr, "---- IPv4 TCP ----\n");
+	test(rcv_fd, nodes, AF_INET, SOCK_STREAM);
+
+	fprintf(stderr, "---- IPv6 TCP ----\n");
+	test(rcv_fd, nodes, AF_INET6, SOCK_STREAM);
+
+	free(rcv_fd);
+
+	fprintf(stderr, "SUCCESS\n");
+	return 0;
+}
diff --git a/tools/testing/selftests/net/reuseport_dualstack.c b/tools/testing/selftests/net/reuseport_dualstack.c
new file mode 100644
index 000000000..fb7a59ed7
--- /dev/null
+++ b/tools/testing/selftests/net/reuseport_dualstack.c
@@ -0,0 +1,210 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * It is possible to use SO_REUSEPORT to open multiple sockets bound to
+ * equivalent local addresses using AF_INET and AF_INET6 at the same time.  If
+ * the AF_INET6 socket has IPV6_V6ONLY set, it's clear which socket should
+ * receive a given incoming packet.  However, when it is not set, incoming v4
+ * packets should prefer the AF_INET socket(s).  This behavior was defined with
+ * the original SO_REUSEPORT implementation, but broke with
+ * e32ea7e74727 ("soreuseport: fast reuseport UDP socket selection")
+ * This test creates these mixed AF_INET/AF_INET6 sockets and asserts the
+ * AF_INET preference for v4 packets.
+ */
+
+#define _GNU_SOURCE
+
+#include <arpa/inet.h>
+#include <errno.h>
+#include <error.h>
+#include <linux/in.h>
+#include <linux/unistd.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/epoll.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <unistd.h>
+
+static const int PORT = 8888;
+
+static void build_rcv_fd(int family, int proto, int *rcv_fds, int count)
+{
+	struct sockaddr_storage addr;
+	struct sockaddr_in  *addr4;
+	struct sockaddr_in6 *addr6;
+	int opt, i;
+
+	switch (family) {
+	case AF_INET:
+		addr4 = (struct sockaddr_in *)&addr;
+		addr4->sin_family = AF_INET;
+		addr4->sin_addr.s_addr = htonl(INADDR_ANY);
+		addr4->sin_port = htons(PORT);
+		break;
+	case AF_INET6:
+		addr6 = (struct sockaddr_in6 *)&addr;
+		addr6->sin6_family = AF_INET6;
+		addr6->sin6_addr = in6addr_any;
+		addr6->sin6_port = htons(PORT);
+		break;
+	default:
+		error(1, 0, "Unsupported family %d", family);
+	}
+
+	for (i = 0; i < count; ++i) {
+		rcv_fds[i] = socket(family, proto, 0);
+		if (rcv_fds[i] < 0)
+			error(1, errno, "failed to create receive socket");
+
+		opt = 1;
+		if (setsockopt(rcv_fds[i], SOL_SOCKET, SO_REUSEPORT, &opt,
+			       sizeof(opt)))
+			error(1, errno, "failed to set SO_REUSEPORT");
+
+		if (bind(rcv_fds[i], (struct sockaddr *)&addr, sizeof(addr)))
+			error(1, errno, "failed to bind receive socket");
+
+		if (proto == SOCK_STREAM && listen(rcv_fds[i], 10))
+			error(1, errno, "failed to listen on receive port");
+	}
+}
+
+static void send_from_v4(int proto)
+{
+	struct sockaddr_in  saddr, daddr;
+	int fd;
+
+	saddr.sin_family = AF_INET;
+	saddr.sin_addr.s_addr = htonl(INADDR_ANY);
+	saddr.sin_port = 0;
+
+	daddr.sin_family = AF_INET;
+	daddr.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+	daddr.sin_port = htons(PORT);
+
+	fd = socket(AF_INET, proto, 0);
+	if (fd < 0)
+		error(1, errno, "failed to create send socket");
+
+	if (bind(fd, (struct sockaddr *)&saddr, sizeof(saddr)))
+		error(1, errno, "failed to bind send socket");
+
+	if (connect(fd, (struct sockaddr *)&daddr, sizeof(daddr)))
+		error(1, errno, "failed to connect send socket");
+
+	if (send(fd, "a", 1, 0) < 0)
+		error(1, errno, "failed to send message");
+
+	close(fd);
+}
+
+static int receive_once(int epfd, int proto)
+{
+	struct epoll_event ev;
+	int i, fd;
+	char buf[8];
+
+	i = epoll_wait(epfd, &ev, 1, -1);
+	if (i < 0)
+		error(1, errno, "epoll_wait failed");
+
+	if (proto == SOCK_STREAM) {
+		fd = accept(ev.data.fd, NULL, NULL);
+		if (fd < 0)
+			error(1, errno, "failed to accept");
+		i = recv(fd, buf, sizeof(buf), 0);
+		close(fd);
+	} else {
+		i = recv(ev.data.fd, buf, sizeof(buf), 0);
+	}
+
+	if (i < 0)
+		error(1, errno, "failed to recv");
+
+	return ev.data.fd;
+}
+
+static void test(int *rcv_fds, int count, int proto)
+{
+	struct epoll_event ev;
+	int epfd, i, test_fd;
+	int test_family;
+	socklen_t len;
+
+	epfd = epoll_create(1);
+	if (epfd < 0)
+		error(1, errno, "failed to create epoll");
+
+	ev.events = EPOLLIN;
+	for (i = 0; i < count; ++i) {
+		ev.data.fd = rcv_fds[i];
+		if (epoll_ctl(epfd, EPOLL_CTL_ADD, rcv_fds[i], &ev))
+			error(1, errno, "failed to register sock epoll");
+	}
+
+	send_from_v4(proto);
+
+	test_fd = receive_once(epfd, proto);
+	len = sizeof(test_family);
+	if (getsockopt(test_fd, SOL_SOCKET, SO_DOMAIN, &test_family, &len))
+		error(1, errno, "failed to read socket domain");
+	if (test_family != AF_INET)
+		error(1, 0, "expected to receive on v4 socket but got v6 (%d)",
+		      test_family);
+
+	close(epfd);
+}
+
+int main(void)
+{
+	int rcv_fds[32], i;
+
+	fprintf(stderr, "---- UDP IPv4 created before IPv6 ----\n");
+	build_rcv_fd(AF_INET, SOCK_DGRAM, rcv_fds, 5);
+	build_rcv_fd(AF_INET6, SOCK_DGRAM, &(rcv_fds[5]), 5);
+	test(rcv_fds, 10, SOCK_DGRAM);
+	for (i = 0; i < 10; ++i)
+		close(rcv_fds[i]);
+
+	fprintf(stderr, "---- UDP IPv6 created before IPv4 ----\n");
+	build_rcv_fd(AF_INET6, SOCK_DGRAM, rcv_fds, 5);
+	build_rcv_fd(AF_INET, SOCK_DGRAM, &(rcv_fds[5]), 5);
+	test(rcv_fds, 10, SOCK_DGRAM);
+	for (i = 0; i < 10; ++i)
+		close(rcv_fds[i]);
+
+	/* NOTE: UDP socket lookups traverse a different code path when there
+	 * are > 10 sockets in a group.
+	 */
+	fprintf(stderr, "---- UDP IPv4 created before IPv6 (large) ----\n");
+	build_rcv_fd(AF_INET, SOCK_DGRAM, rcv_fds, 16);
+	build_rcv_fd(AF_INET6, SOCK_DGRAM, &(rcv_fds[16]), 16);
+	test(rcv_fds, 32, SOCK_DGRAM);
+	for (i = 0; i < 32; ++i)
+		close(rcv_fds[i]);
+
+	fprintf(stderr, "---- UDP IPv6 created before IPv4 (large) ----\n");
+	build_rcv_fd(AF_INET6, SOCK_DGRAM, rcv_fds, 16);
+	build_rcv_fd(AF_INET, SOCK_DGRAM, &(rcv_fds[16]), 16);
+	test(rcv_fds, 32, SOCK_DGRAM);
+	for (i = 0; i < 32; ++i)
+		close(rcv_fds[i]);
+
+	fprintf(stderr, "---- TCP IPv4 created before IPv6 ----\n");
+	build_rcv_fd(AF_INET, SOCK_STREAM, rcv_fds, 5);
+	build_rcv_fd(AF_INET6, SOCK_STREAM, &(rcv_fds[5]), 5);
+	test(rcv_fds, 10, SOCK_STREAM);
+	for (i = 0; i < 10; ++i)
+		close(rcv_fds[i]);
+
+	fprintf(stderr, "---- TCP IPv6 created before IPv4 ----\n");
+	build_rcv_fd(AF_INET6, SOCK_STREAM, rcv_fds, 5);
+	build_rcv_fd(AF_INET, SOCK_STREAM, &(rcv_fds[5]), 5);
+	test(rcv_fds, 10, SOCK_STREAM);
+	for (i = 0; i < 10; ++i)
+		close(rcv_fds[i]);
+
+	fprintf(stderr, "SUCCESS\n");
+	return 0;
+}
diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh
new file mode 100755
index 000000000..ff665de78
--- /dev/null
+++ b/tools/testing/selftests/net/rtnetlink.sh
@@ -0,0 +1,1023 @@
+#!/bin/bash
+#
+# This test is for checking rtnetlink callpaths, and get as much coverage as possible.
+#
+# set -e
+
+devdummy="test-dummy0"
+ret=0
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+# set global exit status, but never reset nonzero one.
+check_err()
+{
+	if [ $ret -eq 0 ]; then
+		ret=$1
+	fi
+}
+
+# same but inverted -- used when command must fail for test to pass
+check_fail()
+{
+	if [ $1 -eq 0 ]; then
+		ret=1
+	fi
+}
+
+kci_add_dummy()
+{
+	ip link add name "$devdummy" type dummy
+	check_err $?
+	ip link set "$devdummy" up
+	check_err $?
+}
+
+kci_del_dummy()
+{
+	ip link del dev "$devdummy"
+	check_err $?
+}
+
+kci_test_netconf()
+{
+	dev="$1"
+	r=$ret
+
+	ip netconf show dev "$dev" > /dev/null
+	check_err $?
+
+	for f in 4 6; do
+		ip -$f netconf show dev "$dev" > /dev/null
+		check_err $?
+	done
+
+	if [ $ret -ne 0 ] ;then
+		echo "FAIL: ip netconf show $dev"
+		test $r -eq 0 && ret=0
+		return 1
+	fi
+}
+
+# add a bridge with vlans on top
+kci_test_bridge()
+{
+	devbr="test-br0"
+	vlandev="testbr-vlan1"
+
+	ret=0
+	ip link add name "$devbr" type bridge
+	check_err $?
+
+	ip link set dev "$devdummy" master "$devbr"
+	check_err $?
+
+	ip link set "$devbr" up
+	check_err $?
+
+	ip link add link "$devbr" name "$vlandev" type vlan id 1
+	check_err $?
+	ip addr add dev "$vlandev" 10.200.7.23/30
+	check_err $?
+	ip -6 addr add dev "$vlandev" dead:42::1234/64
+	check_err $?
+	ip -d link > /dev/null
+	check_err $?
+	ip r s t all > /dev/null
+	check_err $?
+
+	for name in "$devbr" "$vlandev" "$devdummy" ; do
+		kci_test_netconf "$name"
+	done
+
+	ip -6 addr del dev "$vlandev" dead:42::1234/64
+	check_err $?
+
+	ip link del dev "$vlandev"
+	check_err $?
+	ip link del dev "$devbr"
+	check_err $?
+
+	if [ $ret -ne 0 ];then
+		echo "FAIL: bridge setup"
+		return 1
+	fi
+	echo "PASS: bridge setup"
+
+}
+
+kci_test_gre()
+{
+	gredev=neta
+	rem=10.42.42.1
+	loc=10.0.0.1
+
+	ret=0
+	ip tunnel add $gredev mode gre remote $rem local $loc ttl 1
+	check_err $?
+	ip link set $gredev up
+	check_err $?
+	ip addr add 10.23.7.10 dev $gredev
+	check_err $?
+	ip route add 10.23.8.0/30 dev $gredev
+	check_err $?
+	ip addr add dev "$devdummy" 10.23.7.11/24
+	check_err $?
+	ip link > /dev/null
+	check_err $?
+	ip addr > /dev/null
+	check_err $?
+
+	kci_test_netconf "$gredev"
+
+	ip addr del dev "$devdummy" 10.23.7.11/24
+	check_err $?
+
+	ip link del $gredev
+	check_err $?
+
+	if [ $ret -ne 0 ];then
+		echo "FAIL: gre tunnel endpoint"
+		return 1
+	fi
+	echo "PASS: gre tunnel endpoint"
+}
+
+# tc uses rtnetlink too, for full tc testing
+# please see tools/testing/selftests/tc-testing.
+kci_test_tc()
+{
+	dev=lo
+	ret=0
+
+	tc qdisc add dev "$dev" root handle 1: htb
+	check_err $?
+	tc class add dev "$dev" parent 1: classid 1:10 htb rate 1mbit
+	check_err $?
+	tc filter add dev "$dev" parent 1:0 prio 5 handle ffe: protocol ip u32 divisor 256
+	check_err $?
+	tc filter add dev "$dev" parent 1:0 prio 5 handle ffd: protocol ip u32 divisor 256
+	check_err $?
+	tc filter add dev "$dev" parent 1:0 prio 5 handle ffc: protocol ip u32 divisor 256
+	check_err $?
+	tc filter add dev "$dev" protocol ip parent 1: prio 5 handle ffe:2:3 u32 ht ffe:2: match ip src 10.0.0.3 flowid 1:10
+	check_err $?
+	tc filter add dev "$dev" protocol ip parent 1: prio 5 handle ffe:2:2 u32 ht ffe:2: match ip src 10.0.0.2 flowid 1:10
+	check_err $?
+	tc filter show dev "$dev" parent  1:0 > /dev/null
+	check_err $?
+	tc filter del dev "$dev" protocol ip parent 1: prio 5 handle ffe:2:3 u32
+	check_err $?
+	tc filter show dev "$dev" parent  1:0 > /dev/null
+	check_err $?
+	tc qdisc del dev "$dev" root handle 1: htb
+	check_err $?
+
+	if [ $ret -ne 0 ];then
+		echo "FAIL: tc htb hierarchy"
+		return 1
+	fi
+	echo "PASS: tc htb hierarchy"
+
+}
+
+kci_test_polrouting()
+{
+	ret=0
+	ip rule add fwmark 1 lookup 100
+	check_err $?
+	ip route add local 0.0.0.0/0 dev lo table 100
+	check_err $?
+	ip r s t all > /dev/null
+	check_err $?
+	ip rule del fwmark 1 lookup 100
+	check_err $?
+	ip route del local 0.0.0.0/0 dev lo table 100
+	check_err $?
+
+	if [ $ret -ne 0 ];then
+		echo "FAIL: policy route test"
+		return 1
+	fi
+	echo "PASS: policy routing"
+}
+
+kci_test_route_get()
+{
+	ret=0
+
+	ip route get 127.0.0.1 > /dev/null
+	check_err $?
+	ip route get 127.0.0.1 dev "$devdummy" > /dev/null
+	check_err $?
+	ip route get ::1 > /dev/null
+	check_err $?
+	ip route get fe80::1 dev "$devdummy" > /dev/null
+	check_err $?
+	ip route get 127.0.0.1 from 127.0.0.1 oif lo tos 0x1 mark 0x1 > /dev/null
+	check_err $?
+	ip route get ::1 from ::1 iif lo oif lo tos 0x1 mark 0x1 > /dev/null
+	check_err $?
+	ip addr add dev "$devdummy" 10.23.7.11/24
+	check_err $?
+	ip route get 10.23.7.11 from 10.23.7.12 iif "$devdummy" > /dev/null
+	check_err $?
+	ip addr del dev "$devdummy" 10.23.7.11/24
+	check_err $?
+
+	if [ $ret -ne 0 ];then
+		echo "FAIL: route get"
+		return 1
+	fi
+
+	echo "PASS: route get"
+}
+
+kci_test_addrlft()
+{
+	for i in $(seq 10 100) ;do
+		lft=$(((RANDOM%3) + 1))
+		ip addr add 10.23.11.$i/32 dev "$devdummy" preferred_lft $lft valid_lft $((lft+1))
+		check_err $?
+	done
+
+	sleep 5
+
+	ip addr show dev "$devdummy" | grep "10.23.11."
+	if [ $? -eq 0 ]; then
+		echo "FAIL: preferred_lft addresses remaining"
+		check_err 1
+		return
+	fi
+
+	echo "PASS: preferred_lft addresses have expired"
+}
+
+kci_test_addrlabel()
+{
+	ret=0
+
+	ip addrlabel add prefix dead::/64 dev lo label 1
+	check_err $?
+
+	ip addrlabel list |grep -q "prefix dead::/64 dev lo label 1"
+	check_err $?
+
+	ip addrlabel del prefix dead::/64 dev lo label 1 2> /dev/null
+	check_err $?
+
+	ip addrlabel add prefix dead::/64 label 1 2> /dev/null
+	check_err $?
+
+	ip addrlabel del prefix dead::/64 label 1 2> /dev/null
+	check_err $?
+
+	# concurrent add/delete
+	for i in $(seq 1 1000); do
+		ip addrlabel add prefix 1c3::/64 label 12345 2>/dev/null
+	done &
+
+	for i in $(seq 1 1000); do
+		ip addrlabel del prefix 1c3::/64 label 12345 2>/dev/null
+	done
+
+	wait
+
+	ip addrlabel del prefix 1c3::/64 label 12345 2>/dev/null
+
+	if [ $ret -ne 0 ];then
+		echo "FAIL: ipv6 addrlabel"
+		return 1
+	fi
+
+	echo "PASS: ipv6 addrlabel"
+}
+
+kci_test_ifalias()
+{
+	ret=0
+	namewant=$(uuidgen)
+	syspathname="/sys/class/net/$devdummy/ifalias"
+
+	ip link set dev "$devdummy" alias "$namewant"
+	check_err $?
+
+	if [ $ret -ne 0 ]; then
+		echo "FAIL: cannot set interface alias of $devdummy to $namewant"
+		return 1
+	fi
+
+	ip link show "$devdummy" | grep -q "alias $namewant"
+	check_err $?
+
+	if [ -r "$syspathname" ] ; then
+		read namehave < "$syspathname"
+		if [ "$namewant" != "$namehave" ]; then
+			echo "FAIL: did set ifalias $namewant but got $namehave"
+			return 1
+		fi
+
+		namewant=$(uuidgen)
+		echo "$namewant" > "$syspathname"
+	        ip link show "$devdummy" | grep -q "alias $namewant"
+		check_err $?
+
+		# sysfs interface allows to delete alias again
+		echo "" > "$syspathname"
+
+	        ip link show "$devdummy" | grep -q "alias $namewant"
+		check_fail $?
+
+		for i in $(seq 1 100); do
+			uuidgen > "$syspathname" &
+		done
+
+		wait
+
+		# re-add the alias -- kernel should free mem when dummy dev is removed
+		ip link set dev "$devdummy" alias "$namewant"
+		check_err $?
+	fi
+
+	if [ $ret -ne 0 ]; then
+		echo "FAIL: set interface alias $devdummy to $namewant"
+		return 1
+	fi
+
+	echo "PASS: set ifalias $namewant for $devdummy"
+}
+
+kci_test_vrf()
+{
+	vrfname="test-vrf"
+	ret=0
+
+	ip link show type vrf 2>/dev/null
+	if [ $? -ne 0 ]; then
+		echo "SKIP: vrf: iproute2 too old"
+		return $ksft_skip
+	fi
+
+	ip link add "$vrfname" type vrf table 10
+	check_err $?
+	if [ $ret -ne 0 ];then
+		echo "FAIL: can't add vrf interface, skipping test"
+		return 0
+	fi
+
+	ip -br link show type vrf | grep -q "$vrfname"
+	check_err $?
+	if [ $ret -ne 0 ];then
+		echo "FAIL: created vrf device not found"
+		return 1
+	fi
+
+	ip link set dev "$vrfname" up
+	check_err $?
+
+	ip link set dev "$devdummy" master "$vrfname"
+	check_err $?
+	ip link del dev "$vrfname"
+	check_err $?
+
+	if [ $ret -ne 0 ];then
+		echo "FAIL: vrf"
+		return 1
+	fi
+
+	echo "PASS: vrf"
+}
+
+kci_test_encap_vxlan()
+{
+	ret=0
+	vxlan="test-vxlan0"
+	vlan="test-vlan0"
+	testns="$1"
+
+	ip netns exec "$testns" ip link add "$vxlan" type vxlan id 42 group 239.1.1.1 \
+		dev "$devdummy" dstport 4789 2>/dev/null
+	if [ $? -ne 0 ]; then
+		echo "FAIL: can't add vxlan interface, skipping test"
+		return 0
+	fi
+	check_err $?
+
+	ip netns exec "$testns" ip addr add 10.2.11.49/24 dev "$vxlan"
+	check_err $?
+
+	ip netns exec "$testns" ip link set up dev "$vxlan"
+	check_err $?
+
+	ip netns exec "$testns" ip link add link "$vxlan" name "$vlan" type vlan id 1
+	check_err $?
+
+	ip netns exec "$testns" ip link del "$vxlan"
+	check_err $?
+
+	if [ $ret -ne 0 ]; then
+		echo "FAIL: vxlan"
+		return 1
+	fi
+	echo "PASS: vxlan"
+}
+
+kci_test_encap_fou()
+{
+	ret=0
+	name="test-fou"
+	testns="$1"
+
+	ip fou help 2>&1 |grep -q 'Usage: ip fou'
+	if [ $? -ne 0 ];then
+		echo "SKIP: fou: iproute2 too old"
+		return $ksft_skip
+	fi
+
+	ip netns exec "$testns" ip fou add port 7777 ipproto 47 2>/dev/null
+	if [ $? -ne 0 ];then
+		echo "FAIL: can't add fou port 7777, skipping test"
+		return 1
+	fi
+
+	ip netns exec "$testns" ip fou add port 8888 ipproto 4
+	check_err $?
+
+	ip netns exec "$testns" ip fou del port 9999 2>/dev/null
+	check_fail $?
+
+	ip netns exec "$testns" ip fou del port 7777
+	check_err $?
+
+	if [ $ret -ne 0 ]; then
+		echo "FAIL: fou"
+		return 1
+	fi
+
+	echo "PASS: fou"
+}
+
+# test various encap methods, use netns to avoid unwanted interference
+kci_test_encap()
+{
+	testns="testns"
+	ret=0
+
+	ip netns add "$testns"
+	if [ $? -ne 0 ]; then
+		echo "SKIP encap tests: cannot add net namespace $testns"
+		return $ksft_skip
+	fi
+
+	ip netns exec "$testns" ip link set lo up
+	check_err $?
+
+	ip netns exec "$testns" ip link add name "$devdummy" type dummy
+	check_err $?
+	ip netns exec "$testns" ip link set "$devdummy" up
+	check_err $?
+
+	kci_test_encap_vxlan "$testns"
+	kci_test_encap_fou "$testns"
+
+	ip netns del "$testns"
+}
+
+kci_test_macsec()
+{
+	msname="test_macsec0"
+	ret=0
+
+	ip macsec help 2>&1 | grep -q "^Usage: ip macsec"
+	if [ $? -ne 0 ]; then
+		echo "SKIP: macsec: iproute2 too old"
+		return $ksft_skip
+	fi
+
+	ip link add link "$devdummy" "$msname" type macsec port 42 encrypt on
+	check_err $?
+	if [ $ret -ne 0 ];then
+		echo "FAIL: can't add macsec interface, skipping test"
+		return 1
+	fi
+
+	ip macsec add "$msname" tx sa 0 pn 1024 on key 01 12345678901234567890123456789012
+	check_err $?
+
+	ip macsec add "$msname" rx port 1234 address "1c:ed:de:ad:be:ef"
+	check_err $?
+
+	ip macsec add "$msname" rx port 1234 address "1c:ed:de:ad:be:ef" sa 0 pn 1 on key 00 0123456789abcdef0123456789abcdef
+	check_err $?
+
+	ip macsec show > /dev/null
+	check_err $?
+
+	ip link del dev "$msname"
+	check_err $?
+
+	if [ $ret -ne 0 ];then
+		echo "FAIL: macsec"
+		return 1
+	fi
+
+	echo "PASS: macsec"
+}
+
+#-------------------------------------------------------------------
+# Example commands
+#   ip x s add proto esp src 14.0.0.52 dst 14.0.0.70 \
+#            spi 0x07 mode transport reqid 0x07 replay-window 32 \
+#            aead 'rfc4106(gcm(aes))' 1234567890123456dcba 128 \
+#            sel src 14.0.0.52/24 dst 14.0.0.70/24
+#   ip x p add dir out src 14.0.0.52/24 dst 14.0.0.70/24 \
+#            tmpl proto esp src 14.0.0.52 dst 14.0.0.70 \
+#            spi 0x07 mode transport reqid 0x07
+#
+# Subcommands not tested
+#    ip x s update
+#    ip x s allocspi
+#    ip x s deleteall
+#    ip x p update
+#    ip x p deleteall
+#    ip x p set
+#-------------------------------------------------------------------
+kci_test_ipsec()
+{
+	ret=0
+	algo="aead rfc4106(gcm(aes)) 0x3132333435363738393031323334353664636261 128"
+	srcip=192.168.123.1
+	dstip=192.168.123.2
+	spi=7
+
+	ip addr add $srcip dev $devdummy
+
+	# flush to be sure there's nothing configured
+	ip x s flush ; ip x p flush
+	check_err $?
+
+	# start the monitor in the background
+	tmpfile=`mktemp /var/run/ipsectestXXX`
+	mpid=`(ip x m > $tmpfile & echo $!) 2>/dev/null`
+	sleep 0.2
+
+	ipsecid="proto esp src $srcip dst $dstip spi 0x07"
+	ip x s add $ipsecid \
+            mode transport reqid 0x07 replay-window 32 \
+            $algo sel src $srcip/24 dst $dstip/24
+	check_err $?
+
+	lines=`ip x s list | grep $srcip | grep $dstip | wc -l`
+	test $lines -eq 2
+	check_err $?
+
+	ip x s count | grep -q "SAD count 1"
+	check_err $?
+
+	lines=`ip x s get $ipsecid | grep $srcip | grep $dstip | wc -l`
+	test $lines -eq 2
+	check_err $?
+
+	ip x s delete $ipsecid
+	check_err $?
+
+	lines=`ip x s list | wc -l`
+	test $lines -eq 0
+	check_err $?
+
+	ipsecsel="dir out src $srcip/24 dst $dstip/24"
+	ip x p add $ipsecsel \
+		    tmpl proto esp src $srcip dst $dstip \
+		    spi 0x07 mode transport reqid 0x07
+	check_err $?
+
+	lines=`ip x p list | grep $srcip | grep $dstip | wc -l`
+	test $lines -eq 2
+	check_err $?
+
+	ip x p count | grep -q "SPD IN  0 OUT 1 FWD 0"
+	check_err $?
+
+	lines=`ip x p get $ipsecsel | grep $srcip | grep $dstip | wc -l`
+	test $lines -eq 2
+	check_err $?
+
+	ip x p delete $ipsecsel
+	check_err $?
+
+	lines=`ip x p list | wc -l`
+	test $lines -eq 0
+	check_err $?
+
+	# check the monitor results
+	kill $mpid
+	lines=`wc -l $tmpfile | cut "-d " -f1`
+	test $lines -eq 20
+	check_err $?
+	rm -rf $tmpfile
+
+	# clean up any leftovers
+	ip x s flush
+	check_err $?
+	ip x p flush
+	check_err $?
+	ip addr del $srcip/32 dev $devdummy
+
+	if [ $ret -ne 0 ]; then
+		echo "FAIL: ipsec"
+		return 1
+	fi
+	echo "PASS: ipsec"
+}
+
+#-------------------------------------------------------------------
+# Example commands
+#   ip x s add proto esp src 14.0.0.52 dst 14.0.0.70 \
+#            spi 0x07 mode transport reqid 0x07 replay-window 32 \
+#            aead 'rfc4106(gcm(aes))' 1234567890123456dcba 128 \
+#            sel src 14.0.0.52/24 dst 14.0.0.70/24
+#            offload dev sim1 dir out
+#   ip x p add dir out src 14.0.0.52/24 dst 14.0.0.70/24 \
+#            tmpl proto esp src 14.0.0.52 dst 14.0.0.70 \
+#            spi 0x07 mode transport reqid 0x07
+#
+#-------------------------------------------------------------------
+kci_test_ipsec_offload()
+{
+	ret=0
+	algo="aead rfc4106(gcm(aes)) 0x3132333435363738393031323334353664636261 128"
+	srcip=192.168.123.3
+	dstip=192.168.123.4
+	dev=simx1
+	sysfsd=/sys/kernel/debug/netdevsim/$dev
+	sysfsf=$sysfsd/ipsec
+
+	# setup netdevsim since dummydev doesn't have offload support
+	modprobe netdevsim
+	check_err $?
+	if [ $ret -ne 0 ]; then
+		echo "FAIL: ipsec_offload can't load netdevsim"
+		return 1
+	fi
+
+	ip link add $dev type netdevsim
+	ip addr add $srcip dev $dev
+	ip link set $dev up
+	if [ ! -d $sysfsd ] ; then
+		echo "FAIL: ipsec_offload can't create device $dev"
+		return 1
+	fi
+	if [ ! -f $sysfsf ] ; then
+		echo "FAIL: ipsec_offload netdevsim doesn't support IPsec offload"
+		return 1
+	fi
+
+	# flush to be sure there's nothing configured
+	ip x s flush ; ip x p flush
+
+	# create offloaded SAs, both in and out
+	ip x p add dir out src $srcip/24 dst $dstip/24 \
+	    tmpl proto esp src $srcip dst $dstip spi 9 \
+	    mode transport reqid 42
+	check_err $?
+	ip x p add dir out src $dstip/24 dst $srcip/24 \
+	    tmpl proto esp src $dstip dst $srcip spi 9 \
+	    mode transport reqid 42
+	check_err $?
+
+	ip x s add proto esp src $srcip dst $dstip spi 9 \
+	    mode transport reqid 42 $algo sel src $srcip/24 dst $dstip/24 \
+	    offload dev $dev dir out
+	check_err $?
+	ip x s add proto esp src $dstip dst $srcip spi 9 \
+	    mode transport reqid 42 $algo sel src $dstip/24 dst $srcip/24 \
+	    offload dev $dev dir in
+	check_err $?
+	if [ $ret -ne 0 ]; then
+		echo "FAIL: ipsec_offload can't create SA"
+		return 1
+	fi
+
+	# does offload show up in ip output
+	lines=`ip x s list | grep -c "crypto offload parameters: dev $dev dir"`
+	if [ $lines -ne 2 ] ; then
+		echo "FAIL: ipsec_offload SA offload missing from list output"
+		check_err 1
+	fi
+
+	# use ping to exercise the Tx path
+	ping -I $dev -c 3 -W 1 -i 0 $dstip >/dev/null
+
+	# does driver have correct offload info
+	diff $sysfsf - << EOF
+SA count=2 tx=3
+sa[0] tx ipaddr=0x00000000 00000000 00000000 00000000
+sa[0]    spi=0x00000009 proto=0x32 salt=0x61626364 crypt=1
+sa[0]    key=0x34333231 38373635 32313039 36353433
+sa[1] rx ipaddr=0x00000000 00000000 00000000 037ba8c0
+sa[1]    spi=0x00000009 proto=0x32 salt=0x61626364 crypt=1
+sa[1]    key=0x34333231 38373635 32313039 36353433
+EOF
+	if [ $? -ne 0 ] ; then
+		echo "FAIL: ipsec_offload incorrect driver data"
+		check_err 1
+	fi
+
+	# does offload get removed from driver
+	ip x s flush
+	ip x p flush
+	lines=`grep -c "SA count=0" $sysfsf`
+	if [ $lines -ne 1 ] ; then
+		echo "FAIL: ipsec_offload SA not removed from driver"
+		check_err 1
+	fi
+
+	# clean up any leftovers
+	ip link del $dev
+	rmmod netdevsim
+
+	if [ $ret -ne 0 ]; then
+		echo "FAIL: ipsec_offload"
+		return 1
+	fi
+	echo "PASS: ipsec_offload"
+}
+
+kci_test_gretap()
+{
+	testns="testns"
+	DEV_NS=gretap00
+	ret=0
+
+	ip netns add "$testns"
+	if [ $? -ne 0 ]; then
+		echo "SKIP gretap tests: cannot add net namespace $testns"
+		return $ksft_skip
+	fi
+
+	ip link help gretap 2>&1 | grep -q "^Usage:"
+	if [ $? -ne 0 ];then
+		echo "SKIP: gretap: iproute2 too old"
+		ip netns del "$testns"
+		return $ksft_skip
+	fi
+
+	# test native tunnel
+	ip netns exec "$testns" ip link add dev "$DEV_NS" type gretap seq \
+		key 102 local 172.16.1.100 remote 172.16.1.200
+	check_err $?
+
+	ip netns exec "$testns" ip addr add dev "$DEV_NS" 10.1.1.100/24
+	check_err $?
+
+	ip netns exec "$testns" ip link set dev $DEV_NS up
+	check_err $?
+
+	ip netns exec "$testns" ip link del "$DEV_NS"
+	check_err $?
+
+	# test external mode
+	ip netns exec "$testns" ip link add dev "$DEV_NS" type gretap external
+	check_err $?
+
+	ip netns exec "$testns" ip link del "$DEV_NS"
+	check_err $?
+
+	if [ $ret -ne 0 ]; then
+		echo "FAIL: gretap"
+		ip netns del "$testns"
+		return 1
+	fi
+	echo "PASS: gretap"
+
+	ip netns del "$testns"
+}
+
+kci_test_ip6gretap()
+{
+	testns="testns"
+	DEV_NS=ip6gretap00
+	ret=0
+
+	ip netns add "$testns"
+	if [ $? -ne 0 ]; then
+		echo "SKIP ip6gretap tests: cannot add net namespace $testns"
+		return $ksft_skip
+	fi
+
+	ip link help ip6gretap 2>&1 | grep -q "^Usage:"
+	if [ $? -ne 0 ];then
+		echo "SKIP: ip6gretap: iproute2 too old"
+		ip netns del "$testns"
+		return $ksft_skip
+	fi
+
+	# test native tunnel
+	ip netns exec "$testns" ip link add dev "$DEV_NS" type ip6gretap seq \
+		key 102 local fc00:100::1 remote fc00:100::2
+	check_err $?
+
+	ip netns exec "$testns" ip addr add dev "$DEV_NS" fc00:200::1/96
+	check_err $?
+
+	ip netns exec "$testns" ip link set dev $DEV_NS up
+	check_err $?
+
+	ip netns exec "$testns" ip link del "$DEV_NS"
+	check_err $?
+
+	# test external mode
+	ip netns exec "$testns" ip link add dev "$DEV_NS" type ip6gretap external
+	check_err $?
+
+	ip netns exec "$testns" ip link del "$DEV_NS"
+	check_err $?
+
+	if [ $ret -ne 0 ]; then
+		echo "FAIL: ip6gretap"
+		ip netns del "$testns"
+		return 1
+	fi
+	echo "PASS: ip6gretap"
+
+	ip netns del "$testns"
+}
+
+kci_test_erspan()
+{
+	testns="testns"
+	DEV_NS=erspan00
+	ret=0
+
+	ip link help erspan 2>&1 | grep -q "^Usage:"
+	if [ $? -ne 0 ];then
+		echo "SKIP: erspan: iproute2 too old"
+		return $ksft_skip
+	fi
+
+	ip netns add "$testns"
+	if [ $? -ne 0 ]; then
+		echo "SKIP erspan tests: cannot add net namespace $testns"
+		return $ksft_skip
+	fi
+
+	# test native tunnel erspan v1
+	ip netns exec "$testns" ip link add dev "$DEV_NS" type erspan seq \
+		key 102 local 172.16.1.100 remote 172.16.1.200 \
+		erspan_ver 1 erspan 488
+	check_err $?
+
+	ip netns exec "$testns" ip addr add dev "$DEV_NS" 10.1.1.100/24
+	check_err $?
+
+	ip netns exec "$testns" ip link set dev $DEV_NS up
+	check_err $?
+
+	ip netns exec "$testns" ip link del "$DEV_NS"
+	check_err $?
+
+	# test native tunnel erspan v2
+	ip netns exec "$testns" ip link add dev "$DEV_NS" type erspan seq \
+		key 102 local 172.16.1.100 remote 172.16.1.200 \
+		erspan_ver 2 erspan_dir ingress erspan_hwid 7
+	check_err $?
+
+	ip netns exec "$testns" ip addr add dev "$DEV_NS" 10.1.1.100/24
+	check_err $?
+
+	ip netns exec "$testns" ip link set dev $DEV_NS up
+	check_err $?
+
+	ip netns exec "$testns" ip link del "$DEV_NS"
+	check_err $?
+
+	# test external mode
+	ip netns exec "$testns" ip link add dev "$DEV_NS" type erspan external
+	check_err $?
+
+	ip netns exec "$testns" ip link del "$DEV_NS"
+	check_err $?
+
+	if [ $ret -ne 0 ]; then
+		echo "FAIL: erspan"
+		ip netns del "$testns"
+		return 1
+	fi
+	echo "PASS: erspan"
+
+	ip netns del "$testns"
+}
+
+kci_test_ip6erspan()
+{
+	testns="testns"
+	DEV_NS=ip6erspan00
+	ret=0
+
+	ip link help ip6erspan 2>&1 | grep -q "^Usage:"
+	if [ $? -ne 0 ];then
+		echo "SKIP: ip6erspan: iproute2 too old"
+		return $ksft_skip
+	fi
+
+	ip netns add "$testns"
+	if [ $? -ne 0 ]; then
+		echo "SKIP ip6erspan tests: cannot add net namespace $testns"
+		return $ksft_skip
+	fi
+
+	# test native tunnel ip6erspan v1
+	ip netns exec "$testns" ip link add dev "$DEV_NS" type ip6erspan seq \
+		key 102 local fc00:100::1 remote fc00:100::2 \
+		erspan_ver 1 erspan 488
+	check_err $?
+
+	ip netns exec "$testns" ip addr add dev "$DEV_NS" 10.1.1.100/24
+	check_err $?
+
+	ip netns exec "$testns" ip link set dev $DEV_NS up
+	check_err $?
+
+	ip netns exec "$testns" ip link del "$DEV_NS"
+	check_err $?
+
+	# test native tunnel ip6erspan v2
+	ip netns exec "$testns" ip link add dev "$DEV_NS" type ip6erspan seq \
+		key 102 local fc00:100::1 remote fc00:100::2 \
+		erspan_ver 2 erspan_dir ingress erspan_hwid 7
+	check_err $?
+
+	ip netns exec "$testns" ip addr add dev "$DEV_NS" 10.1.1.100/24
+	check_err $?
+
+	ip netns exec "$testns" ip link set dev $DEV_NS up
+	check_err $?
+
+	ip netns exec "$testns" ip link del "$DEV_NS"
+	check_err $?
+
+	# test external mode
+	ip netns exec "$testns" ip link add dev "$DEV_NS" \
+		type ip6erspan external
+	check_err $?
+
+	ip netns exec "$testns" ip link del "$DEV_NS"
+	check_err $?
+
+	if [ $ret -ne 0 ]; then
+		echo "FAIL: ip6erspan"
+		ip netns del "$testns"
+		return 1
+	fi
+	echo "PASS: ip6erspan"
+
+	ip netns del "$testns"
+}
+
+kci_test_rtnl()
+{
+	kci_add_dummy
+	if [ $ret -ne 0 ];then
+		echo "FAIL: cannot add dummy interface"
+		return 1
+	fi
+
+	kci_test_polrouting
+	kci_test_route_get
+	kci_test_addrlft
+	kci_test_tc
+	kci_test_gre
+	kci_test_gretap
+	kci_test_ip6gretap
+	kci_test_erspan
+	kci_test_ip6erspan
+	kci_test_bridge
+	kci_test_addrlabel
+	kci_test_ifalias
+	kci_test_vrf
+	kci_test_encap
+	kci_test_macsec
+	kci_test_ipsec
+	kci_test_ipsec_offload
+
+	kci_del_dummy
+}
+
+#check for needed privileges
+if [ "$(id -u)" -ne 0 ];then
+	echo "SKIP: Need root privileges"
+	exit $ksft_skip
+fi
+
+for x in ip tc;do
+	$x -Version 2>/dev/null >/dev/null
+	if [ $? -ne 0 ];then
+		echo "SKIP: Could not run test without the $x tool"
+		exit $ksft_skip
+	fi
+done
+
+kci_test_rtnl
+
+exit $ret
diff --git a/tools/testing/selftests/net/run_afpackettests b/tools/testing/selftests/net/run_afpackettests
new file mode 100755
index 000000000..bea079edc
--- /dev/null
+++ b/tools/testing/selftests/net/run_afpackettests
@@ -0,0 +1,27 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+if [ $(id -u) != 0 ]; then
+	echo $msg must be run as root >&2
+	exit 0
+fi
+
+echo "--------------------"
+echo "running psock_fanout test"
+echo "--------------------"
+./in_netns.sh ./psock_fanout
+if [ $? -ne 0 ]; then
+	echo "[FAIL]"
+else
+	echo "[PASS]"
+fi
+
+echo "--------------------"
+echo "running psock_tpacket test"
+echo "--------------------"
+./in_netns.sh ./psock_tpacket
+if [ $? -ne 0 ]; then
+	echo "[FAIL]"
+else
+	echo "[PASS]"
+fi
diff --git a/tools/testing/selftests/net/run_netsocktests b/tools/testing/selftests/net/run_netsocktests
new file mode 100755
index 000000000..14e41faf2
--- /dev/null
+++ b/tools/testing/selftests/net/run_netsocktests
@@ -0,0 +1,13 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+echo "--------------------"
+echo "running socket test"
+echo "--------------------"
+./socket
+if [ $? -ne 0 ]; then
+	echo "[FAIL]"
+	exit 1
+else
+	echo "[PASS]"
+fi
diff --git a/tools/testing/selftests/net/socket.c b/tools/testing/selftests/net/socket.c
new file mode 100644
index 000000000..afca1ead6
--- /dev/null
+++ b/tools/testing/selftests/net/socket.c
@@ -0,0 +1,93 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdio.h>
+#include <errno.h>
+#include <unistd.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+
+struct socket_testcase {
+	int	domain;
+	int	type;
+	int	protocol;
+
+	/* 0    = valid file descriptor
+	 * -foo = error foo
+	 */
+	int	expect;
+
+	/* If non-zero, accept EAFNOSUPPORT to handle the case
+	 * of the protocol not being configured into the kernel.
+	 */
+	int	nosupport_ok;
+};
+
+static struct socket_testcase tests[] = {
+	{ AF_MAX,  0,           0,           -EAFNOSUPPORT,    0 },
+	{ AF_INET, SOCK_STREAM, IPPROTO_TCP, 0,                1  },
+	{ AF_INET, SOCK_DGRAM,  IPPROTO_TCP, -EPROTONOSUPPORT, 1  },
+	{ AF_INET, SOCK_DGRAM,  IPPROTO_UDP, 0,                1  },
+	{ AF_INET, SOCK_STREAM, IPPROTO_UDP, -EPROTONOSUPPORT, 1  },
+};
+
+#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
+#define ERR_STRING_SZ	64
+
+static int run_tests(void)
+{
+	char err_string1[ERR_STRING_SZ];
+	char err_string2[ERR_STRING_SZ];
+	int i, err;
+
+	err = 0;
+	for (i = 0; i < ARRAY_SIZE(tests); i++) {
+		struct socket_testcase *s = &tests[i];
+		int fd;
+
+		fd = socket(s->domain, s->type, s->protocol);
+		if (fd < 0) {
+			if (s->nosupport_ok &&
+			    errno == EAFNOSUPPORT)
+				continue;
+
+			if (s->expect < 0 &&
+			    errno == -s->expect)
+				continue;
+
+			strerror_r(-s->expect, err_string1, ERR_STRING_SZ);
+			strerror_r(errno, err_string2, ERR_STRING_SZ);
+
+			fprintf(stderr, "socket(%d, %d, %d) expected "
+				"err (%s) got (%s)\n",
+				s->domain, s->type, s->protocol,
+				err_string1, err_string2);
+
+			err = -1;
+			break;
+		} else {
+			close(fd);
+
+			if (s->expect < 0) {
+				strerror_r(errno, err_string1, ERR_STRING_SZ);
+
+				fprintf(stderr, "socket(%d, %d, %d) expected "
+					"success got err (%s)\n",
+					s->domain, s->type, s->protocol,
+					err_string1);
+
+				err = -1;
+				break;
+			}
+		}
+	}
+
+	return err;
+}
+
+int main(void)
+{
+	int err = run_tests();
+
+	return err;
+}
diff --git a/tools/testing/selftests/net/tcp_inq.c b/tools/testing/selftests/net/tcp_inq.c
new file mode 100644
index 000000000..d044b29dd
--- /dev/null
+++ b/tools/testing/selftests/net/tcp_inq.c
@@ -0,0 +1,189 @@
+/*
+ * Copyright 2018 Google Inc.
+ * Author: Soheil Hassas Yeganeh (soheil@google.com)
+ *
+ * Simple example on how to use TCP_INQ and TCP_CM_INQ.
+ *
+ * License (GPLv2):
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. * See the GNU General Public License for
+ * more details.
+ */
+#define _GNU_SOURCE
+
+#include <error.h>
+#include <netinet/in.h>
+#include <netinet/tcp.h>
+#include <pthread.h>
+#include <stdio.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/socket.h>
+#include <unistd.h>
+
+#ifndef TCP_INQ
+#define TCP_INQ 36
+#endif
+
+#ifndef TCP_CM_INQ
+#define TCP_CM_INQ TCP_INQ
+#endif
+
+#define BUF_SIZE 8192
+#define CMSG_SIZE 32
+
+static int family = AF_INET6;
+static socklen_t addr_len = sizeof(struct sockaddr_in6);
+static int port = 4974;
+
+static void setup_loopback_addr(int family, struct sockaddr_storage *sockaddr)
+{
+	struct sockaddr_in6 *addr6 = (void *) sockaddr;
+	struct sockaddr_in *addr4 = (void *) sockaddr;
+
+	switch (family) {
+	case PF_INET:
+		memset(addr4, 0, sizeof(*addr4));
+		addr4->sin_family = AF_INET;
+		addr4->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+		addr4->sin_port = htons(port);
+		break;
+	case PF_INET6:
+		memset(addr6, 0, sizeof(*addr6));
+		addr6->sin6_family = AF_INET6;
+		addr6->sin6_addr = in6addr_loopback;
+		addr6->sin6_port = htons(port);
+		break;
+	default:
+		error(1, 0, "illegal family");
+	}
+}
+
+void *start_server(void *arg)
+{
+	int server_fd = (int)(unsigned long)arg;
+	struct sockaddr_in addr;
+	socklen_t addrlen = sizeof(addr);
+	char *buf;
+	int fd;
+	int r;
+
+	buf = malloc(BUF_SIZE);
+
+	for (;;) {
+		fd = accept(server_fd, (struct sockaddr *)&addr, &addrlen);
+		if (fd == -1) {
+			perror("accept");
+			break;
+		}
+		do {
+			r = send(fd, buf, BUF_SIZE, 0);
+		} while (r < 0 && errno == EINTR);
+		if (r < 0)
+			perror("send");
+		if (r != BUF_SIZE)
+			fprintf(stderr, "can only send %d bytes\n", r);
+		/* TCP_INQ can overestimate in-queue by one byte if we send
+		 * the FIN packet. Sleep for 1 second, so that the client
+		 * likely invoked recvmsg().
+		 */
+		sleep(1);
+		close(fd);
+	}
+
+	free(buf);
+	close(server_fd);
+	pthread_exit(0);
+}
+
+int main(int argc, char *argv[])
+{
+	struct sockaddr_storage listen_addr, addr;
+	int c, one = 1, inq = -1;
+	pthread_t server_thread;
+	char cmsgbuf[CMSG_SIZE];
+	struct iovec iov[1];
+	struct cmsghdr *cm;
+	struct msghdr msg;
+	int server_fd, fd;
+	char *buf;
+
+	while ((c = getopt(argc, argv, "46p:")) != -1) {
+		switch (c) {
+		case '4':
+			family = PF_INET;
+			addr_len = sizeof(struct sockaddr_in);
+			break;
+		case '6':
+			family = PF_INET6;
+			addr_len = sizeof(struct sockaddr_in6);
+			break;
+		case 'p':
+			port = atoi(optarg);
+			break;
+		}
+	}
+
+	server_fd = socket(family, SOCK_STREAM, 0);
+	if (server_fd < 0)
+		error(1, errno, "server socket");
+	setup_loopback_addr(family, &listen_addr);
+	if (setsockopt(server_fd, SOL_SOCKET, SO_REUSEADDR,
+		       &one, sizeof(one)) != 0)
+		error(1, errno, "setsockopt(SO_REUSEADDR)");
+	if (bind(server_fd, (const struct sockaddr *)&listen_addr,
+		 addr_len) == -1)
+		error(1, errno, "bind");
+	if (listen(server_fd, 128) == -1)
+		error(1, errno, "listen");
+	if (pthread_create(&server_thread, NULL, start_server,
+			   (void *)(unsigned long)server_fd) != 0)
+		error(1, errno, "pthread_create");
+
+	fd = socket(family, SOCK_STREAM, 0);
+	if (fd < 0)
+		error(1, errno, "client socket");
+	setup_loopback_addr(family, &addr);
+	if (connect(fd, (const struct sockaddr *)&addr, addr_len) == -1)
+		error(1, errno, "connect");
+	if (setsockopt(fd, SOL_TCP, TCP_INQ, &one, sizeof(one)) != 0)
+		error(1, errno, "setsockopt(TCP_INQ)");
+
+	msg.msg_name = NULL;
+	msg.msg_namelen = 0;
+	msg.msg_iov = iov;
+	msg.msg_iovlen = 1;
+	msg.msg_control = cmsgbuf;
+	msg.msg_controllen = sizeof(cmsgbuf);
+	msg.msg_flags = 0;
+
+	buf = malloc(BUF_SIZE);
+	iov[0].iov_base = buf;
+	iov[0].iov_len = BUF_SIZE / 2;
+
+	if (recvmsg(fd, &msg, 0) != iov[0].iov_len)
+		error(1, errno, "recvmsg");
+	if (msg.msg_flags & MSG_CTRUNC)
+		error(1, 0, "control message is truncated");
+
+	for (cm = CMSG_FIRSTHDR(&msg); cm; cm = CMSG_NXTHDR(&msg, cm))
+		if (cm->cmsg_level == SOL_TCP && cm->cmsg_type == TCP_CM_INQ)
+			inq = *((int *) CMSG_DATA(cm));
+
+	if (inq != BUF_SIZE - iov[0].iov_len) {
+		fprintf(stderr, "unexpected inq: %d\n", inq);
+		exit(1);
+	}
+
+	printf("PASSED\n");
+	free(buf);
+	close(fd);
+	return 0;
+}
diff --git a/tools/testing/selftests/net/tcp_mmap.c b/tools/testing/selftests/net/tcp_mmap.c
new file mode 100644
index 000000000..e8c5dff44
--- /dev/null
+++ b/tools/testing/selftests/net/tcp_mmap.c
@@ -0,0 +1,447 @@
+/*
+ * Copyright 2018 Google Inc.
+ * Author: Eric Dumazet (edumazet@google.com)
+ *
+ * Reference program demonstrating tcp mmap() usage,
+ * and SO_RCVLOWAT hints for receiver.
+ *
+ * Note : NIC with header split is needed to use mmap() on TCP :
+ * Each incoming frame must be a multiple of PAGE_SIZE bytes of TCP payload.
+ *
+ * How to use on loopback interface :
+ *
+ *  ifconfig lo mtu 61512  # 15*4096 + 40 (ipv6 header) + 32 (TCP with TS option header)
+ *  tcp_mmap -s -z &
+ *  tcp_mmap -H ::1 -z
+ *
+ *  Or leave default lo mtu, but use -M option to set TCP_MAXSEG option to (4096 + 12)
+ *      (4096 : page size on x86, 12: TCP TS option length)
+ *  tcp_mmap -s -z -M $((4096+12)) &
+ *  tcp_mmap -H ::1 -z -M $((4096+12))
+ *
+ * Note: -z option on sender uses MSG_ZEROCOPY, which forces a copy when packets go through loopback interface.
+ *       We might use sendfile() instead, but really this test program is about mmap(), for receivers ;)
+ *
+ * $ ./tcp_mmap -s &                                 # Without mmap()
+ * $ for i in {1..4}; do ./tcp_mmap -H ::1 -z ; done
+ * received 32768 MB (0 % mmap'ed) in 14.1157 s, 19.4732 Gbit
+ *   cpu usage user:0.057 sys:7.815, 240.234 usec per MB, 65531 c-switches
+ * received 32768 MB (0 % mmap'ed) in 14.6833 s, 18.7204 Gbit
+ *  cpu usage user:0.043 sys:8.103, 248.596 usec per MB, 65524 c-switches
+ * received 32768 MB (0 % mmap'ed) in 11.143 s, 24.6682 Gbit
+ *   cpu usage user:0.044 sys:6.576, 202.026 usec per MB, 65519 c-switches
+ * received 32768 MB (0 % mmap'ed) in 14.9056 s, 18.4413 Gbit
+ *   cpu usage user:0.036 sys:8.193, 251.129 usec per MB, 65530 c-switches
+ * $ kill %1   # kill tcp_mmap server
+ *
+ * $ ./tcp_mmap -s -z &                              # With mmap()
+ * $ for i in {1..4}; do ./tcp_mmap -H ::1 -z ; done
+ * received 32768 MB (99.9939 % mmap'ed) in 6.73792 s, 40.7956 Gbit
+ *   cpu usage user:0.045 sys:2.827, 87.6465 usec per MB, 65532 c-switches
+ * received 32768 MB (99.9939 % mmap'ed) in 7.26732 s, 37.8238 Gbit
+ *   cpu usage user:0.037 sys:3.087, 95.3369 usec per MB, 65532 c-switches
+ * received 32768 MB (99.9939 % mmap'ed) in 7.61661 s, 36.0893 Gbit
+ *   cpu usage user:0.046 sys:3.559, 110.016 usec per MB, 65529 c-switches
+ * received 32768 MB (99.9939 % mmap'ed) in 7.43764 s, 36.9577 Gbit
+ *   cpu usage user:0.035 sys:3.467, 106.873 usec per MB, 65530 c-switches
+ *
+ * License (GPLv2):
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. * See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+#define _GNU_SOURCE
+#include <pthread.h>
+#include <sys/types.h>
+#include <fcntl.h>
+#include <error.h>
+#include <sys/socket.h>
+#include <sys/mman.h>
+#include <sys/resource.h>
+#include <unistd.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <errno.h>
+#include <time.h>
+#include <sys/time.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <poll.h>
+#include <linux/tcp.h>
+#include <assert.h>
+
+#ifndef MSG_ZEROCOPY
+#define MSG_ZEROCOPY    0x4000000
+#endif
+
+#define FILE_SZ (1UL << 35)
+static int cfg_family = AF_INET6;
+static socklen_t cfg_alen = sizeof(struct sockaddr_in6);
+static int cfg_port = 8787;
+
+static int rcvbuf; /* Default: autotuning.  Can be set with -r <integer> option */
+static int sndbuf; /* Default: autotuning.  Can be set with -w <integer> option */
+static int zflg; /* zero copy option. (MSG_ZEROCOPY for sender, mmap() for receiver */
+static int xflg; /* hash received data (simple xor) (-h option) */
+static int keepflag; /* -k option: receiver shall keep all received file in memory (no munmap() calls) */
+
+static int chunk_size  = 512*1024;
+
+unsigned long htotal;
+
+static inline void prefetch(const void *x)
+{
+#if defined(__x86_64__)
+	asm volatile("prefetcht0 %P0" : : "m" (*(const char *)x));
+#endif
+}
+
+void hash_zone(void *zone, unsigned int length)
+{
+	unsigned long temp = htotal;
+
+	while (length >= 8*sizeof(long)) {
+		prefetch(zone + 384);
+		temp ^= *(unsigned long *)zone;
+		temp ^= *(unsigned long *)(zone + sizeof(long));
+		temp ^= *(unsigned long *)(zone + 2*sizeof(long));
+		temp ^= *(unsigned long *)(zone + 3*sizeof(long));
+		temp ^= *(unsigned long *)(zone + 4*sizeof(long));
+		temp ^= *(unsigned long *)(zone + 5*sizeof(long));
+		temp ^= *(unsigned long *)(zone + 6*sizeof(long));
+		temp ^= *(unsigned long *)(zone + 7*sizeof(long));
+		zone += 8*sizeof(long);
+		length -= 8*sizeof(long);
+	}
+	while (length >= 1) {
+		temp ^= *(unsigned char *)zone;
+		zone += 1;
+		length--;
+	}
+	htotal = temp;
+}
+
+void *child_thread(void *arg)
+{
+	unsigned long total_mmap = 0, total = 0;
+	struct tcp_zerocopy_receive zc;
+	unsigned long delta_usec;
+	int flags = MAP_SHARED;
+	struct timeval t0, t1;
+	char *buffer = NULL;
+	void *addr = NULL;
+	double throughput;
+	struct rusage ru;
+	int lu, fd;
+
+	fd = (int)(unsigned long)arg;
+
+	gettimeofday(&t0, NULL);
+
+	fcntl(fd, F_SETFL, O_NDELAY);
+	buffer = malloc(chunk_size);
+	if (!buffer) {
+		perror("malloc");
+		goto error;
+	}
+	if (zflg) {
+		addr = mmap(NULL, chunk_size, PROT_READ, flags, fd, 0);
+		if (addr == (void *)-1)
+			zflg = 0;
+	}
+	while (1) {
+		struct pollfd pfd = { .fd = fd, .events = POLLIN, };
+		int sub;
+
+		poll(&pfd, 1, 10000);
+		if (zflg) {
+			socklen_t zc_len = sizeof(zc);
+			int res;
+
+			zc.address = (__u64)addr;
+			zc.length = chunk_size;
+			zc.recv_skip_hint = 0;
+			res = getsockopt(fd, IPPROTO_TCP, TCP_ZEROCOPY_RECEIVE,
+					 &zc, &zc_len);
+			if (res == -1)
+				break;
+
+			if (zc.length) {
+				assert(zc.length <= chunk_size);
+				total_mmap += zc.length;
+				if (xflg)
+					hash_zone(addr, zc.length);
+				total += zc.length;
+			}
+			if (zc.recv_skip_hint) {
+				assert(zc.recv_skip_hint <= chunk_size);
+				lu = read(fd, buffer, zc.recv_skip_hint);
+				if (lu > 0) {
+					if (xflg)
+						hash_zone(buffer, lu);
+					total += lu;
+				}
+			}
+			continue;
+		}
+		sub = 0;
+		while (sub < chunk_size) {
+			lu = read(fd, buffer + sub, chunk_size - sub);
+			if (lu == 0)
+				goto end;
+			if (lu < 0)
+				break;
+			if (xflg)
+				hash_zone(buffer + sub, lu);
+			total += lu;
+			sub += lu;
+		}
+	}
+end:
+	gettimeofday(&t1, NULL);
+	delta_usec = (t1.tv_sec - t0.tv_sec) * 1000000 + t1.tv_usec - t0.tv_usec;
+
+	throughput = 0;
+	if (delta_usec)
+		throughput = total * 8.0 / (double)delta_usec / 1000.0;
+	getrusage(RUSAGE_THREAD, &ru);
+	if (total > 1024*1024) {
+		unsigned long total_usec;
+		unsigned long mb = total >> 20;
+		total_usec = 1000000*ru.ru_utime.tv_sec + ru.ru_utime.tv_usec +
+			     1000000*ru.ru_stime.tv_sec + ru.ru_stime.tv_usec;
+		printf("received %lg MB (%lg %% mmap'ed) in %lg s, %lg Gbit\n"
+		       "  cpu usage user:%lg sys:%lg, %lg usec per MB, %lu c-switches\n",
+				total / (1024.0 * 1024.0),
+				100.0*total_mmap/total,
+				(double)delta_usec / 1000000.0,
+				throughput,
+				(double)ru.ru_utime.tv_sec + (double)ru.ru_utime.tv_usec / 1000000.0,
+				(double)ru.ru_stime.tv_sec + (double)ru.ru_stime.tv_usec / 1000000.0,
+				(double)total_usec/mb,
+				ru.ru_nvcsw);
+	}
+error:
+	free(buffer);
+	close(fd);
+	if (zflg)
+		munmap(addr, chunk_size);
+	pthread_exit(0);
+}
+
+static void apply_rcvsnd_buf(int fd)
+{
+	if (rcvbuf && setsockopt(fd, SOL_SOCKET,
+				 SO_RCVBUF, &rcvbuf, sizeof(rcvbuf)) == -1) {
+		perror("setsockopt SO_RCVBUF");
+	}
+
+	if (sndbuf && setsockopt(fd, SOL_SOCKET,
+				 SO_SNDBUF, &sndbuf, sizeof(sndbuf)) == -1) {
+		perror("setsockopt SO_SNDBUF");
+	}
+}
+
+
+static void setup_sockaddr(int domain, const char *str_addr,
+			   struct sockaddr_storage *sockaddr)
+{
+	struct sockaddr_in6 *addr6 = (void *) sockaddr;
+	struct sockaddr_in *addr4 = (void *) sockaddr;
+
+	switch (domain) {
+	case PF_INET:
+		memset(addr4, 0, sizeof(*addr4));
+		addr4->sin_family = AF_INET;
+		addr4->sin_port = htons(cfg_port);
+		if (str_addr &&
+		    inet_pton(AF_INET, str_addr, &(addr4->sin_addr)) != 1)
+			error(1, 0, "ipv4 parse error: %s", str_addr);
+		break;
+	case PF_INET6:
+		memset(addr6, 0, sizeof(*addr6));
+		addr6->sin6_family = AF_INET6;
+		addr6->sin6_port = htons(cfg_port);
+		if (str_addr &&
+		    inet_pton(AF_INET6, str_addr, &(addr6->sin6_addr)) != 1)
+			error(1, 0, "ipv6 parse error: %s", str_addr);
+		break;
+	default:
+		error(1, 0, "illegal domain");
+	}
+}
+
+static void do_accept(int fdlisten)
+{
+	if (setsockopt(fdlisten, SOL_SOCKET, SO_RCVLOWAT,
+		       &chunk_size, sizeof(chunk_size)) == -1) {
+		perror("setsockopt SO_RCVLOWAT");
+	}
+
+	apply_rcvsnd_buf(fdlisten);
+
+	while (1) {
+		struct sockaddr_in addr;
+		socklen_t addrlen = sizeof(addr);
+		pthread_t th;
+		int fd, res;
+
+		fd = accept(fdlisten, (struct sockaddr *)&addr, &addrlen);
+		if (fd == -1) {
+			perror("accept");
+			continue;
+		}
+		res = pthread_create(&th, NULL, child_thread,
+				     (void *)(unsigned long)fd);
+		if (res) {
+			errno = res;
+			perror("pthread_create");
+			close(fd);
+		}
+	}
+}
+
+int main(int argc, char *argv[])
+{
+	struct sockaddr_storage listenaddr, addr;
+	unsigned int max_pacing_rate = 0;
+	unsigned long total = 0;
+	char *host = NULL;
+	int fd, c, on = 1;
+	char *buffer;
+	int sflg = 0;
+	int mss = 0;
+
+	while ((c = getopt(argc, argv, "46p:svr:w:H:zxkP:M:")) != -1) {
+		switch (c) {
+		case '4':
+			cfg_family = PF_INET;
+			cfg_alen = sizeof(struct sockaddr_in);
+			break;
+		case '6':
+			cfg_family = PF_INET6;
+			cfg_alen = sizeof(struct sockaddr_in6);
+			break;
+		case 'p':
+			cfg_port = atoi(optarg);
+			break;
+		case 'H':
+			host = optarg;
+			break;
+		case 's': /* server : listen for incoming connections */
+			sflg++;
+			break;
+		case 'r':
+			rcvbuf = atoi(optarg);
+			break;
+		case 'w':
+			sndbuf = atoi(optarg);
+			break;
+		case 'z':
+			zflg = 1;
+			break;
+		case 'M':
+			mss = atoi(optarg);
+			break;
+		case 'x':
+			xflg = 1;
+			break;
+		case 'k':
+			keepflag = 1;
+			break;
+		case 'P':
+			max_pacing_rate = atoi(optarg) ;
+			break;
+		default:
+			exit(1);
+		}
+	}
+	if (sflg) {
+		int fdlisten = socket(cfg_family, SOCK_STREAM, 0);
+
+		if (fdlisten == -1) {
+			perror("socket");
+			exit(1);
+		}
+		apply_rcvsnd_buf(fdlisten);
+		setsockopt(fdlisten, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on));
+
+		setup_sockaddr(cfg_family, host, &listenaddr);
+
+		if (mss &&
+		    setsockopt(fdlisten, IPPROTO_TCP, TCP_MAXSEG,
+			       &mss, sizeof(mss)) == -1) {
+			perror("setsockopt TCP_MAXSEG");
+			exit(1);
+		}
+		if (bind(fdlisten, (const struct sockaddr *)&listenaddr, cfg_alen) == -1) {
+			perror("bind");
+			exit(1);
+		}
+		if (listen(fdlisten, 128) == -1) {
+			perror("listen");
+			exit(1);
+		}
+		do_accept(fdlisten);
+	}
+	buffer = mmap(NULL, chunk_size, PROT_READ | PROT_WRITE,
+			      MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+	if (buffer == (char *)-1) {
+		perror("mmap");
+		exit(1);
+	}
+
+	fd = socket(cfg_family, SOCK_STREAM, 0);
+	if (fd == -1) {
+		perror("socket");
+		exit(1);
+	}
+	apply_rcvsnd_buf(fd);
+
+	setup_sockaddr(cfg_family, host, &addr);
+
+	if (mss &&
+	    setsockopt(fd, IPPROTO_TCP, TCP_MAXSEG, &mss, sizeof(mss)) == -1) {
+		perror("setsockopt TCP_MAXSEG");
+		exit(1);
+	}
+	if (connect(fd, (const struct sockaddr *)&addr, cfg_alen) == -1) {
+		perror("connect");
+		exit(1);
+	}
+	if (max_pacing_rate &&
+	    setsockopt(fd, SOL_SOCKET, SO_MAX_PACING_RATE,
+		       &max_pacing_rate, sizeof(max_pacing_rate)) == -1)
+		perror("setsockopt SO_MAX_PACING_RATE");
+
+	if (zflg && setsockopt(fd, SOL_SOCKET, SO_ZEROCOPY,
+			       &on, sizeof(on)) == -1) {
+		perror("setsockopt SO_ZEROCOPY, (-z option disabled)");
+		zflg = 0;
+	}
+	while (total < FILE_SZ) {
+		long wr = FILE_SZ - total;
+
+		if (wr > chunk_size)
+			wr = chunk_size;
+		/* Note : we just want to fill the pipe with 0 bytes */
+		wr = send(fd, buffer, wr, zflg ? MSG_ZEROCOPY : 0);
+		if (wr <= 0)
+			break;
+		total += wr;
+	}
+	close(fd);
+	munmap(buffer, chunk_size);
+	return 0;
+}
diff --git a/tools/testing/selftests/net/test_bpf.sh b/tools/testing/selftests/net/test_bpf.sh
new file mode 100755
index 000000000..65677909c
--- /dev/null
+++ b/tools/testing/selftests/net/test_bpf.sh
@@ -0,0 +1,11 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# Runs bpf test using test_bpf kernel module
+
+if /sbin/modprobe -q test_bpf ; then
+	/sbin/modprobe -q -r test_bpf;
+	echo "test_bpf: ok";
+else
+	echo "test_bpf: [FAIL]";
+	exit 1;
+fi
diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c
new file mode 100644
index 000000000..7549d39cc
--- /dev/null
+++ b/tools/testing/selftests/net/tls.c
@@ -0,0 +1,741 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+
+#include <arpa/inet.h>
+#include <errno.h>
+#include <error.h>
+#include <fcntl.h>
+#include <poll.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include <linux/tls.h>
+#include <linux/tcp.h>
+#include <linux/socket.h>
+
+#include <sys/types.h>
+#include <sys/sendfile.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+
+#include "../kselftest_harness.h"
+
+#define TLS_PAYLOAD_MAX_LEN 16384
+#define SOL_TLS 282
+
+FIXTURE(tls)
+{
+	int fd, cfd;
+	bool notls;
+};
+
+FIXTURE_SETUP(tls)
+{
+	struct tls12_crypto_info_aes_gcm_128 tls12;
+	struct sockaddr_in addr;
+	socklen_t len;
+	int sfd, ret;
+
+	self->notls = false;
+	len = sizeof(addr);
+
+	memset(&tls12, 0, sizeof(tls12));
+	tls12.info.version = TLS_1_2_VERSION;
+	tls12.info.cipher_type = TLS_CIPHER_AES_GCM_128;
+
+	addr.sin_family = AF_INET;
+	addr.sin_addr.s_addr = htonl(INADDR_ANY);
+	addr.sin_port = 0;
+
+	self->fd = socket(AF_INET, SOCK_STREAM, 0);
+	sfd = socket(AF_INET, SOCK_STREAM, 0);
+
+	ret = bind(sfd, &addr, sizeof(addr));
+	ASSERT_EQ(ret, 0);
+	ret = listen(sfd, 10);
+	ASSERT_EQ(ret, 0);
+
+	ret = getsockname(sfd, &addr, &len);
+	ASSERT_EQ(ret, 0);
+
+	ret = connect(self->fd, &addr, sizeof(addr));
+	ASSERT_EQ(ret, 0);
+
+	ret = setsockopt(self->fd, IPPROTO_TCP, TCP_ULP, "tls", sizeof("tls"));
+	if (ret != 0) {
+		self->notls = true;
+		printf("Failure setting TCP_ULP, testing without tls\n");
+	}
+
+	if (!self->notls) {
+		ret = setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12,
+				 sizeof(tls12));
+		ASSERT_EQ(ret, 0);
+	}
+
+	self->cfd = accept(sfd, &addr, &len);
+	ASSERT_GE(self->cfd, 0);
+
+	if (!self->notls) {
+		ret = setsockopt(self->cfd, IPPROTO_TCP, TCP_ULP, "tls",
+				 sizeof("tls"));
+		ASSERT_EQ(ret, 0);
+
+		ret = setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12,
+				 sizeof(tls12));
+		ASSERT_EQ(ret, 0);
+	}
+
+	close(sfd);
+}
+
+FIXTURE_TEARDOWN(tls)
+{
+	close(self->fd);
+	close(self->cfd);
+}
+
+TEST_F(tls, sendfile)
+{
+	int filefd = open("/proc/self/exe", O_RDONLY);
+	struct stat st;
+
+	EXPECT_GE(filefd, 0);
+	fstat(filefd, &st);
+	EXPECT_GE(sendfile(self->fd, filefd, 0, st.st_size), 0);
+}
+
+TEST_F(tls, send_then_sendfile)
+{
+	int filefd = open("/proc/self/exe", O_RDONLY);
+	char const *test_str = "test_send";
+	int to_send = strlen(test_str) + 1;
+	char recv_buf[10];
+	struct stat st;
+	char *buf;
+
+	EXPECT_GE(filefd, 0);
+	fstat(filefd, &st);
+	buf = (char *)malloc(st.st_size);
+
+	EXPECT_EQ(send(self->fd, test_str, to_send, 0), to_send);
+	EXPECT_EQ(recv(self->cfd, recv_buf, to_send, 0), to_send);
+	EXPECT_EQ(memcmp(test_str, recv_buf, to_send), 0);
+
+	EXPECT_GE(sendfile(self->fd, filefd, 0, st.st_size), 0);
+	EXPECT_EQ(recv(self->cfd, buf, st.st_size, 0), st.st_size);
+}
+
+TEST_F(tls, recv_max)
+{
+	unsigned int send_len = TLS_PAYLOAD_MAX_LEN;
+	char recv_mem[TLS_PAYLOAD_MAX_LEN];
+	char buf[TLS_PAYLOAD_MAX_LEN];
+
+	EXPECT_GE(send(self->fd, buf, send_len, 0), 0);
+	EXPECT_NE(recv(self->cfd, recv_mem, send_len, 0), -1);
+	EXPECT_EQ(memcmp(buf, recv_mem, send_len), 0);
+}
+
+TEST_F(tls, recv_small)
+{
+	char const *test_str = "test_read";
+	int send_len = 10;
+	char buf[10];
+
+	send_len = strlen(test_str) + 1;
+	EXPECT_EQ(send(self->fd, test_str, send_len, 0), send_len);
+	EXPECT_NE(recv(self->cfd, buf, send_len, 0), -1);
+	EXPECT_EQ(memcmp(buf, test_str, send_len), 0);
+}
+
+TEST_F(tls, msg_more)
+{
+	char const *test_str = "test_read";
+	int send_len = 10;
+	char buf[10 * 2];
+
+	EXPECT_EQ(send(self->fd, test_str, send_len, MSG_MORE), send_len);
+	EXPECT_EQ(recv(self->cfd, buf, send_len, MSG_DONTWAIT), -1);
+	EXPECT_EQ(send(self->fd, test_str, send_len, 0), send_len);
+	EXPECT_EQ(recv(self->cfd, buf, send_len * 2, MSG_DONTWAIT),
+		  send_len * 2);
+	EXPECT_EQ(memcmp(buf, test_str, send_len), 0);
+}
+
+TEST_F(tls, sendmsg_single)
+{
+	struct msghdr msg;
+
+	char const *test_str = "test_sendmsg";
+	size_t send_len = 13;
+	struct iovec vec;
+	char buf[13];
+
+	vec.iov_base = (char *)test_str;
+	vec.iov_len = send_len;
+	memset(&msg, 0, sizeof(struct msghdr));
+	msg.msg_iov = &vec;
+	msg.msg_iovlen = 1;
+	EXPECT_EQ(sendmsg(self->fd, &msg, 0), send_len);
+	EXPECT_EQ(recv(self->cfd, buf, send_len, 0), send_len);
+	EXPECT_EQ(memcmp(buf, test_str, send_len), 0);
+}
+
+TEST_F(tls, sendmsg_large)
+{
+	void *mem = malloc(16384);
+	size_t send_len = 16384;
+	size_t sends = 128;
+	struct msghdr msg;
+	size_t recvs = 0;
+	size_t sent = 0;
+
+	memset(&msg, 0, sizeof(struct msghdr));
+	while (sent++ < sends) {
+		struct iovec vec = { (void *)mem, send_len };
+
+		msg.msg_iov = &vec;
+		msg.msg_iovlen = 1;
+		EXPECT_EQ(sendmsg(self->cfd, &msg, 0), send_len);
+	}
+
+	while (recvs++ < sends)
+		EXPECT_NE(recv(self->fd, mem, send_len, 0), -1);
+
+	free(mem);
+}
+
+TEST_F(tls, sendmsg_multiple)
+{
+	char const *test_str = "test_sendmsg_multiple";
+	struct iovec vec[5];
+	char *test_strs[5];
+	struct msghdr msg;
+	int total_len = 0;
+	int len_cmp = 0;
+	int iov_len = 5;
+	char *buf;
+	int i;
+
+	memset(&msg, 0, sizeof(struct msghdr));
+	for (i = 0; i < iov_len; i++) {
+		test_strs[i] = (char *)malloc(strlen(test_str) + 1);
+		snprintf(test_strs[i], strlen(test_str) + 1, "%s", test_str);
+		vec[i].iov_base = (void *)test_strs[i];
+		vec[i].iov_len = strlen(test_strs[i]) + 1;
+		total_len += vec[i].iov_len;
+	}
+	msg.msg_iov = vec;
+	msg.msg_iovlen = iov_len;
+
+	EXPECT_EQ(sendmsg(self->cfd, &msg, 0), total_len);
+	buf = malloc(total_len);
+	EXPECT_NE(recv(self->fd, buf, total_len, 0), -1);
+	for (i = 0; i < iov_len; i++) {
+		EXPECT_EQ(memcmp(test_strs[i], buf + len_cmp,
+				 strlen(test_strs[i])),
+			  0);
+		len_cmp += strlen(buf + len_cmp) + 1;
+	}
+	for (i = 0; i < iov_len; i++)
+		free(test_strs[i]);
+	free(buf);
+}
+
+TEST_F(tls, sendmsg_multiple_stress)
+{
+	char const *test_str = "abcdefghijklmno";
+	struct iovec vec[1024];
+	char *test_strs[1024];
+	int iov_len = 1024;
+	int total_len = 0;
+	char buf[1 << 14];
+	struct msghdr msg;
+	int len_cmp = 0;
+	int i;
+
+	memset(&msg, 0, sizeof(struct msghdr));
+	for (i = 0; i < iov_len; i++) {
+		test_strs[i] = (char *)malloc(strlen(test_str) + 1);
+		snprintf(test_strs[i], strlen(test_str) + 1, "%s", test_str);
+		vec[i].iov_base = (void *)test_strs[i];
+		vec[i].iov_len = strlen(test_strs[i]) + 1;
+		total_len += vec[i].iov_len;
+	}
+	msg.msg_iov = vec;
+	msg.msg_iovlen = iov_len;
+
+	EXPECT_EQ(sendmsg(self->fd, &msg, 0), total_len);
+	EXPECT_NE(recv(self->cfd, buf, total_len, 0), -1);
+
+	for (i = 0; i < iov_len; i++)
+		len_cmp += strlen(buf + len_cmp) + 1;
+
+	for (i = 0; i < iov_len; i++)
+		free(test_strs[i]);
+}
+
+TEST_F(tls, splice_from_pipe)
+{
+	int send_len = TLS_PAYLOAD_MAX_LEN;
+	char mem_send[TLS_PAYLOAD_MAX_LEN];
+	char mem_recv[TLS_PAYLOAD_MAX_LEN];
+	int p[2];
+
+	ASSERT_GE(pipe(p), 0);
+	EXPECT_GE(write(p[1], mem_send, send_len), 0);
+	EXPECT_GE(splice(p[0], NULL, self->fd, NULL, send_len, 0), 0);
+	EXPECT_EQ(recv(self->cfd, mem_recv, send_len, MSG_WAITALL), send_len);
+	EXPECT_EQ(memcmp(mem_send, mem_recv, send_len), 0);
+}
+
+TEST_F(tls, splice_from_pipe2)
+{
+	int send_len = 16000;
+	char mem_send[16000];
+	char mem_recv[16000];
+	int p2[2];
+	int p[2];
+
+	ASSERT_GE(pipe(p), 0);
+	ASSERT_GE(pipe(p2), 0);
+	EXPECT_GE(write(p[1], mem_send, 8000), 0);
+	EXPECT_GE(splice(p[0], NULL, self->fd, NULL, 8000, 0), 0);
+	EXPECT_GE(write(p2[1], mem_send + 8000, 8000), 0);
+	EXPECT_GE(splice(p2[0], NULL, self->fd, NULL, 8000, 0), 0);
+	EXPECT_GE(recv(self->cfd, mem_recv, send_len, 0), 0);
+	EXPECT_EQ(memcmp(mem_send, mem_recv, send_len), 0);
+}
+
+TEST_F(tls, send_and_splice)
+{
+	int send_len = TLS_PAYLOAD_MAX_LEN;
+	char mem_send[TLS_PAYLOAD_MAX_LEN];
+	char mem_recv[TLS_PAYLOAD_MAX_LEN];
+	char const *test_str = "test_read";
+	int send_len2 = 10;
+	char buf[10];
+	int p[2];
+
+	ASSERT_GE(pipe(p), 0);
+	EXPECT_EQ(send(self->fd, test_str, send_len2, 0), send_len2);
+	EXPECT_EQ(recv(self->cfd, buf, send_len2, MSG_WAITALL), send_len2);
+	EXPECT_EQ(memcmp(test_str, buf, send_len2), 0);
+
+	EXPECT_GE(write(p[1], mem_send, send_len), send_len);
+	EXPECT_GE(splice(p[0], NULL, self->fd, NULL, send_len, 0), send_len);
+
+	EXPECT_EQ(recv(self->cfd, mem_recv, send_len, MSG_WAITALL), send_len);
+	EXPECT_EQ(memcmp(mem_send, mem_recv, send_len), 0);
+}
+
+TEST_F(tls, splice_to_pipe)
+{
+	int send_len = TLS_PAYLOAD_MAX_LEN;
+	char mem_send[TLS_PAYLOAD_MAX_LEN];
+	char mem_recv[TLS_PAYLOAD_MAX_LEN];
+	int p[2];
+
+	ASSERT_GE(pipe(p), 0);
+	EXPECT_GE(send(self->fd, mem_send, send_len, 0), 0);
+	EXPECT_GE(splice(self->cfd, NULL, p[1], NULL, send_len, 0), 0);
+	EXPECT_GE(read(p[0], mem_recv, send_len), 0);
+	EXPECT_EQ(memcmp(mem_send, mem_recv, send_len), 0);
+}
+
+TEST_F(tls, recvmsg_single)
+{
+	char const *test_str = "test_recvmsg_single";
+	int send_len = strlen(test_str) + 1;
+	char buf[20];
+	struct msghdr hdr;
+	struct iovec vec;
+
+	memset(&hdr, 0, sizeof(hdr));
+	EXPECT_EQ(send(self->fd, test_str, send_len, 0), send_len);
+	vec.iov_base = (char *)buf;
+	vec.iov_len = send_len;
+	hdr.msg_iovlen = 1;
+	hdr.msg_iov = &vec;
+	EXPECT_NE(recvmsg(self->cfd, &hdr, 0), -1);
+	EXPECT_EQ(memcmp(test_str, buf, send_len), 0);
+}
+
+TEST_F(tls, recvmsg_single_max)
+{
+	int send_len = TLS_PAYLOAD_MAX_LEN;
+	char send_mem[TLS_PAYLOAD_MAX_LEN];
+	char recv_mem[TLS_PAYLOAD_MAX_LEN];
+	struct iovec vec;
+	struct msghdr hdr;
+
+	EXPECT_EQ(send(self->fd, send_mem, send_len, 0), send_len);
+	vec.iov_base = (char *)recv_mem;
+	vec.iov_len = TLS_PAYLOAD_MAX_LEN;
+
+	hdr.msg_iovlen = 1;
+	hdr.msg_iov = &vec;
+	EXPECT_NE(recvmsg(self->cfd, &hdr, 0), -1);
+	EXPECT_EQ(memcmp(send_mem, recv_mem, send_len), 0);
+}
+
+TEST_F(tls, recvmsg_multiple)
+{
+	unsigned int msg_iovlen = 1024;
+	unsigned int len_compared = 0;
+	struct iovec vec[1024];
+	char *iov_base[1024];
+	unsigned int iov_len = 16;
+	int send_len = 1 << 14;
+	char buf[1 << 14];
+	struct msghdr hdr;
+	int i;
+
+	EXPECT_EQ(send(self->fd, buf, send_len, 0), send_len);
+	for (i = 0; i < msg_iovlen; i++) {
+		iov_base[i] = (char *)malloc(iov_len);
+		vec[i].iov_base = iov_base[i];
+		vec[i].iov_len = iov_len;
+	}
+
+	hdr.msg_iovlen = msg_iovlen;
+	hdr.msg_iov = vec;
+	EXPECT_NE(recvmsg(self->cfd, &hdr, 0), -1);
+	for (i = 0; i < msg_iovlen; i++)
+		len_compared += iov_len;
+
+	for (i = 0; i < msg_iovlen; i++)
+		free(iov_base[i]);
+}
+
+TEST_F(tls, single_send_multiple_recv)
+{
+	unsigned int total_len = TLS_PAYLOAD_MAX_LEN * 2;
+	unsigned int send_len = TLS_PAYLOAD_MAX_LEN;
+	char send_mem[TLS_PAYLOAD_MAX_LEN * 2];
+	char recv_mem[TLS_PAYLOAD_MAX_LEN * 2];
+
+	EXPECT_GE(send(self->fd, send_mem, total_len, 0), 0);
+	memset(recv_mem, 0, total_len);
+
+	EXPECT_NE(recv(self->cfd, recv_mem, send_len, 0), -1);
+	EXPECT_NE(recv(self->cfd, recv_mem + send_len, send_len, 0), -1);
+	EXPECT_EQ(memcmp(send_mem, recv_mem, total_len), 0);
+}
+
+TEST_F(tls, multiple_send_single_recv)
+{
+	unsigned int total_len = 2 * 10;
+	unsigned int send_len = 10;
+	char recv_mem[2 * 10];
+	char send_mem[10];
+
+	EXPECT_GE(send(self->fd, send_mem, send_len, 0), 0);
+	EXPECT_GE(send(self->fd, send_mem, send_len, 0), 0);
+	memset(recv_mem, 0, total_len);
+	EXPECT_EQ(recv(self->cfd, recv_mem, total_len, 0), total_len);
+
+	EXPECT_EQ(memcmp(send_mem, recv_mem, send_len), 0);
+	EXPECT_EQ(memcmp(send_mem, recv_mem + send_len, send_len), 0);
+}
+
+TEST_F(tls, recv_partial)
+{
+	char const *test_str = "test_read_partial";
+	char const *test_str_first = "test_read";
+	char const *test_str_second = "_partial";
+	int send_len = strlen(test_str) + 1;
+	char recv_mem[18];
+
+	memset(recv_mem, 0, sizeof(recv_mem));
+	EXPECT_EQ(send(self->fd, test_str, send_len, 0), send_len);
+	EXPECT_NE(recv(self->cfd, recv_mem, strlen(test_str_first), 0), -1);
+	EXPECT_EQ(memcmp(test_str_first, recv_mem, strlen(test_str_first)), 0);
+	memset(recv_mem, 0, sizeof(recv_mem));
+	EXPECT_NE(recv(self->cfd, recv_mem, strlen(test_str_second), 0), -1);
+	EXPECT_EQ(memcmp(test_str_second, recv_mem, strlen(test_str_second)),
+		  0);
+}
+
+TEST_F(tls, recv_nonblock)
+{
+	char buf[4096];
+	bool err;
+
+	EXPECT_EQ(recv(self->cfd, buf, sizeof(buf), MSG_DONTWAIT), -1);
+	err = (errno == EAGAIN || errno == EWOULDBLOCK);
+	EXPECT_EQ(err, true);
+}
+
+TEST_F(tls, recv_peek)
+{
+	char const *test_str = "test_read_peek";
+	int send_len = strlen(test_str) + 1;
+	char buf[15];
+
+	EXPECT_EQ(send(self->fd, test_str, send_len, 0), send_len);
+	EXPECT_NE(recv(self->cfd, buf, send_len, MSG_PEEK), -1);
+	EXPECT_EQ(memcmp(test_str, buf, send_len), 0);
+	memset(buf, 0, sizeof(buf));
+	EXPECT_NE(recv(self->cfd, buf, send_len, 0), -1);
+	EXPECT_EQ(memcmp(test_str, buf, send_len), 0);
+}
+
+TEST_F(tls, recv_peek_multiple)
+{
+	char const *test_str = "test_read_peek";
+	int send_len = strlen(test_str) + 1;
+	unsigned int num_peeks = 100;
+	char buf[15];
+	int i;
+
+	EXPECT_EQ(send(self->fd, test_str, send_len, 0), send_len);
+	for (i = 0; i < num_peeks; i++) {
+		EXPECT_NE(recv(self->cfd, buf, send_len, MSG_PEEK), -1);
+		EXPECT_EQ(memcmp(test_str, buf, send_len), 0);
+		memset(buf, 0, sizeof(buf));
+	}
+	EXPECT_NE(recv(self->cfd, buf, send_len, 0), -1);
+	EXPECT_EQ(memcmp(test_str, buf, send_len), 0);
+}
+
+TEST_F(tls, recv_peek_multiple_records)
+{
+	char const *test_str = "test_read_peek_mult_recs";
+	char const *test_str_first = "test_read_peek";
+	char const *test_str_second = "_mult_recs";
+	int len;
+	char buf[64];
+
+	len = strlen(test_str_first);
+	EXPECT_EQ(send(self->fd, test_str_first, len, 0), len);
+
+	len = strlen(test_str_second) + 1;
+	EXPECT_EQ(send(self->fd, test_str_second, len, 0), len);
+
+	len = strlen(test_str_first);
+	memset(buf, 0, len);
+	EXPECT_EQ(recv(self->cfd, buf, len, MSG_PEEK | MSG_WAITALL), len);
+
+	/* MSG_PEEK can only peek into the current record. */
+	len = strlen(test_str_first);
+	EXPECT_EQ(memcmp(test_str_first, buf, len), 0);
+
+	len = strlen(test_str) + 1;
+	memset(buf, 0, len);
+	EXPECT_EQ(recv(self->cfd, buf, len, MSG_WAITALL), len);
+
+	/* Non-MSG_PEEK will advance strparser (and therefore record)
+	 * however.
+	 */
+	len = strlen(test_str) + 1;
+	EXPECT_EQ(memcmp(test_str, buf, len), 0);
+
+	/* MSG_MORE will hold current record open, so later MSG_PEEK
+	 * will see everything.
+	 */
+	len = strlen(test_str_first);
+	EXPECT_EQ(send(self->fd, test_str_first, len, MSG_MORE), len);
+
+	len = strlen(test_str_second) + 1;
+	EXPECT_EQ(send(self->fd, test_str_second, len, 0), len);
+
+	len = strlen(test_str) + 1;
+	memset(buf, 0, len);
+	EXPECT_EQ(recv(self->cfd, buf, len, MSG_PEEK | MSG_WAITALL), len);
+
+	len = strlen(test_str) + 1;
+	EXPECT_EQ(memcmp(test_str, buf, len), 0);
+}
+
+TEST_F(tls, pollin)
+{
+	char const *test_str = "test_poll";
+	struct pollfd fd = { 0, 0, 0 };
+	char buf[10];
+	int send_len = 10;
+
+	EXPECT_EQ(send(self->fd, test_str, send_len, 0), send_len);
+	fd.fd = self->cfd;
+	fd.events = POLLIN;
+
+	EXPECT_EQ(poll(&fd, 1, 20), 1);
+	EXPECT_EQ(fd.revents & POLLIN, 1);
+	EXPECT_EQ(recv(self->cfd, buf, send_len, 0), send_len);
+	/* Test timing out */
+	EXPECT_EQ(poll(&fd, 1, 20), 0);
+}
+
+TEST_F(tls, poll_wait)
+{
+	char const *test_str = "test_poll_wait";
+	int send_len = strlen(test_str) + 1;
+	struct pollfd fd = { 0, 0, 0 };
+	char recv_mem[15];
+
+	fd.fd = self->cfd;
+	fd.events = POLLIN;
+	EXPECT_EQ(send(self->fd, test_str, send_len, 0), send_len);
+	/* Set timeout to inf. secs */
+	EXPECT_EQ(poll(&fd, 1, -1), 1);
+	EXPECT_EQ(fd.revents & POLLIN, 1);
+	EXPECT_EQ(recv(self->cfd, recv_mem, send_len, 0), send_len);
+}
+
+TEST_F(tls, blocking)
+{
+	size_t data = 100000;
+	int res = fork();
+
+	EXPECT_NE(res, -1);
+
+	if (res) {
+		/* parent */
+		size_t left = data;
+		char buf[16384];
+		int status;
+		int pid2;
+
+		while (left) {
+			int res = send(self->fd, buf,
+				       left > 16384 ? 16384 : left, 0);
+
+			EXPECT_GE(res, 0);
+			left -= res;
+		}
+
+		pid2 = wait(&status);
+		EXPECT_EQ(status, 0);
+		EXPECT_EQ(res, pid2);
+	} else {
+		/* child */
+		size_t left = data;
+		char buf[16384];
+
+		while (left) {
+			int res = recv(self->cfd, buf,
+				       left > 16384 ? 16384 : left, 0);
+
+			EXPECT_GE(res, 0);
+			left -= res;
+		}
+	}
+}
+
+TEST_F(tls, nonblocking)
+{
+	size_t data = 100000;
+	int sendbuf = 100;
+	int flags;
+	int res;
+
+	flags = fcntl(self->fd, F_GETFL, 0);
+	fcntl(self->fd, F_SETFL, flags | O_NONBLOCK);
+	fcntl(self->cfd, F_SETFL, flags | O_NONBLOCK);
+
+	/* Ensure nonblocking behavior by imposing a small send
+	 * buffer.
+	 */
+	EXPECT_EQ(setsockopt(self->fd, SOL_SOCKET, SO_SNDBUF,
+			     &sendbuf, sizeof(sendbuf)), 0);
+
+	res = fork();
+	EXPECT_NE(res, -1);
+
+	if (res) {
+		/* parent */
+		bool eagain = false;
+		size_t left = data;
+		char buf[16384];
+		int status;
+		int pid2;
+
+		while (left) {
+			int res = send(self->fd, buf,
+				       left > 16384 ? 16384 : left, 0);
+
+			if (res == -1 && errno == EAGAIN) {
+				eagain = true;
+				usleep(10000);
+				continue;
+			}
+			EXPECT_GE(res, 0);
+			left -= res;
+		}
+
+		EXPECT_TRUE(eagain);
+		pid2 = wait(&status);
+
+		EXPECT_EQ(status, 0);
+		EXPECT_EQ(res, pid2);
+	} else {
+		/* child */
+		bool eagain = false;
+		size_t left = data;
+		char buf[16384];
+
+		while (left) {
+			int res = recv(self->cfd, buf,
+				       left > 16384 ? 16384 : left, 0);
+
+			if (res == -1 && errno == EAGAIN) {
+				eagain = true;
+				usleep(10000);
+				continue;
+			}
+			EXPECT_GE(res, 0);
+			left -= res;
+		}
+		EXPECT_TRUE(eagain);
+	}
+}
+
+TEST_F(tls, control_msg)
+{
+	if (self->notls)
+		return;
+
+	char cbuf[CMSG_SPACE(sizeof(char))];
+	char const *test_str = "test_read";
+	int cmsg_len = sizeof(char);
+	char record_type = 100;
+	struct cmsghdr *cmsg;
+	struct msghdr msg;
+	int send_len = 10;
+	struct iovec vec;
+	char buf[10];
+
+	vec.iov_base = (char *)test_str;
+	vec.iov_len = 10;
+	memset(&msg, 0, sizeof(struct msghdr));
+	msg.msg_iov = &vec;
+	msg.msg_iovlen = 1;
+	msg.msg_control = cbuf;
+	msg.msg_controllen = sizeof(cbuf);
+	cmsg = CMSG_FIRSTHDR(&msg);
+	cmsg->cmsg_level = SOL_TLS;
+	/* test sending non-record types. */
+	cmsg->cmsg_type = TLS_SET_RECORD_TYPE;
+	cmsg->cmsg_len = CMSG_LEN(cmsg_len);
+	*CMSG_DATA(cmsg) = record_type;
+	msg.msg_controllen = cmsg->cmsg_len;
+
+	EXPECT_EQ(sendmsg(self->fd, &msg, 0), send_len);
+	/* Should fail because we didn't provide a control message */
+	EXPECT_EQ(recv(self->cfd, buf, send_len, 0), -1);
+
+	vec.iov_base = buf;
+	EXPECT_EQ(recvmsg(self->cfd, &msg, 0), send_len);
+	cmsg = CMSG_FIRSTHDR(&msg);
+	EXPECT_NE(cmsg, NULL);
+	EXPECT_EQ(cmsg->cmsg_level, SOL_TLS);
+	EXPECT_EQ(cmsg->cmsg_type, TLS_GET_RECORD_TYPE);
+	record_type = *((unsigned char *)CMSG_DATA(cmsg));
+	EXPECT_EQ(record_type, 100);
+	EXPECT_EQ(memcmp(buf, test_str, send_len), 0);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/net/udpgso.c b/tools/testing/selftests/net/udpgso.c
new file mode 100644
index 000000000..23177b643
--- /dev/null
+++ b/tools/testing/selftests/net/udpgso.c
@@ -0,0 +1,685 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+
+#include <stddef.h>
+#include <arpa/inet.h>
+#include <error.h>
+#include <errno.h>
+#include <net/if.h>
+#include <linux/in.h>
+#include <linux/netlink.h>
+#include <linux/rtnetlink.h>
+#include <netinet/if_ether.h>
+#include <netinet/ip.h>
+#include <netinet/ip6.h>
+#include <netinet/udp.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#ifndef ETH_MAX_MTU
+#define ETH_MAX_MTU	0xFFFFU
+#endif
+
+#ifndef UDP_SEGMENT
+#define UDP_SEGMENT		103
+#endif
+
+#ifndef UDP_MAX_SEGMENTS
+#define UDP_MAX_SEGMENTS	(1 << 6UL)
+#endif
+
+#define CONST_MTU_TEST	1500
+
+#define CONST_HDRLEN_V4		(sizeof(struct iphdr) + sizeof(struct udphdr))
+#define CONST_HDRLEN_V6		(sizeof(struct ip6_hdr) + sizeof(struct udphdr))
+
+#define CONST_MSS_V4		(CONST_MTU_TEST - CONST_HDRLEN_V4)
+#define CONST_MSS_V6		(CONST_MTU_TEST - CONST_HDRLEN_V6)
+
+#define CONST_MAX_SEGS_V4	(ETH_MAX_MTU / CONST_MSS_V4)
+#define CONST_MAX_SEGS_V6	(ETH_MAX_MTU / CONST_MSS_V6)
+
+static bool		cfg_do_ipv4;
+static bool		cfg_do_ipv6;
+static bool		cfg_do_connected;
+static bool		cfg_do_connectionless;
+static bool		cfg_do_msgmore;
+static bool		cfg_do_setsockopt;
+static int		cfg_specific_test_id = -1;
+
+static const char	cfg_ifname[] = "lo";
+static unsigned short	cfg_port = 9000;
+
+static char buf[ETH_MAX_MTU];
+
+struct testcase {
+	int tlen;		/* send() buffer size, may exceed mss */
+	bool tfail;		/* send() call is expected to fail */
+	int gso_len;		/* mss after applying gso */
+	int r_num_mss;		/* recv(): number of calls of full mss */
+	int r_len_last;		/* recv(): size of last non-mss dgram, if any */
+};
+
+const struct in6_addr addr6 = IN6ADDR_LOOPBACK_INIT;
+const struct in_addr addr4 = { .s_addr = __constant_htonl(INADDR_LOOPBACK + 2) };
+
+struct testcase testcases_v4[] = {
+	{
+		/* no GSO: send a single byte */
+		.tlen = 1,
+		.r_len_last = 1,
+	},
+	{
+		/* no GSO: send a single MSS */
+		.tlen = CONST_MSS_V4,
+		.r_num_mss = 1,
+	},
+	{
+		/* no GSO: send a single MSS + 1B: fail */
+		.tlen = CONST_MSS_V4 + 1,
+		.tfail = true,
+	},
+	{
+		/* send a single MSS: will fall back to no GSO */
+		.tlen = CONST_MSS_V4,
+		.gso_len = CONST_MSS_V4,
+		.r_num_mss = 1,
+	},
+	{
+		/* send a single MSS + 1B */
+		.tlen = CONST_MSS_V4 + 1,
+		.gso_len = CONST_MSS_V4,
+		.r_num_mss = 1,
+		.r_len_last = 1,
+	},
+	{
+		/* send exactly 2 MSS */
+		.tlen = CONST_MSS_V4 * 2,
+		.gso_len = CONST_MSS_V4,
+		.r_num_mss = 2,
+	},
+	{
+		/* send 2 MSS + 1B */
+		.tlen = (CONST_MSS_V4 * 2) + 1,
+		.gso_len = CONST_MSS_V4,
+		.r_num_mss = 2,
+		.r_len_last = 1,
+	},
+	{
+		/* send MAX segs */
+		.tlen = (ETH_MAX_MTU / CONST_MSS_V4) * CONST_MSS_V4,
+		.gso_len = CONST_MSS_V4,
+		.r_num_mss = (ETH_MAX_MTU / CONST_MSS_V4),
+	},
+
+	{
+		/* send MAX bytes */
+		.tlen = ETH_MAX_MTU - CONST_HDRLEN_V4,
+		.gso_len = CONST_MSS_V4,
+		.r_num_mss = CONST_MAX_SEGS_V4,
+		.r_len_last = ETH_MAX_MTU - CONST_HDRLEN_V4 -
+			      (CONST_MAX_SEGS_V4 * CONST_MSS_V4),
+	},
+	{
+		/* send MAX + 1: fail */
+		.tlen = ETH_MAX_MTU - CONST_HDRLEN_V4 + 1,
+		.gso_len = CONST_MSS_V4,
+		.tfail = true,
+	},
+	{
+		/* send a single 1B MSS: will fall back to no GSO */
+		.tlen = 1,
+		.gso_len = 1,
+		.r_num_mss = 1,
+	},
+	{
+		/* send 2 1B segments */
+		.tlen = 2,
+		.gso_len = 1,
+		.r_num_mss = 2,
+	},
+	{
+		/* send 2B + 2B + 1B segments */
+		.tlen = 5,
+		.gso_len = 2,
+		.r_num_mss = 2,
+		.r_len_last = 1,
+	},
+	{
+		/* send max number of min sized segments */
+		.tlen = UDP_MAX_SEGMENTS,
+		.gso_len = 1,
+		.r_num_mss = UDP_MAX_SEGMENTS,
+	},
+	{
+		/* send max number + 1 of min sized segments: fail */
+		.tlen = UDP_MAX_SEGMENTS + 1,
+		.gso_len = 1,
+		.tfail = true,
+	},
+	{
+		/* EOL */
+	}
+};
+
+#ifndef IP6_MAX_MTU
+#define IP6_MAX_MTU	(ETH_MAX_MTU + sizeof(struct ip6_hdr))
+#endif
+
+struct testcase testcases_v6[] = {
+	{
+		/* no GSO: send a single byte */
+		.tlen = 1,
+		.r_len_last = 1,
+	},
+	{
+		/* no GSO: send a single MSS */
+		.tlen = CONST_MSS_V6,
+		.r_num_mss = 1,
+	},
+	{
+		/* no GSO: send a single MSS + 1B: fail */
+		.tlen = CONST_MSS_V6 + 1,
+		.tfail = true,
+	},
+	{
+		/* send a single MSS: will fall back to no GSO */
+		.tlen = CONST_MSS_V6,
+		.gso_len = CONST_MSS_V6,
+		.r_num_mss = 1,
+	},
+	{
+		/* send a single MSS + 1B */
+		.tlen = CONST_MSS_V6 + 1,
+		.gso_len = CONST_MSS_V6,
+		.r_num_mss = 1,
+		.r_len_last = 1,
+	},
+	{
+		/* send exactly 2 MSS */
+		.tlen = CONST_MSS_V6 * 2,
+		.gso_len = CONST_MSS_V6,
+		.r_num_mss = 2,
+	},
+	{
+		/* send 2 MSS + 1B */
+		.tlen = (CONST_MSS_V6 * 2) + 1,
+		.gso_len = CONST_MSS_V6,
+		.r_num_mss = 2,
+		.r_len_last = 1,
+	},
+	{
+		/* send MAX segs */
+		.tlen = (IP6_MAX_MTU / CONST_MSS_V6) * CONST_MSS_V6,
+		.gso_len = CONST_MSS_V6,
+		.r_num_mss = (IP6_MAX_MTU / CONST_MSS_V6),
+	},
+
+	{
+		/* send MAX bytes */
+		.tlen = IP6_MAX_MTU - CONST_HDRLEN_V6,
+		.gso_len = CONST_MSS_V6,
+		.r_num_mss = CONST_MAX_SEGS_V6,
+		.r_len_last = IP6_MAX_MTU - CONST_HDRLEN_V6 -
+			      (CONST_MAX_SEGS_V6 * CONST_MSS_V6),
+	},
+	{
+		/* send MAX + 1: fail */
+		.tlen = IP6_MAX_MTU - CONST_HDRLEN_V6 + 1,
+		.gso_len = CONST_MSS_V6,
+		.tfail = true,
+	},
+	{
+		/* send a single 1B MSS: will fall back to no GSO */
+		.tlen = 1,
+		.gso_len = 1,
+		.r_num_mss = 1,
+	},
+	{
+		/* send 2 1B segments */
+		.tlen = 2,
+		.gso_len = 1,
+		.r_num_mss = 2,
+	},
+	{
+		/* send 2B + 2B + 1B segments */
+		.tlen = 5,
+		.gso_len = 2,
+		.r_num_mss = 2,
+		.r_len_last = 1,
+	},
+	{
+		/* send max number of min sized segments */
+		.tlen = UDP_MAX_SEGMENTS,
+		.gso_len = 1,
+		.r_num_mss = UDP_MAX_SEGMENTS,
+	},
+	{
+		/* send max number + 1 of min sized segments: fail */
+		.tlen = UDP_MAX_SEGMENTS + 1,
+		.gso_len = 1,
+		.tfail = true,
+	},
+	{
+		/* EOL */
+	}
+};
+
+static unsigned int get_device_mtu(int fd, const char *ifname)
+{
+	struct ifreq ifr;
+
+	memset(&ifr, 0, sizeof(ifr));
+
+	strcpy(ifr.ifr_name, ifname);
+
+	if (ioctl(fd, SIOCGIFMTU, &ifr))
+		error(1, errno, "ioctl get mtu");
+
+	return ifr.ifr_mtu;
+}
+
+static void __set_device_mtu(int fd, const char *ifname, unsigned int mtu)
+{
+	struct ifreq ifr;
+
+	memset(&ifr, 0, sizeof(ifr));
+
+	ifr.ifr_mtu = mtu;
+	strcpy(ifr.ifr_name, ifname);
+
+	if (ioctl(fd, SIOCSIFMTU, &ifr))
+		error(1, errno, "ioctl set mtu");
+}
+
+static void set_device_mtu(int fd, int mtu)
+{
+	int val;
+
+	val = get_device_mtu(fd, cfg_ifname);
+	fprintf(stderr, "device mtu (orig): %u\n", val);
+
+	__set_device_mtu(fd, cfg_ifname, mtu);
+	val = get_device_mtu(fd, cfg_ifname);
+	if (val != mtu)
+		error(1, 0, "unable to set device mtu to %u\n", val);
+
+	fprintf(stderr, "device mtu (test): %u\n", val);
+}
+
+static void set_pmtu_discover(int fd, bool is_ipv4)
+{
+	int level, name, val;
+
+	if (is_ipv4) {
+		level	= SOL_IP;
+		name	= IP_MTU_DISCOVER;
+		val	= IP_PMTUDISC_DO;
+	} else {
+		level	= SOL_IPV6;
+		name	= IPV6_MTU_DISCOVER;
+		val	= IPV6_PMTUDISC_DO;
+	}
+
+	if (setsockopt(fd, level, name, &val, sizeof(val)))
+		error(1, errno, "setsockopt path mtu");
+}
+
+static unsigned int get_path_mtu(int fd, bool is_ipv4)
+{
+	socklen_t vallen;
+	unsigned int mtu;
+	int ret;
+
+	vallen = sizeof(mtu);
+	if (is_ipv4)
+		ret = getsockopt(fd, SOL_IP, IP_MTU, &mtu, &vallen);
+	else
+		ret = getsockopt(fd, SOL_IPV6, IPV6_MTU, &mtu, &vallen);
+
+	if (ret)
+		error(1, errno, "getsockopt mtu");
+
+
+	fprintf(stderr, "path mtu (read):  %u\n", mtu);
+	return mtu;
+}
+
+/* very wordy version of system("ip route add dev lo mtu 1500 127.0.0.3/32") */
+static void set_route_mtu(int mtu, bool is_ipv4)
+{
+	struct sockaddr_nl nladdr = { .nl_family = AF_NETLINK };
+	struct nlmsghdr *nh;
+	struct rtattr *rta;
+	struct rtmsg *rt;
+	char data[NLMSG_ALIGN(sizeof(*nh)) +
+		  NLMSG_ALIGN(sizeof(*rt)) +
+		  NLMSG_ALIGN(RTA_LENGTH(sizeof(addr6))) +
+		  NLMSG_ALIGN(RTA_LENGTH(sizeof(int))) +
+		  NLMSG_ALIGN(RTA_LENGTH(0) + RTA_LENGTH(sizeof(int)))];
+	int fd, ret, alen, off = 0;
+
+	alen = is_ipv4 ? sizeof(addr4) : sizeof(addr6);
+
+	fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
+	if (fd == -1)
+		error(1, errno, "socket netlink");
+
+	memset(data, 0, sizeof(data));
+
+	nh = (void *)data;
+	nh->nlmsg_type = RTM_NEWROUTE;
+	nh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE;
+	off += NLMSG_ALIGN(sizeof(*nh));
+
+	rt = (void *)(data + off);
+	rt->rtm_family = is_ipv4 ? AF_INET : AF_INET6;
+	rt->rtm_table = RT_TABLE_MAIN;
+	rt->rtm_dst_len = alen << 3;
+	rt->rtm_protocol = RTPROT_BOOT;
+	rt->rtm_scope = RT_SCOPE_UNIVERSE;
+	rt->rtm_type = RTN_UNICAST;
+	off += NLMSG_ALIGN(sizeof(*rt));
+
+	rta = (void *)(data + off);
+	rta->rta_type = RTA_DST;
+	rta->rta_len = RTA_LENGTH(alen);
+	if (is_ipv4)
+		memcpy(RTA_DATA(rta), &addr4, alen);
+	else
+		memcpy(RTA_DATA(rta), &addr6, alen);
+	off += NLMSG_ALIGN(rta->rta_len);
+
+	rta = (void *)(data + off);
+	rta->rta_type = RTA_OIF;
+	rta->rta_len = RTA_LENGTH(sizeof(int));
+	*((int *)(RTA_DATA(rta))) = 1; //if_nametoindex("lo");
+	off += NLMSG_ALIGN(rta->rta_len);
+
+	/* MTU is a subtype in a metrics type */
+	rta = (void *)(data + off);
+	rta->rta_type = RTA_METRICS;
+	rta->rta_len = RTA_LENGTH(0) + RTA_LENGTH(sizeof(int));
+	off += NLMSG_ALIGN(rta->rta_len);
+
+	/* now fill MTU subtype. Note that it fits within above rta_len */
+	rta = (void *)(((char *) rta) + RTA_LENGTH(0));
+	rta->rta_type = RTAX_MTU;
+	rta->rta_len = RTA_LENGTH(sizeof(int));
+	*((int *)(RTA_DATA(rta))) = mtu;
+
+	nh->nlmsg_len = off;
+
+	ret = sendto(fd, data, off, 0, (void *)&nladdr, sizeof(nladdr));
+	if (ret != off)
+		error(1, errno, "send netlink: %uB != %uB\n", ret, off);
+
+	if (close(fd))
+		error(1, errno, "close netlink");
+
+	fprintf(stderr, "route mtu (test): %u\n", mtu);
+}
+
+static bool __send_one(int fd, struct msghdr *msg, int flags)
+{
+	int ret;
+
+	ret = sendmsg(fd, msg, flags);
+	if (ret == -1 &&
+	    (errno == EMSGSIZE || errno == ENOMEM || errno == EINVAL))
+		return false;
+	if (ret == -1)
+		error(1, errno, "sendmsg");
+	if (ret != msg->msg_iov->iov_len)
+		error(1, 0, "sendto: %d != %lu", ret, msg->msg_iov->iov_len);
+	if (msg->msg_flags)
+		error(1, 0, "sendmsg: return flags 0x%x\n", msg->msg_flags);
+
+	return true;
+}
+
+static bool send_one(int fd, int len, int gso_len,
+		     struct sockaddr *addr, socklen_t alen)
+{
+	char control[CMSG_SPACE(sizeof(uint16_t))] = {0};
+	struct msghdr msg = {0};
+	struct iovec iov = {0};
+	struct cmsghdr *cm;
+
+	iov.iov_base = buf;
+	iov.iov_len = len;
+
+	msg.msg_iov = &iov;
+	msg.msg_iovlen = 1;
+
+	msg.msg_name = addr;
+	msg.msg_namelen = alen;
+
+	if (gso_len && !cfg_do_setsockopt) {
+		msg.msg_control = control;
+		msg.msg_controllen = sizeof(control);
+
+		cm = CMSG_FIRSTHDR(&msg);
+		cm->cmsg_level = SOL_UDP;
+		cm->cmsg_type = UDP_SEGMENT;
+		cm->cmsg_len = CMSG_LEN(sizeof(uint16_t));
+		*((uint16_t *) CMSG_DATA(cm)) = gso_len;
+	}
+
+	/* If MSG_MORE, send 1 byte followed by remainder */
+	if (cfg_do_msgmore && len > 1) {
+		iov.iov_len = 1;
+		if (!__send_one(fd, &msg, MSG_MORE))
+			error(1, 0, "send 1B failed");
+
+		iov.iov_base++;
+		iov.iov_len = len - 1;
+	}
+
+	return __send_one(fd, &msg, 0);
+}
+
+static int recv_one(int fd, int flags)
+{
+	int ret;
+
+	ret = recv(fd, buf, sizeof(buf), flags);
+	if (ret == -1 && errno == EAGAIN && (flags & MSG_DONTWAIT))
+		return 0;
+	if (ret == -1)
+		error(1, errno, "recv");
+
+	return ret;
+}
+
+static void run_one(struct testcase *test, int fdt, int fdr,
+		    struct sockaddr *addr, socklen_t alen)
+{
+	int i, ret, val, mss;
+	bool sent;
+
+	fprintf(stderr, "ipv%d tx:%d gso:%d %s\n",
+			addr->sa_family == AF_INET ? 4 : 6,
+			test->tlen, test->gso_len,
+			test->tfail ? "(fail)" : "");
+
+	val = test->gso_len;
+	if (cfg_do_setsockopt) {
+		if (setsockopt(fdt, SOL_UDP, UDP_SEGMENT, &val, sizeof(val)))
+			error(1, errno, "setsockopt udp segment");
+	}
+
+	sent = send_one(fdt, test->tlen, test->gso_len, addr, alen);
+	if (sent && test->tfail)
+		error(1, 0, "send succeeded while expecting failure");
+	if (!sent && !test->tfail)
+		error(1, 0, "send failed while expecting success");
+	if (!sent)
+		return;
+
+	if (test->gso_len)
+		mss = test->gso_len;
+	else
+		mss = addr->sa_family == AF_INET ? CONST_MSS_V4 : CONST_MSS_V6;
+
+
+	/* Recv all full MSS datagrams */
+	for (i = 0; i < test->r_num_mss; i++) {
+		ret = recv_one(fdr, 0);
+		if (ret != mss)
+			error(1, 0, "recv.%d: %d != %d", i, ret, mss);
+	}
+
+	/* Recv the non-full last datagram, if tlen was not a multiple of mss */
+	if (test->r_len_last) {
+		ret = recv_one(fdr, 0);
+		if (ret != test->r_len_last)
+			error(1, 0, "recv.%d: %d != %d (last)",
+			      i, ret, test->r_len_last);
+	}
+
+	/* Verify received all data */
+	ret = recv_one(fdr, MSG_DONTWAIT);
+	if (ret)
+		error(1, 0, "recv: unexpected datagram");
+}
+
+static void run_all(int fdt, int fdr, struct sockaddr *addr, socklen_t alen)
+{
+	struct testcase *tests, *test;
+
+	tests = addr->sa_family == AF_INET ? testcases_v4 : testcases_v6;
+
+	for (test = tests; test->tlen; test++) {
+		/* if a specific test is given, then skip all others */
+		if (cfg_specific_test_id == -1 ||
+		    cfg_specific_test_id == test - tests)
+			run_one(test, fdt, fdr, addr, alen);
+	}
+}
+
+static void run_test(struct sockaddr *addr, socklen_t alen)
+{
+	struct timeval tv = { .tv_usec = 100 * 1000 };
+	int fdr, fdt, val;
+
+	fdr = socket(addr->sa_family, SOCK_DGRAM, 0);
+	if (fdr == -1)
+		error(1, errno, "socket r");
+
+	if (bind(fdr, addr, alen))
+		error(1, errno, "bind");
+
+	/* Have tests fail quickly instead of hang */
+	if (setsockopt(fdr, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)))
+		error(1, errno, "setsockopt rcv timeout");
+
+	fdt = socket(addr->sa_family, SOCK_DGRAM, 0);
+	if (fdt == -1)
+		error(1, errno, "socket t");
+
+	/* Do not fragment these datagrams: only succeed if GSO works */
+	set_pmtu_discover(fdt, addr->sa_family == AF_INET);
+
+	if (cfg_do_connectionless) {
+		set_device_mtu(fdt, CONST_MTU_TEST);
+		run_all(fdt, fdr, addr, alen);
+	}
+
+	if (cfg_do_connected) {
+		set_device_mtu(fdt, CONST_MTU_TEST + 100);
+		set_route_mtu(CONST_MTU_TEST, addr->sa_family == AF_INET);
+
+		if (connect(fdt, addr, alen))
+			error(1, errno, "connect");
+
+		val = get_path_mtu(fdt, addr->sa_family == AF_INET);
+		if (val != CONST_MTU_TEST)
+			error(1, 0, "bad path mtu %u\n", val);
+
+		run_all(fdt, fdr, addr, 0 /* use connected addr */);
+	}
+
+	if (close(fdt))
+		error(1, errno, "close t");
+	if (close(fdr))
+		error(1, errno, "close r");
+}
+
+static void run_test_v4(void)
+{
+	struct sockaddr_in addr = {0};
+
+	addr.sin_family = AF_INET;
+	addr.sin_port = htons(cfg_port);
+	addr.sin_addr = addr4;
+
+	run_test((void *)&addr, sizeof(addr));
+}
+
+static void run_test_v6(void)
+{
+	struct sockaddr_in6 addr = {0};
+
+	addr.sin6_family = AF_INET6;
+	addr.sin6_port = htons(cfg_port);
+	addr.sin6_addr = addr6;
+
+	run_test((void *)&addr, sizeof(addr));
+}
+
+static void parse_opts(int argc, char **argv)
+{
+	int c;
+
+	while ((c = getopt(argc, argv, "46cCmst:")) != -1) {
+		switch (c) {
+		case '4':
+			cfg_do_ipv4 = true;
+			break;
+		case '6':
+			cfg_do_ipv6 = true;
+			break;
+		case 'c':
+			cfg_do_connected = true;
+			break;
+		case 'C':
+			cfg_do_connectionless = true;
+			break;
+		case 'm':
+			cfg_do_msgmore = true;
+			break;
+		case 's':
+			cfg_do_setsockopt = true;
+			break;
+		case 't':
+			cfg_specific_test_id = strtoul(optarg, NULL, 0);
+			break;
+		default:
+			error(1, 0, "%s: parse error", argv[0]);
+		}
+	}
+}
+
+int main(int argc, char **argv)
+{
+	parse_opts(argc, argv);
+
+	if (cfg_do_ipv4)
+		run_test_v4();
+	if (cfg_do_ipv6)
+		run_test_v6();
+
+	fprintf(stderr, "OK\n");
+	return 0;
+}
diff --git a/tools/testing/selftests/net/udpgso.sh b/tools/testing/selftests/net/udpgso.sh
new file mode 100755
index 000000000..fec24f584
--- /dev/null
+++ b/tools/testing/selftests/net/udpgso.sh
@@ -0,0 +1,29 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+#
+# Run a series of udpgso regression tests
+
+echo "ipv4 cmsg"
+./in_netns.sh ./udpgso -4 -C
+
+echo "ipv4 setsockopt"
+./in_netns.sh ./udpgso -4 -C -s
+
+echo "ipv6 cmsg"
+./in_netns.sh ./udpgso -6 -C
+
+echo "ipv6 setsockopt"
+./in_netns.sh ./udpgso -6 -C -s
+
+echo "ipv4 connected"
+./in_netns.sh ./udpgso -4 -c
+
+# blocked on 2nd loopback address
+# echo "ipv6 connected"
+# ./in_netns.sh ./udpgso -6 -c
+
+echo "ipv4 msg_more"
+./in_netns.sh ./udpgso -4 -C -m
+
+echo "ipv6 msg_more"
+./in_netns.sh ./udpgso -6 -C -m
diff --git a/tools/testing/selftests/net/udpgso_bench.sh b/tools/testing/selftests/net/udpgso_bench.sh
new file mode 100755
index 000000000..99e537ab5
--- /dev/null
+++ b/tools/testing/selftests/net/udpgso_bench.sh
@@ -0,0 +1,71 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Run a series of udpgso benchmarks
+
+wake_children() {
+	local -r jobs="$(jobs -p)"
+
+	if [[ "${jobs}" != "" ]]; then
+		kill -1 ${jobs} 2>/dev/null
+	fi
+}
+trap wake_children EXIT
+
+run_one() {
+	local -r args=$@
+
+	./udpgso_bench_rx &
+	./udpgso_bench_rx -t &
+
+	./udpgso_bench_tx ${args}
+}
+
+run_in_netns() {
+	local -r args=$@
+
+	./in_netns.sh $0 __subprocess ${args}
+}
+
+run_udp() {
+	local -r args=$@
+
+	echo "udp"
+	run_in_netns ${args}
+
+	echo "udp gso"
+	run_in_netns ${args} -S
+}
+
+run_tcp() {
+	local -r args=$@
+
+	echo "tcp"
+	run_in_netns ${args} -t
+
+	echo "tcp zerocopy"
+	run_in_netns ${args} -t -z
+}
+
+run_all() {
+	local -r core_args="-l 4"
+	local -r ipv4_args="${core_args} -4 -D 127.0.0.1"
+	local -r ipv6_args="${core_args} -6 -D ::1"
+
+	echo "ipv4"
+	run_tcp "${ipv4_args}"
+	run_udp "${ipv4_args}"
+
+	echo "ipv6"
+	run_tcp "${ipv4_args}"
+	run_udp "${ipv6_args}"
+}
+
+if [[ $# -eq 0 ]]; then
+	run_all
+elif [[ $1 == "__subprocess" ]]; then
+	shift
+	run_one $@
+else
+	run_in_netns $@
+fi
diff --git a/tools/testing/selftests/net/udpgso_bench_rx.c b/tools/testing/selftests/net/udpgso_bench_rx.c
new file mode 100644
index 000000000..727cf67a3
--- /dev/null
+++ b/tools/testing/selftests/net/udpgso_bench_rx.c
@@ -0,0 +1,265 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+
+#include <arpa/inet.h>
+#include <error.h>
+#include <errno.h>
+#include <limits.h>
+#include <linux/errqueue.h>
+#include <linux/if_packet.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <net/ethernet.h>
+#include <net/if.h>
+#include <netinet/ip.h>
+#include <netinet/ip6.h>
+#include <netinet/tcp.h>
+#include <netinet/udp.h>
+#include <poll.h>
+#include <sched.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+static int  cfg_port		= 8000;
+static bool cfg_tcp;
+static bool cfg_verify;
+
+static bool interrupted;
+static unsigned long packets, bytes;
+
+static void sigint_handler(int signum)
+{
+	if (signum == SIGINT)
+		interrupted = true;
+}
+
+static unsigned long gettimeofday_ms(void)
+{
+	struct timeval tv;
+
+	gettimeofday(&tv, NULL);
+	return (tv.tv_sec * 1000) + (tv.tv_usec / 1000);
+}
+
+static void do_poll(int fd)
+{
+	struct pollfd pfd;
+	int ret;
+
+	pfd.events = POLLIN;
+	pfd.revents = 0;
+	pfd.fd = fd;
+
+	do {
+		ret = poll(&pfd, 1, 10);
+		if (ret == -1)
+			error(1, errno, "poll");
+		if (ret == 0)
+			continue;
+		if (pfd.revents != POLLIN)
+			error(1, errno, "poll: 0x%x expected 0x%x\n",
+					pfd.revents, POLLIN);
+	} while (!ret && !interrupted);
+}
+
+static int do_socket(bool do_tcp)
+{
+	struct sockaddr_in6 addr = {0};
+	int fd, val;
+
+	fd = socket(PF_INET6, cfg_tcp ? SOCK_STREAM : SOCK_DGRAM, 0);
+	if (fd == -1)
+		error(1, errno, "socket");
+
+	val = 1 << 21;
+	if (setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &val, sizeof(val)))
+		error(1, errno, "setsockopt rcvbuf");
+	val = 1;
+	if (setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &val, sizeof(val)))
+		error(1, errno, "setsockopt reuseport");
+
+	addr.sin6_family =	PF_INET6;
+	addr.sin6_port =	htons(cfg_port);
+	addr.sin6_addr =	in6addr_any;
+	if (bind(fd, (void *) &addr, sizeof(addr)))
+		error(1, errno, "bind");
+
+	if (do_tcp) {
+		int accept_fd = fd;
+
+		if (listen(accept_fd, 1))
+			error(1, errno, "listen");
+
+		do_poll(accept_fd);
+
+		fd = accept(accept_fd, NULL, NULL);
+		if (fd == -1)
+			error(1, errno, "accept");
+		if (close(accept_fd))
+			error(1, errno, "close accept fd");
+	}
+
+	return fd;
+}
+
+/* Flush all outstanding bytes for the tcp receive queue */
+static void do_flush_tcp(int fd)
+{
+	int ret;
+
+	while (true) {
+		/* MSG_TRUNC flushes up to len bytes */
+		ret = recv(fd, NULL, 1 << 21, MSG_TRUNC | MSG_DONTWAIT);
+		if (ret == -1 && errno == EAGAIN)
+			return;
+		if (ret == -1)
+			error(1, errno, "flush");
+		if (ret == 0) {
+			/* client detached */
+			exit(0);
+		}
+
+		packets++;
+		bytes += ret;
+	}
+
+}
+
+static char sanitized_char(char val)
+{
+	return (val >= 'a' && val <= 'z') ? val : '.';
+}
+
+static void do_verify_udp(const char *data, int len)
+{
+	char cur = data[0];
+	int i;
+
+	/* verify contents */
+	if (cur < 'a' || cur > 'z')
+		error(1, 0, "data initial byte out of range");
+
+	for (i = 1; i < len; i++) {
+		if (cur == 'z')
+			cur = 'a';
+		else
+			cur++;
+
+		if (data[i] != cur)
+			error(1, 0, "data[%d]: len %d, %c(%hhu) != %c(%hhu)\n",
+			      i, len,
+			      sanitized_char(data[i]), data[i],
+			      sanitized_char(cur), cur);
+	}
+}
+
+/* Flush all outstanding datagrams. Verify first few bytes of each. */
+static void do_flush_udp(int fd)
+{
+	static char rbuf[ETH_DATA_LEN];
+	int ret, len, budget = 256;
+
+	len = cfg_verify ? sizeof(rbuf) : 0;
+	while (budget--) {
+		/* MSG_TRUNC will make return value full datagram length */
+		ret = recv(fd, rbuf, len, MSG_TRUNC | MSG_DONTWAIT);
+		if (ret == -1 && errno == EAGAIN)
+			return;
+		if (ret == -1)
+			error(1, errno, "recv");
+		if (len) {
+			if (ret == 0)
+				error(1, errno, "recv: 0 byte datagram\n");
+
+			do_verify_udp(rbuf, ret);
+		}
+
+		packets++;
+		bytes += ret;
+	}
+}
+
+static void usage(const char *filepath)
+{
+	error(1, 0, "Usage: %s [-tv] [-p port]", filepath);
+}
+
+static void parse_opts(int argc, char **argv)
+{
+	int c;
+
+	while ((c = getopt(argc, argv, "ptv")) != -1) {
+		switch (c) {
+		case 'p':
+			cfg_port = htons(strtoul(optarg, NULL, 0));
+			break;
+		case 't':
+			cfg_tcp = true;
+			break;
+		case 'v':
+			cfg_verify = true;
+			break;
+		}
+	}
+
+	if (optind != argc)
+		usage(argv[0]);
+
+	if (cfg_tcp && cfg_verify)
+		error(1, 0, "TODO: implement verify mode for tcp");
+}
+
+static void do_recv(void)
+{
+	unsigned long tnow, treport;
+	int fd;
+
+	fd = do_socket(cfg_tcp);
+
+	treport = gettimeofday_ms() + 1000;
+	do {
+		do_poll(fd);
+
+		if (cfg_tcp)
+			do_flush_tcp(fd);
+		else
+			do_flush_udp(fd);
+
+		tnow = gettimeofday_ms();
+		if (tnow > treport) {
+			if (packets)
+				fprintf(stderr,
+					"%s rx: %6lu MB/s %8lu calls/s\n",
+					cfg_tcp ? "tcp" : "udp",
+					bytes >> 20, packets);
+			bytes = packets = 0;
+			treport = tnow + 1000;
+		}
+
+	} while (!interrupted);
+
+	if (close(fd))
+		error(1, errno, "close");
+}
+
+int main(int argc, char **argv)
+{
+	parse_opts(argc, argv);
+
+	signal(SIGINT, sigint_handler);
+
+	do_recv();
+
+	return 0;
+}
diff --git a/tools/testing/selftests/net/udpgso_bench_tx.c b/tools/testing/selftests/net/udpgso_bench_tx.c
new file mode 100644
index 000000000..463a2cbd0
--- /dev/null
+++ b/tools/testing/selftests/net/udpgso_bench_tx.c
@@ -0,0 +1,426 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+
+#include <arpa/inet.h>
+#include <errno.h>
+#include <error.h>
+#include <netinet/if_ether.h>
+#include <netinet/in.h>
+#include <netinet/ip.h>
+#include <netinet/ip6.h>
+#include <netinet/udp.h>
+#include <poll.h>
+#include <sched.h>
+#include <signal.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/socket.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#ifndef ETH_MAX_MTU
+#define ETH_MAX_MTU 0xFFFFU
+#endif
+
+#ifndef UDP_SEGMENT
+#define UDP_SEGMENT		103
+#endif
+
+#ifndef SO_ZEROCOPY
+#define SO_ZEROCOPY	60
+#endif
+
+#ifndef MSG_ZEROCOPY
+#define MSG_ZEROCOPY	0x4000000
+#endif
+
+#define NUM_PKT		100
+
+static bool	cfg_cache_trash;
+static int	cfg_cpu		= -1;
+static int	cfg_connected	= true;
+static int	cfg_family	= PF_UNSPEC;
+static uint16_t	cfg_mss;
+static int	cfg_payload_len	= (1472 * 42);
+static int	cfg_port	= 8000;
+static int	cfg_runtime_ms	= -1;
+static bool	cfg_segment;
+static bool	cfg_sendmmsg;
+static bool	cfg_tcp;
+static bool	cfg_zerocopy;
+
+static socklen_t cfg_alen;
+static struct sockaddr_storage cfg_dst_addr;
+
+static bool interrupted;
+static char buf[NUM_PKT][ETH_MAX_MTU];
+
+static void sigint_handler(int signum)
+{
+	if (signum == SIGINT)
+		interrupted = true;
+}
+
+static unsigned long gettimeofday_ms(void)
+{
+	struct timeval tv;
+
+	gettimeofday(&tv, NULL);
+	return (tv.tv_sec * 1000) + (tv.tv_usec / 1000);
+}
+
+static int set_cpu(int cpu)
+{
+	cpu_set_t mask;
+
+	CPU_ZERO(&mask);
+	CPU_SET(cpu, &mask);
+	if (sched_setaffinity(0, sizeof(mask), &mask))
+		error(1, 0, "setaffinity %d", cpu);
+
+	return 0;
+}
+
+static void setup_sockaddr(int domain, const char *str_addr, void *sockaddr)
+{
+	struct sockaddr_in6 *addr6 = (void *) sockaddr;
+	struct sockaddr_in *addr4 = (void *) sockaddr;
+
+	switch (domain) {
+	case PF_INET:
+		addr4->sin_family = AF_INET;
+		addr4->sin_port = htons(cfg_port);
+		if (inet_pton(AF_INET, str_addr, &(addr4->sin_addr)) != 1)
+			error(1, 0, "ipv4 parse error: %s", str_addr);
+		break;
+	case PF_INET6:
+		addr6->sin6_family = AF_INET6;
+		addr6->sin6_port = htons(cfg_port);
+		if (inet_pton(AF_INET6, str_addr, &(addr6->sin6_addr)) != 1)
+			error(1, 0, "ipv6 parse error: %s", str_addr);
+		break;
+	default:
+		error(1, 0, "illegal domain");
+	}
+}
+
+static void flush_zerocopy(int fd)
+{
+	struct msghdr msg = {0};	/* flush */
+	int ret;
+
+	while (1) {
+		ret = recvmsg(fd, &msg, MSG_ERRQUEUE);
+		if (ret == -1 && errno == EAGAIN)
+			break;
+		if (ret == -1)
+			error(1, errno, "errqueue");
+		if (msg.msg_flags != (MSG_ERRQUEUE | MSG_CTRUNC))
+			error(1, 0, "errqueue: flags 0x%x\n", msg.msg_flags);
+		msg.msg_flags = 0;
+	}
+}
+
+static int send_tcp(int fd, char *data)
+{
+	int ret, done = 0, count = 0;
+
+	while (done < cfg_payload_len) {
+		ret = send(fd, data + done, cfg_payload_len - done,
+			   cfg_zerocopy ? MSG_ZEROCOPY : 0);
+		if (ret == -1)
+			error(1, errno, "write");
+
+		done += ret;
+		count++;
+	}
+
+	return count;
+}
+
+static int send_udp(int fd, char *data)
+{
+	int ret, total_len, len, count = 0;
+
+	total_len = cfg_payload_len;
+
+	while (total_len) {
+		len = total_len < cfg_mss ? total_len : cfg_mss;
+
+		ret = sendto(fd, data, len, cfg_zerocopy ? MSG_ZEROCOPY : 0,
+			     cfg_connected ? NULL : (void *)&cfg_dst_addr,
+			     cfg_connected ? 0 : cfg_alen);
+		if (ret == -1)
+			error(1, errno, "write");
+		if (ret != len)
+			error(1, errno, "write: %uB != %uB\n", ret, len);
+
+		total_len -= len;
+		count++;
+	}
+
+	return count;
+}
+
+static int send_udp_sendmmsg(int fd, char *data)
+{
+	const int max_nr_msg = ETH_MAX_MTU / ETH_DATA_LEN;
+	struct mmsghdr mmsgs[max_nr_msg];
+	struct iovec iov[max_nr_msg];
+	unsigned int off = 0, left;
+	int i = 0, ret;
+
+	memset(mmsgs, 0, sizeof(mmsgs));
+
+	left = cfg_payload_len;
+	while (left) {
+		if (i == max_nr_msg)
+			error(1, 0, "sendmmsg: exceeds max_nr_msg");
+
+		iov[i].iov_base = data + off;
+		iov[i].iov_len = cfg_mss < left ? cfg_mss : left;
+
+		mmsgs[i].msg_hdr.msg_iov = iov + i;
+		mmsgs[i].msg_hdr.msg_iovlen = 1;
+
+		off += iov[i].iov_len;
+		left -= iov[i].iov_len;
+		i++;
+	}
+
+	ret = sendmmsg(fd, mmsgs, i, cfg_zerocopy ? MSG_ZEROCOPY : 0);
+	if (ret == -1)
+		error(1, errno, "sendmmsg");
+
+	return ret;
+}
+
+static void send_udp_segment_cmsg(struct cmsghdr *cm)
+{
+	uint16_t *valp;
+
+	cm->cmsg_level = SOL_UDP;
+	cm->cmsg_type = UDP_SEGMENT;
+	cm->cmsg_len = CMSG_LEN(sizeof(cfg_mss));
+	valp = (void *)CMSG_DATA(cm);
+	*valp = cfg_mss;
+}
+
+static int send_udp_segment(int fd, char *data)
+{
+	char control[CMSG_SPACE(sizeof(cfg_mss))] = {0};
+	struct msghdr msg = {0};
+	struct iovec iov = {0};
+	int ret;
+
+	iov.iov_base = data;
+	iov.iov_len = cfg_payload_len;
+
+	msg.msg_iov = &iov;
+	msg.msg_iovlen = 1;
+
+	msg.msg_control = control;
+	msg.msg_controllen = sizeof(control);
+	send_udp_segment_cmsg(CMSG_FIRSTHDR(&msg));
+
+	msg.msg_name = (void *)&cfg_dst_addr;
+	msg.msg_namelen = cfg_alen;
+
+	ret = sendmsg(fd, &msg, cfg_zerocopy ? MSG_ZEROCOPY : 0);
+	if (ret == -1)
+		error(1, errno, "sendmsg");
+	if (ret != iov.iov_len)
+		error(1, 0, "sendmsg: %u != %lu\n", ret, iov.iov_len);
+
+	return 1;
+}
+
+static void usage(const char *filepath)
+{
+	error(1, 0, "Usage: %s [-46cmStuz] [-C cpu] [-D dst ip] [-l secs] [-p port] [-s sendsize]",
+		    filepath);
+}
+
+static void parse_opts(int argc, char **argv)
+{
+	const char *bind_addr = NULL;
+	int max_len, hdrlen;
+	int c;
+
+	while ((c = getopt(argc, argv, "46cC:D:l:mp:s:Stuz")) != -1) {
+		switch (c) {
+		case '4':
+			if (cfg_family != PF_UNSPEC)
+				error(1, 0, "Pass one of -4 or -6");
+			cfg_family = PF_INET;
+			cfg_alen = sizeof(struct sockaddr_in);
+			break;
+		case '6':
+			if (cfg_family != PF_UNSPEC)
+				error(1, 0, "Pass one of -4 or -6");
+			cfg_family = PF_INET6;
+			cfg_alen = sizeof(struct sockaddr_in6);
+			break;
+		case 'c':
+			cfg_cache_trash = true;
+			break;
+		case 'C':
+			cfg_cpu = strtol(optarg, NULL, 0);
+			break;
+		case 'D':
+			bind_addr = optarg;
+			break;
+		case 'l':
+			cfg_runtime_ms = strtoul(optarg, NULL, 10) * 1000;
+			break;
+		case 'm':
+			cfg_sendmmsg = true;
+			break;
+		case 'p':
+			cfg_port = strtoul(optarg, NULL, 0);
+			break;
+		case 's':
+			cfg_payload_len = strtoul(optarg, NULL, 0);
+			break;
+		case 'S':
+			cfg_segment = true;
+			break;
+		case 't':
+			cfg_tcp = true;
+			break;
+		case 'u':
+			cfg_connected = false;
+			break;
+		case 'z':
+			cfg_zerocopy = true;
+			break;
+		}
+	}
+
+	if (!bind_addr)
+		bind_addr = cfg_family == PF_INET6 ? "::" : "0.0.0.0";
+
+	setup_sockaddr(cfg_family, bind_addr, &cfg_dst_addr);
+
+	if (optind != argc)
+		usage(argv[0]);
+
+	if (cfg_family == PF_UNSPEC)
+		error(1, 0, "must pass one of -4 or -6");
+	if (cfg_tcp && !cfg_connected)
+		error(1, 0, "connectionless tcp makes no sense");
+	if (cfg_segment && cfg_sendmmsg)
+		error(1, 0, "cannot combine segment offload and sendmmsg");
+
+	if (cfg_family == PF_INET)
+		hdrlen = sizeof(struct iphdr) + sizeof(struct udphdr);
+	else
+		hdrlen = sizeof(struct ip6_hdr) + sizeof(struct udphdr);
+
+	cfg_mss = ETH_DATA_LEN - hdrlen;
+	max_len = ETH_MAX_MTU - hdrlen;
+
+	if (cfg_payload_len > max_len)
+		error(1, 0, "payload length %u exceeds max %u",
+		      cfg_payload_len, max_len);
+}
+
+static void set_pmtu_discover(int fd, bool is_ipv4)
+{
+	int level, name, val;
+
+	if (is_ipv4) {
+		level	= SOL_IP;
+		name	= IP_MTU_DISCOVER;
+		val	= IP_PMTUDISC_DO;
+	} else {
+		level	= SOL_IPV6;
+		name	= IPV6_MTU_DISCOVER;
+		val	= IPV6_PMTUDISC_DO;
+	}
+
+	if (setsockopt(fd, level, name, &val, sizeof(val)))
+		error(1, errno, "setsockopt path mtu");
+}
+
+int main(int argc, char **argv)
+{
+	unsigned long num_msgs, num_sends;
+	unsigned long tnow, treport, tstop;
+	int fd, i, val;
+
+	parse_opts(argc, argv);
+
+	if (cfg_cpu > 0)
+		set_cpu(cfg_cpu);
+
+	for (i = 0; i < sizeof(buf[0]); i++)
+		buf[0][i] = 'a' + (i % 26);
+	for (i = 1; i < NUM_PKT; i++)
+		memcpy(buf[i], buf[0], sizeof(buf[0]));
+
+	signal(SIGINT, sigint_handler);
+
+	fd = socket(cfg_family, cfg_tcp ? SOCK_STREAM : SOCK_DGRAM, 0);
+	if (fd == -1)
+		error(1, errno, "socket");
+
+	if (cfg_zerocopy) {
+		val = 1;
+		if (setsockopt(fd, SOL_SOCKET, SO_ZEROCOPY, &val, sizeof(val)))
+			error(1, errno, "setsockopt zerocopy");
+	}
+
+	if (cfg_connected &&
+	    connect(fd, (void *)&cfg_dst_addr, cfg_alen))
+		error(1, errno, "connect");
+
+	if (cfg_segment)
+		set_pmtu_discover(fd, cfg_family == PF_INET);
+
+	num_msgs = num_sends = 0;
+	tnow = gettimeofday_ms();
+	tstop = tnow + cfg_runtime_ms;
+	treport = tnow + 1000;
+
+	i = 0;
+	do {
+		if (cfg_tcp)
+			num_sends += send_tcp(fd, buf[i]);
+		else if (cfg_segment)
+			num_sends += send_udp_segment(fd, buf[i]);
+		else if (cfg_sendmmsg)
+			num_sends += send_udp_sendmmsg(fd, buf[i]);
+		else
+			num_sends += send_udp(fd, buf[i]);
+		num_msgs++;
+
+		if (cfg_zerocopy && ((num_msgs & 0xF) == 0))
+			flush_zerocopy(fd);
+
+		tnow = gettimeofday_ms();
+		if (tnow > treport) {
+			fprintf(stderr,
+				"%s tx: %6lu MB/s %8lu calls/s %6lu msg/s\n",
+				cfg_tcp ? "tcp" : "udp",
+				(num_msgs * cfg_payload_len) >> 20,
+				num_sends, num_msgs);
+			num_msgs = num_sends = 0;
+			treport = tnow + 1000;
+		}
+
+		/* cold cache when writing buffer */
+		if (cfg_cache_trash)
+			i = ++i < NUM_PKT ? i : 0;
+
+	} while (!interrupted && (cfg_runtime_ms == -1 || tnow < tstop));
+
+	if (close(fd))
+		error(1, errno, "close");
+
+	return 0;
+}
diff --git a/tools/testing/selftests/netfilter/Makefile b/tools/testing/selftests/netfilter/Makefile
new file mode 100644
index 000000000..a37cb1192
--- /dev/null
+++ b/tools/testing/selftests/netfilter/Makefile
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0
+# Makefile for netfilter selftests
+
+TEST_PROGS := nft_trans_stress.sh nft_nat.sh conntrack_icmp_related.sh
+
+include ../lib.mk
diff --git a/tools/testing/selftests/netfilter/config b/tools/testing/selftests/netfilter/config
new file mode 100644
index 000000000..59caa8f71
--- /dev/null
+++ b/tools/testing/selftests/netfilter/config
@@ -0,0 +1,2 @@
+CONFIG_NET_NS=y
+CONFIG_NF_TABLES_INET=y
diff --git a/tools/testing/selftests/netfilter/conntrack_icmp_related.sh b/tools/testing/selftests/netfilter/conntrack_icmp_related.sh
new file mode 100755
index 000000000..b48e1833b
--- /dev/null
+++ b/tools/testing/selftests/netfilter/conntrack_icmp_related.sh
@@ -0,0 +1,283 @@
+#!/bin/bash
+#
+# check that ICMP df-needed/pkttoobig icmp are set are set as related
+# state
+#
+# Setup is:
+#
+# nsclient1 -> nsrouter1 -> nsrouter2 -> nsclient2
+# MTU 1500, except for nsrouter2 <-> nsclient2 link (1280).
+# ping nsclient2 from nsclient1, checking that conntrack did set RELATED
+# 'fragmentation needed' icmp packet.
+#
+# In addition, nsrouter1 will perform IP masquerading, i.e. also
+# check the icmp errors are propagated to the correct host as per
+# nat of "established" icmp-echo "connection".
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+ret=0
+
+nft --version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+	echo "SKIP: Could not run test without nft tool"
+	exit $ksft_skip
+fi
+
+ip -Version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+	echo "SKIP: Could not run test without ip tool"
+	exit $ksft_skip
+fi
+
+cleanup() {
+	for i in 1 2;do ip netns del nsclient$i;done
+	for i in 1 2;do ip netns del nsrouter$i;done
+}
+
+ipv4() {
+    echo -n 192.168.$1.2
+}
+
+ipv6 () {
+    echo -n dead:$1::2
+}
+
+check_counter()
+{
+	ns=$1
+	name=$2
+	expect=$3
+	local lret=0
+
+	cnt=$(ip netns exec $ns nft list counter inet filter "$name" | grep -q "$expect")
+	if [ $? -ne 0 ]; then
+		echo "ERROR: counter $name in $ns has unexpected value (expected $expect)" 1>&2
+		ip netns exec $ns nft list counter inet filter "$name" 1>&2
+		lret=1
+	fi
+
+	return $lret
+}
+
+check_unknown()
+{
+	expect="packets 0 bytes 0"
+	for n in nsclient1 nsclient2 nsrouter1 nsrouter2; do
+		check_counter $n "unknown" "$expect"
+		if [ $? -ne 0 ] ;then
+			return 1
+		fi
+	done
+
+	return 0
+}
+
+for n in nsclient1 nsclient2 nsrouter1 nsrouter2; do
+  ip netns add $n
+  ip -net $n link set lo up
+done
+
+DEV=veth0
+ip link add $DEV netns nsclient1 type veth peer name eth1 netns nsrouter1
+DEV=veth0
+ip link add $DEV netns nsclient2 type veth peer name eth1 netns nsrouter2
+
+DEV=veth0
+ip link add $DEV netns nsrouter1 type veth peer name eth2 netns nsrouter2
+
+DEV=veth0
+for i in 1 2; do
+    ip -net nsclient$i link set $DEV up
+    ip -net nsclient$i addr add $(ipv4 $i)/24 dev $DEV
+    ip -net nsclient$i addr add $(ipv6 $i)/64 dev $DEV
+done
+
+ip -net nsrouter1 link set eth1 up
+ip -net nsrouter1 link set veth0 up
+
+ip -net nsrouter2 link set eth1 up
+ip -net nsrouter2 link set eth2 up
+
+ip -net nsclient1 route add default via 192.168.1.1
+ip -net nsclient1 -6 route add default via dead:1::1
+
+ip -net nsclient2 route add default via 192.168.2.1
+ip -net nsclient2 route add default via dead:2::1
+
+i=3
+ip -net nsrouter1 addr add 192.168.1.1/24 dev eth1
+ip -net nsrouter1 addr add 192.168.3.1/24 dev veth0
+ip -net nsrouter1 addr add dead:1::1/64 dev eth1
+ip -net nsrouter1 addr add dead:3::1/64 dev veth0
+ip -net nsrouter1 route add default via 192.168.3.10
+ip -net nsrouter1 -6 route add default via dead:3::10
+
+ip -net nsrouter2 addr add 192.168.2.1/24 dev eth1
+ip -net nsrouter2 addr add 192.168.3.10/24 dev eth2
+ip -net nsrouter2 addr add dead:2::1/64 dev eth1
+ip -net nsrouter2 addr add dead:3::10/64 dev eth2
+ip -net nsrouter2 route add default via 192.168.3.1
+ip -net nsrouter2 route add default via dead:3::1
+
+sleep 2
+for i in 4 6; do
+	ip netns exec nsrouter1 sysctl -q net.ipv$i.conf.all.forwarding=1
+	ip netns exec nsrouter2 sysctl -q net.ipv$i.conf.all.forwarding=1
+done
+
+for netns in nsrouter1 nsrouter2; do
+ip netns exec $netns nft -f - <<EOF
+table inet filter {
+	counter unknown { }
+	counter related { }
+	chain forward {
+		type filter hook forward priority 0; policy accept;
+		meta l4proto icmpv6 icmpv6 type "packet-too-big" ct state "related" counter name "related" accept
+		meta l4proto icmp icmp type "destination-unreachable" ct state "related" counter name "related" accept
+		meta l4proto { icmp, icmpv6 } ct state new,established accept
+		counter name "unknown" drop
+	}
+}
+EOF
+done
+
+ip netns exec nsclient1 nft -f - <<EOF
+table inet filter {
+	counter unknown { }
+	counter related { }
+	chain input {
+		type filter hook input priority 0; policy accept;
+		meta l4proto { icmp, icmpv6 } ct state established,untracked accept
+
+		meta l4proto { icmp, icmpv6 } ct state "related" counter name "related" accept
+		counter name "unknown" drop
+	}
+}
+EOF
+
+ip netns exec nsclient2 nft -f - <<EOF
+table inet filter {
+	counter unknown { }
+	counter new { }
+	counter established { }
+
+	chain input {
+		type filter hook input priority 0; policy accept;
+		meta l4proto { icmp, icmpv6 } ct state established,untracked accept
+
+		meta l4proto { icmp, icmpv6 } ct state "new" counter name "new" accept
+		meta l4proto { icmp, icmpv6 } ct state "established" counter name "established" accept
+		counter name "unknown" drop
+	}
+	chain output {
+		type filter hook output priority 0; policy accept;
+		meta l4proto { icmp, icmpv6 } ct state established,untracked accept
+
+		meta l4proto { icmp, icmpv6 } ct state "new" counter name "new"
+		meta l4proto { icmp, icmpv6 } ct state "established" counter name "established"
+		counter name "unknown" drop
+	}
+}
+EOF
+
+
+# make sure NAT core rewrites adress of icmp error if nat is used according to
+# conntrack nat information (icmp error will be directed at nsrouter1 address,
+# but it needs to be routed to nsclient1 address).
+ip netns exec nsrouter1 nft -f - <<EOF
+table ip nat {
+	chain postrouting {
+		type nat hook postrouting priority 0; policy accept;
+		ip protocol icmp oifname "veth0" counter masquerade
+	}
+}
+table ip6 nat {
+	chain postrouting {
+		type nat hook postrouting priority 0; policy accept;
+		ip6 nexthdr icmpv6 oifname "veth0" counter masquerade
+	}
+}
+EOF
+
+ip netns exec nsrouter2 ip link set eth1  mtu 1280
+ip netns exec nsclient2 ip link set veth0 mtu 1280
+sleep 1
+
+ip netns exec nsclient1 ping -c 1 -s 1000 -q -M do 192.168.2.2 >/dev/null
+if [ $? -ne 0 ]; then
+	echo "ERROR: netns ip routing/connectivity broken" 1>&2
+	cleanup
+	exit 1
+fi
+ip netns exec nsclient1 ping6 -q -c 1 -s 1000 dead:2::2 >/dev/null
+if [ $? -ne 0 ]; then
+	echo "ERROR: netns ipv6 routing/connectivity broken" 1>&2
+	cleanup
+	exit 1
+fi
+
+check_unknown
+if [ $? -ne 0 ]; then
+	ret=1
+fi
+
+expect="packets 0 bytes 0"
+for netns in nsrouter1 nsrouter2 nsclient1;do
+	check_counter "$netns" "related" "$expect"
+	if [ $? -ne 0 ]; then
+		ret=1
+	fi
+done
+
+expect="packets 2 bytes 2076"
+check_counter nsclient2 "new" "$expect"
+if [ $? -ne 0 ]; then
+	ret=1
+fi
+
+ip netns exec nsclient1 ping -q -c 1 -s 1300 -M do 192.168.2.2 > /dev/null
+if [ $? -eq 0 ]; then
+	echo "ERROR: ping should have failed with PMTU too big error" 1>&2
+	ret=1
+fi
+
+# nsrouter2 should have generated the icmp error, so
+# related counter should be 0 (its in forward).
+expect="packets 0 bytes 0"
+check_counter "nsrouter2" "related" "$expect"
+if [ $? -ne 0 ]; then
+	ret=1
+fi
+
+# but nsrouter1 should have seen it, same for nsclient1.
+expect="packets 1 bytes 576"
+for netns in nsrouter1 nsclient1;do
+	check_counter "$netns" "related" "$expect"
+	if [ $? -ne 0 ]; then
+		ret=1
+	fi
+done
+
+ip netns exec nsclient1 ping6 -c 1 -s 1300 dead:2::2 > /dev/null
+if [ $? -eq 0 ]; then
+	echo "ERROR: ping6 should have failed with PMTU too big error" 1>&2
+	ret=1
+fi
+
+expect="packets 2 bytes 1856"
+for netns in nsrouter1 nsclient1;do
+	check_counter "$netns" "related" "$expect"
+	if [ $? -ne 0 ]; then
+		ret=1
+	fi
+done
+
+if [ $ret -eq 0 ];then
+	echo "PASS: icmp mtu error had RELATED state"
+else
+	echo "ERROR: icmp error RELATED state test has failed"
+fi
+
+cleanup
+exit $ret
diff --git a/tools/testing/selftests/netfilter/nft_nat.sh b/tools/testing/selftests/netfilter/nft_nat.sh
new file mode 100755
index 000000000..f25f72a75
--- /dev/null
+++ b/tools/testing/selftests/netfilter/nft_nat.sh
@@ -0,0 +1,766 @@
+#!/bin/bash
+#
+# This test is for basic NAT functionality: snat, dnat, redirect, masquerade.
+#
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+ret=0
+
+nft --version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+	echo "SKIP: Could not run test without nft tool"
+	exit $ksft_skip
+fi
+
+ip -Version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+	echo "SKIP: Could not run test without ip tool"
+	exit $ksft_skip
+fi
+
+ip netns add ns0
+ip netns add ns1
+ip netns add ns2
+
+ip link add veth0 netns ns0 type veth peer name eth0 netns ns1 > /dev/null 2>&1
+if [ $? -ne 0 ];then
+    echo "SKIP: No virtual ethernet pair device support in kernel"
+    exit $ksft_skip
+fi
+ip link add veth1 netns ns0 type veth peer name eth0 netns ns2
+
+ip -net ns0 link set lo up
+ip -net ns0 link set veth0 up
+ip -net ns0 addr add 10.0.1.1/24 dev veth0
+ip -net ns0 addr add dead:1::1/64 dev veth0
+
+ip -net ns0 link set veth1 up
+ip -net ns0 addr add 10.0.2.1/24 dev veth1
+ip -net ns0 addr add dead:2::1/64 dev veth1
+
+for i in 1 2; do
+  ip -net ns$i link set lo up
+  ip -net ns$i link set eth0 up
+  ip -net ns$i addr add 10.0.$i.99/24 dev eth0
+  ip -net ns$i route add default via 10.0.$i.1
+  ip -net ns$i addr add dead:$i::99/64 dev eth0
+  ip -net ns$i route add default via dead:$i::1
+done
+
+bad_counter()
+{
+	local ns=$1
+	local counter=$2
+	local expect=$3
+
+	echo "ERROR: $counter counter in $ns has unexpected value (expected $expect)" 1>&2
+	ip netns exec $ns nft list counter inet filter $counter 1>&2
+}
+
+check_counters()
+{
+	ns=$1
+	local lret=0
+
+	cnt=$(ip netns exec $ns nft list counter inet filter ns0in | grep -q "packets 1 bytes 84")
+	if [ $? -ne 0 ]; then
+		bad_counter $ns ns0in "packets 1 bytes 84"
+		lret=1
+	fi
+	cnt=$(ip netns exec $ns nft list counter inet filter ns0out | grep -q "packets 1 bytes 84")
+	if [ $? -ne 0 ]; then
+		bad_counter $ns ns0out "packets 1 bytes 84"
+		lret=1
+	fi
+
+	expect="packets 1 bytes 104"
+	cnt=$(ip netns exec $ns nft list counter inet filter ns0in6 | grep -q "$expect")
+	if [ $? -ne 0 ]; then
+		bad_counter $ns ns0in6 "$expect"
+		lret=1
+	fi
+	cnt=$(ip netns exec $ns nft list counter inet filter ns0out6 | grep -q "$expect")
+	if [ $? -ne 0 ]; then
+		bad_counter $ns ns0out6 "$expect"
+		lret=1
+	fi
+
+	return $lret
+}
+
+check_ns0_counters()
+{
+	local ns=$1
+	local lret=0
+
+	cnt=$(ip netns exec ns0 nft list counter inet filter ns0in | grep -q "packets 0 bytes 0")
+	if [ $? -ne 0 ]; then
+		bad_counter ns0 ns0in "packets 0 bytes 0"
+		lret=1
+	fi
+
+	cnt=$(ip netns exec ns0 nft list counter inet filter ns0in6 | grep -q "packets 0 bytes 0")
+	if [ $? -ne 0 ]; then
+		bad_counter ns0 ns0in6 "packets 0 bytes 0"
+		lret=1
+	fi
+
+	cnt=$(ip netns exec ns0 nft list counter inet filter ns0out | grep -q "packets 0 bytes 0")
+	if [ $? -ne 0 ]; then
+		bad_counter ns0 ns0out "packets 0 bytes 0"
+		lret=1
+	fi
+	cnt=$(ip netns exec ns0 nft list counter inet filter ns0out6 | grep -q "packets 0 bytes 0")
+	if [ $? -ne 0 ]; then
+		bad_counter ns0 ns0out6 "packets 0 bytes 0"
+		lret=1
+	fi
+
+	for dir in "in" "out" ; do
+		expect="packets 1 bytes 84"
+		cnt=$(ip netns exec ns0 nft list counter inet filter ${ns}${dir} | grep -q "$expect")
+		if [ $? -ne 0 ]; then
+			bad_counter ns0 $ns$dir "$expect"
+			lret=1
+		fi
+
+		expect="packets 1 bytes 104"
+		cnt=$(ip netns exec ns0 nft list counter inet filter ${ns}${dir}6 | grep -q "$expect")
+		if [ $? -ne 0 ]; then
+			bad_counter ns0 $ns$dir6 "$expect"
+			lret=1
+		fi
+	done
+
+	return $lret
+}
+
+reset_counters()
+{
+	for i in 0 1 2;do
+		ip netns exec ns$i nft reset counters inet > /dev/null
+	done
+}
+
+test_local_dnat6()
+{
+	local lret=0
+ip netns exec ns0 nft -f - <<EOF
+table ip6 nat {
+	chain output {
+		type nat hook output priority 0; policy accept;
+		ip6 daddr dead:1::99 dnat to dead:2::99
+	}
+}
+EOF
+	if [ $? -ne 0 ]; then
+		echo "SKIP: Could not add add ip6 dnat hook"
+		return $ksft_skip
+	fi
+
+	# ping netns1, expect rewrite to netns2
+	ip netns exec ns0 ping -q -c 1 dead:1::99 > /dev/null
+	if [ $? -ne 0 ]; then
+		lret=1
+		echo "ERROR: ping6 failed"
+		return $lret
+	fi
+
+	expect="packets 0 bytes 0"
+	for dir in "in6" "out6" ; do
+		cnt=$(ip netns exec ns0 nft list counter inet filter ns1${dir} | grep -q "$expect")
+		if [ $? -ne 0 ]; then
+			bad_counter ns0 ns1$dir "$expect"
+			lret=1
+		fi
+	done
+
+	expect="packets 1 bytes 104"
+	for dir in "in6" "out6" ; do
+		cnt=$(ip netns exec ns0 nft list counter inet filter ns2${dir} | grep -q "$expect")
+		if [ $? -ne 0 ]; then
+			bad_counter ns0 ns2$dir "$expect"
+			lret=1
+		fi
+	done
+
+	# expect 0 count in ns1
+	expect="packets 0 bytes 0"
+	for dir in "in6" "out6" ; do
+		cnt=$(ip netns exec ns1 nft list counter inet filter ns0${dir} | grep -q "$expect")
+		if [ $? -ne 0 ]; then
+			bad_counter ns1 ns0$dir "$expect"
+			lret=1
+		fi
+	done
+
+	# expect 1 packet in ns2
+	expect="packets 1 bytes 104"
+	for dir in "in6" "out6" ; do
+		cnt=$(ip netns exec ns2 nft list counter inet filter ns0${dir} | grep -q "$expect")
+		if [ $? -ne 0 ]; then
+			bad_counter ns2 ns0$dir "$expect"
+			lret=1
+		fi
+	done
+
+	test $lret -eq 0 && echo "PASS: ipv6 ping to ns1 was NATted to ns2"
+	ip netns exec ns0 nft flush chain ip6 nat output
+
+	return $lret
+}
+
+test_local_dnat()
+{
+	local lret=0
+ip netns exec ns0 nft -f - <<EOF
+table ip nat {
+	chain output {
+		type nat hook output priority 0; policy accept;
+		ip daddr 10.0.1.99 dnat to 10.0.2.99
+	}
+}
+EOF
+	# ping netns1, expect rewrite to netns2
+	ip netns exec ns0 ping -q -c 1 10.0.1.99 > /dev/null
+	if [ $? -ne 0 ]; then
+		lret=1
+		echo "ERROR: ping failed"
+		return $lret
+	fi
+
+	expect="packets 0 bytes 0"
+	for dir in "in" "out" ; do
+		cnt=$(ip netns exec ns0 nft list counter inet filter ns1${dir} | grep -q "$expect")
+		if [ $? -ne 0 ]; then
+			bad_counter ns0 ns1$dir "$expect"
+			lret=1
+		fi
+	done
+
+	expect="packets 1 bytes 84"
+	for dir in "in" "out" ; do
+		cnt=$(ip netns exec ns0 nft list counter inet filter ns2${dir} | grep -q "$expect")
+		if [ $? -ne 0 ]; then
+			bad_counter ns0 ns2$dir "$expect"
+			lret=1
+		fi
+	done
+
+	# expect 0 count in ns1
+	expect="packets 0 bytes 0"
+	for dir in "in" "out" ; do
+		cnt=$(ip netns exec ns1 nft list counter inet filter ns0${dir} | grep -q "$expect")
+		if [ $? -ne 0 ]; then
+			bad_counter ns1 ns0$dir "$expect"
+			lret=1
+		fi
+	done
+
+	# expect 1 packet in ns2
+	expect="packets 1 bytes 84"
+	for dir in "in" "out" ; do
+		cnt=$(ip netns exec ns2 nft list counter inet filter ns0${dir} | grep -q "$expect")
+		if [ $? -ne 0 ]; then
+			bad_counter ns2 ns0$dir "$expect"
+			lret=1
+		fi
+	done
+
+	test $lret -eq 0 && echo "PASS: ping to ns1 was NATted to ns2"
+
+	ip netns exec ns0 nft flush chain ip nat output
+
+	reset_counters
+	ip netns exec ns0 ping -q -c 1 10.0.1.99 > /dev/null
+	if [ $? -ne 0 ]; then
+		lret=1
+		echo "ERROR: ping failed"
+		return $lret
+	fi
+
+	expect="packets 1 bytes 84"
+	for dir in "in" "out" ; do
+		cnt=$(ip netns exec ns0 nft list counter inet filter ns1${dir} | grep -q "$expect")
+		if [ $? -ne 0 ]; then
+			bad_counter ns1 ns1$dir "$expect"
+			lret=1
+		fi
+	done
+	expect="packets 0 bytes 0"
+	for dir in "in" "out" ; do
+		cnt=$(ip netns exec ns0 nft list counter inet filter ns2${dir} | grep -q "$expect")
+		if [ $? -ne 0 ]; then
+			bad_counter ns0 ns2$dir "$expect"
+			lret=1
+		fi
+	done
+
+	# expect 1 count in ns1
+	expect="packets 1 bytes 84"
+	for dir in "in" "out" ; do
+		cnt=$(ip netns exec ns1 nft list counter inet filter ns0${dir} | grep -q "$expect")
+		if [ $? -ne 0 ]; then
+			bad_counter ns0 ns0$dir "$expect"
+			lret=1
+		fi
+	done
+
+	# expect 0 packet in ns2
+	expect="packets 0 bytes 0"
+	for dir in "in" "out" ; do
+		cnt=$(ip netns exec ns2 nft list counter inet filter ns0${dir} | grep -q "$expect")
+		if [ $? -ne 0 ]; then
+			bad_counter ns2 ns2$dir "$expect"
+			lret=1
+		fi
+	done
+
+	test $lret -eq 0 && echo "PASS: ping to ns1 OK after nat output chain flush"
+
+	return $lret
+}
+
+
+test_masquerade6()
+{
+	local lret=0
+
+	ip netns exec ns0 sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
+
+	ip netns exec ns2 ping -q -c 1 dead:1::99 > /dev/null # ping ns2->ns1
+	if [ $? -ne 0 ] ; then
+		echo "ERROR: cannot ping ns1 from ns2 via ipv6"
+		return 1
+		lret=1
+	fi
+
+	expect="packets 1 bytes 104"
+	for dir in "in6" "out6" ; do
+		cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect")
+		if [ $? -ne 0 ]; then
+			bad_counter ns1 ns2$dir "$expect"
+			lret=1
+		fi
+
+		cnt=$(ip netns exec ns2 nft list counter inet filter ns1${dir} | grep -q "$expect")
+		if [ $? -ne 0 ]; then
+			bad_counter ns2 ns1$dir "$expect"
+			lret=1
+		fi
+	done
+
+	reset_counters
+
+# add masquerading rule
+ip netns exec ns0 nft -f - <<EOF
+table ip6 nat {
+	chain postrouting {
+		type nat hook postrouting priority 0; policy accept;
+		meta oif veth0 masquerade
+	}
+}
+EOF
+	ip netns exec ns2 ping -q -c 1 dead:1::99 > /dev/null # ping ns2->ns1
+	if [ $? -ne 0 ] ; then
+		echo "ERROR: cannot ping ns1 from ns2 with active ipv6 masquerading"
+		lret=1
+	fi
+
+	# ns1 should have seen packets from ns0, due to masquerade
+	expect="packets 1 bytes 104"
+	for dir in "in6" "out6" ; do
+
+		cnt=$(ip netns exec ns1 nft list counter inet filter ns0${dir} | grep -q "$expect")
+		if [ $? -ne 0 ]; then
+			bad_counter ns1 ns0$dir "$expect"
+			lret=1
+		fi
+
+		cnt=$(ip netns exec ns2 nft list counter inet filter ns1${dir} | grep -q "$expect")
+		if [ $? -ne 0 ]; then
+			bad_counter ns2 ns1$dir "$expect"
+			lret=1
+		fi
+	done
+
+	# ns1 should not have seen packets from ns2, due to masquerade
+	expect="packets 0 bytes 0"
+	for dir in "in6" "out6" ; do
+		cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect")
+		if [ $? -ne 0 ]; then
+			bad_counter ns1 ns0$dir "$expect"
+			lret=1
+		fi
+
+		cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect")
+		if [ $? -ne 0 ]; then
+			bad_counter ns2 ns1$dir "$expect"
+			lret=1
+		fi
+	done
+
+	ip netns exec ns0 nft flush chain ip6 nat postrouting
+	if [ $? -ne 0 ]; then
+		echo "ERROR: Could not flush ip6 nat postrouting" 1>&2
+		lret=1
+	fi
+
+	test $lret -eq 0 && echo "PASS: IPv6 masquerade for ns2"
+
+	return $lret
+}
+
+test_masquerade()
+{
+	local lret=0
+
+	ip netns exec ns0 sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null
+	ip netns exec ns0 sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null
+
+	ip netns exec ns2 ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1
+	if [ $? -ne 0 ] ; then
+		echo "ERROR: canot ping ns1 from ns2"
+		lret=1
+	fi
+
+	expect="packets 1 bytes 84"
+	for dir in "in" "out" ; do
+		cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect")
+		if [ $? -ne 0 ]; then
+			bad_counter ns1 ns2$dir "$expect"
+			lret=1
+		fi
+
+		cnt=$(ip netns exec ns2 nft list counter inet filter ns1${dir} | grep -q "$expect")
+		if [ $? -ne 0 ]; then
+			bad_counter ns2 ns1$dir "$expect"
+			lret=1
+		fi
+	done
+
+	reset_counters
+
+# add masquerading rule
+ip netns exec ns0 nft -f - <<EOF
+table ip nat {
+	chain postrouting {
+		type nat hook postrouting priority 0; policy accept;
+		meta oif veth0 masquerade
+	}
+}
+EOF
+	ip netns exec ns2 ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1
+	if [ $? -ne 0 ] ; then
+		echo "ERROR: cannot ping ns1 from ns2 with active ip masquerading"
+		lret=1
+	fi
+
+	# ns1 should have seen packets from ns0, due to masquerade
+	expect="packets 1 bytes 84"
+	for dir in "in" "out" ; do
+		cnt=$(ip netns exec ns1 nft list counter inet filter ns0${dir} | grep -q "$expect")
+		if [ $? -ne 0 ]; then
+			bad_counter ns1 ns0$dir "$expect"
+			lret=1
+		fi
+
+		cnt=$(ip netns exec ns2 nft list counter inet filter ns1${dir} | grep -q "$expect")
+		if [ $? -ne 0 ]; then
+			bad_counter ns2 ns1$dir "$expect"
+			lret=1
+		fi
+	done
+
+	# ns1 should not have seen packets from ns2, due to masquerade
+	expect="packets 0 bytes 0"
+	for dir in "in" "out" ; do
+		cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect")
+		if [ $? -ne 0 ]; then
+			bad_counter ns1 ns0$dir "$expect"
+			lret=1
+		fi
+
+		cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect")
+		if [ $? -ne 0 ]; then
+			bad_counter ns2 ns1$dir "$expect"
+			lret=1
+		fi
+	done
+
+	ip netns exec ns0 nft flush chain ip nat postrouting
+	if [ $? -ne 0 ]; then
+		echo "ERROR: Could not flush nat postrouting" 1>&2
+		lret=1
+	fi
+
+	test $lret -eq 0 && echo "PASS: IP masquerade for ns2"
+
+	return $lret
+}
+
+test_redirect6()
+{
+	local lret=0
+
+	ip netns exec ns0 sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
+
+	ip netns exec ns2 ping -q -c 1 dead:1::99 > /dev/null # ping ns2->ns1
+	if [ $? -ne 0 ] ; then
+		echo "ERROR: cannnot ping ns1 from ns2 via ipv6"
+		lret=1
+	fi
+
+	expect="packets 1 bytes 104"
+	for dir in "in6" "out6" ; do
+		cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect")
+		if [ $? -ne 0 ]; then
+			bad_counter ns1 ns2$dir "$expect"
+			lret=1
+		fi
+
+		cnt=$(ip netns exec ns2 nft list counter inet filter ns1${dir} | grep -q "$expect")
+		if [ $? -ne 0 ]; then
+			bad_counter ns2 ns1$dir "$expect"
+			lret=1
+		fi
+	done
+
+	reset_counters
+
+# add redirect rule
+ip netns exec ns0 nft -f - <<EOF
+table ip6 nat {
+	chain prerouting {
+		type nat hook prerouting priority 0; policy accept;
+		meta iif veth1 meta l4proto icmpv6 ip6 saddr dead:2::99 ip6 daddr dead:1::99 redirect
+	}
+}
+EOF
+	ip netns exec ns2 ping -q -c 1 dead:1::99 > /dev/null # ping ns2->ns1
+	if [ $? -ne 0 ] ; then
+		echo "ERROR: cannot ping ns1 from ns2 with active ip6 redirect"
+		lret=1
+	fi
+
+	# ns1 should have seen no packets from ns2, due to redirection
+	expect="packets 0 bytes 0"
+	for dir in "in6" "out6" ; do
+		cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect")
+		if [ $? -ne 0 ]; then
+			bad_counter ns1 ns0$dir "$expect"
+			lret=1
+		fi
+	done
+
+	# ns0 should have seen packets from ns2, due to masquerade
+	expect="packets 1 bytes 104"
+	for dir in "in6" "out6" ; do
+		cnt=$(ip netns exec ns0 nft list counter inet filter ns2${dir} | grep -q "$expect")
+		if [ $? -ne 0 ]; then
+			bad_counter ns1 ns0$dir "$expect"
+			lret=1
+		fi
+	done
+
+	ip netns exec ns0 nft delete table ip6 nat
+	if [ $? -ne 0 ]; then
+		echo "ERROR: Could not delete ip6 nat table" 1>&2
+		lret=1
+	fi
+
+	test $lret -eq 0 && echo "PASS: IPv6 redirection for ns2"
+
+	return $lret
+}
+
+test_redirect()
+{
+	local lret=0
+
+	ip netns exec ns0 sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null
+	ip netns exec ns0 sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null
+
+	ip netns exec ns2 ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1
+	if [ $? -ne 0 ] ; then
+		echo "ERROR: cannot ping ns1 from ns2"
+		lret=1
+	fi
+
+	expect="packets 1 bytes 84"
+	for dir in "in" "out" ; do
+		cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect")
+		if [ $? -ne 0 ]; then
+			bad_counter ns1 ns2$dir "$expect"
+			lret=1
+		fi
+
+		cnt=$(ip netns exec ns2 nft list counter inet filter ns1${dir} | grep -q "$expect")
+		if [ $? -ne 0 ]; then
+			bad_counter ns2 ns1$dir "$expect"
+			lret=1
+		fi
+	done
+
+	reset_counters
+
+# add redirect rule
+ip netns exec ns0 nft -f - <<EOF
+table ip nat {
+	chain prerouting {
+		type nat hook prerouting priority 0; policy accept;
+		meta iif veth1 ip protocol icmp ip saddr 10.0.2.99 ip daddr 10.0.1.99 redirect
+	}
+}
+EOF
+	ip netns exec ns2 ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1
+	if [ $? -ne 0 ] ; then
+		echo "ERROR: cannot ping ns1 from ns2 with active ip redirect"
+		lret=1
+	fi
+
+	# ns1 should have seen no packets from ns2, due to redirection
+	expect="packets 0 bytes 0"
+	for dir in "in" "out" ; do
+
+		cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect")
+		if [ $? -ne 0 ]; then
+			bad_counter ns1 ns0$dir "$expect"
+			lret=1
+		fi
+	done
+
+	# ns0 should have seen packets from ns2, due to masquerade
+	expect="packets 1 bytes 84"
+	for dir in "in" "out" ; do
+		cnt=$(ip netns exec ns0 nft list counter inet filter ns2${dir} | grep -q "$expect")
+		if [ $? -ne 0 ]; then
+			bad_counter ns1 ns0$dir "$expect"
+			lret=1
+		fi
+	done
+
+	ip netns exec ns0 nft delete table ip nat
+	if [ $? -ne 0 ]; then
+		echo "ERROR: Could not delete nat table" 1>&2
+		lret=1
+	fi
+
+	test $lret -eq 0 && echo "PASS: IP redirection for ns2"
+
+	return $lret
+}
+
+
+# ip netns exec ns0 ping -c 1 -q 10.0.$i.99
+for i in 0 1 2; do
+ip netns exec ns$i nft -f - <<EOF
+table inet filter {
+	counter ns0in {}
+	counter ns1in {}
+	counter ns2in {}
+
+	counter ns0out {}
+	counter ns1out {}
+	counter ns2out {}
+
+	counter ns0in6 {}
+	counter ns1in6 {}
+	counter ns2in6 {}
+
+	counter ns0out6 {}
+	counter ns1out6 {}
+	counter ns2out6 {}
+
+	map nsincounter {
+		type ipv4_addr : counter
+		elements = { 10.0.1.1 : "ns0in",
+			     10.0.2.1 : "ns0in",
+			     10.0.1.99 : "ns1in",
+			     10.0.2.99 : "ns2in" }
+	}
+
+	map nsincounter6 {
+		type ipv6_addr : counter
+		elements = { dead:1::1 : "ns0in6",
+			     dead:2::1 : "ns0in6",
+			     dead:1::99 : "ns1in6",
+			     dead:2::99 : "ns2in6" }
+	}
+
+	map nsoutcounter {
+		type ipv4_addr : counter
+		elements = { 10.0.1.1 : "ns0out",
+			     10.0.2.1 : "ns0out",
+			     10.0.1.99: "ns1out",
+			     10.0.2.99: "ns2out" }
+	}
+
+	map nsoutcounter6 {
+		type ipv6_addr : counter
+		elements = { dead:1::1 : "ns0out6",
+			     dead:2::1 : "ns0out6",
+			     dead:1::99 : "ns1out6",
+			     dead:2::99 : "ns2out6" }
+	}
+
+	chain input {
+		type filter hook input priority 0; policy accept;
+		counter name ip saddr map @nsincounter
+		icmpv6 type { "echo-request", "echo-reply" } counter name ip6 saddr map @nsincounter6
+	}
+	chain output {
+		type filter hook output priority 0; policy accept;
+		counter name ip daddr map @nsoutcounter
+		icmpv6 type { "echo-request", "echo-reply" } counter name ip6 daddr map @nsoutcounter6
+	}
+}
+EOF
+done
+
+sleep 3
+# test basic connectivity
+for i in 1 2; do
+  ip netns exec ns0 ping -c 1 -q 10.0.$i.99 > /dev/null
+  if [ $? -ne 0 ];then
+  	echo "ERROR: Could not reach other namespace(s)" 1>&2
+	ret=1
+  fi
+
+  ip netns exec ns0 ping -c 1 -q dead:$i::99 > /dev/null
+  if [ $? -ne 0 ];then
+	echo "ERROR: Could not reach other namespace(s) via ipv6" 1>&2
+	ret=1
+  fi
+  check_counters ns$i
+  if [ $? -ne 0 ]; then
+	ret=1
+  fi
+
+  check_ns0_counters ns$i
+  if [ $? -ne 0 ]; then
+	ret=1
+  fi
+  reset_counters
+done
+
+if [ $ret -eq 0 ];then
+	echo "PASS: netns routing/connectivity: ns0 can reach ns1 and ns2"
+fi
+
+reset_counters
+test_local_dnat
+test_local_dnat6
+
+reset_counters
+test_masquerade
+test_masquerade6
+
+reset_counters
+test_redirect
+test_redirect6
+
+for i in 0 1 2; do ip netns del ns$i;done
+
+exit $ret
diff --git a/tools/testing/selftests/netfilter/nft_trans_stress.sh b/tools/testing/selftests/netfilter/nft_trans_stress.sh
new file mode 100755
index 000000000..f1affd12c
--- /dev/null
+++ b/tools/testing/selftests/netfilter/nft_trans_stress.sh
@@ -0,0 +1,78 @@
+#!/bin/bash
+#
+# This test is for stress-testing the nf_tables config plane path vs.
+# packet path processing: Make sure we never release rules that are
+# still visible to other cpus.
+#
+# set -e
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+testns=testns1
+tables="foo bar baz quux"
+
+nft --version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+	echo "SKIP: Could not run test without nft tool"
+	exit $ksft_skip
+fi
+
+ip -Version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+	echo "SKIP: Could not run test without ip tool"
+	exit $ksft_skip
+fi
+
+tmp=$(mktemp)
+
+for table in $tables; do
+	echo add table inet "$table" >> "$tmp"
+	echo flush table inet "$table" >> "$tmp"
+
+	echo "add chain inet $table INPUT { type filter hook input priority 0; }" >> "$tmp"
+	echo "add chain inet $table OUTPUT { type filter hook output priority 0; }" >> "$tmp"
+	for c in $(seq 1 400); do
+		chain=$(printf "chain%03u" "$c")
+		echo "add chain inet $table $chain" >> "$tmp"
+	done
+
+	for c in $(seq 1 400); do
+		chain=$(printf "chain%03u" "$c")
+		for BASE in INPUT OUTPUT; do
+			echo "add rule inet $table $BASE counter jump $chain" >> "$tmp"
+		done
+		echo "add rule inet $table $chain counter return" >> "$tmp"
+	done
+done
+
+ip netns add "$testns"
+ip -netns "$testns" link set lo up
+
+lscpu | grep ^CPU\(s\): | ( read cpu cpunum ;
+cpunum=$((cpunum-1))
+for i in $(seq 0 $cpunum);do
+	mask=$(printf 0x%x $((1<<$i)))
+        ip netns exec "$testns" taskset $mask ping -4 127.0.0.1 -fq > /dev/null &
+        ip netns exec "$testns" taskset $mask ping -6 ::1 -fq > /dev/null &
+done)
+
+sleep 1
+
+for i in $(seq 1 10) ; do ip netns exec "$testns" nft -f "$tmp" & done
+
+for table in $tables;do
+	randsleep=$((RANDOM%10))
+	sleep $randsleep
+	ip netns exec "$testns" nft delete table inet $table 2>/dev/null
+done
+
+randsleep=$((RANDOM%10))
+sleep $randsleep
+
+pkill -9 ping
+
+wait
+
+rm -f "$tmp"
+ip netns del "$testns"
diff --git a/tools/testing/selftests/networking/timestamping/.gitignore b/tools/testing/selftests/networking/timestamping/.gitignore
new file mode 100644
index 000000000..d9355035e
--- /dev/null
+++ b/tools/testing/selftests/networking/timestamping/.gitignore
@@ -0,0 +1,4 @@
+timestamping
+rxtimestamp
+txtimestamp
+hwtstamp_config
diff --git a/tools/testing/selftests/networking/timestamping/Makefile b/tools/testing/selftests/networking/timestamping/Makefile
new file mode 100644
index 000000000..c46c0eefa
--- /dev/null
+++ b/tools/testing/selftests/networking/timestamping/Makefile
@@ -0,0 +1,13 @@
+# SPDX-License-Identifier: GPL-2.0
+CFLAGS += -I../../../../../usr/include
+
+TEST_PROGS := hwtstamp_config rxtimestamp timestamping txtimestamp
+
+all: $(TEST_PROGS)
+
+top_srcdir = ../../../../..
+KSFT_KHDR_INSTALL := 1
+include ../../lib.mk
+
+clean:
+	rm -fr $(TEST_PROGS)
diff --git a/tools/testing/selftests/networking/timestamping/hwtstamp_config.c b/tools/testing/selftests/networking/timestamping/hwtstamp_config.c
new file mode 100644
index 000000000..e1fdee841
--- /dev/null
+++ b/tools/testing/selftests/networking/timestamping/hwtstamp_config.c
@@ -0,0 +1,135 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Test program for SIOC{G,S}HWTSTAMP
+ * Copyright 2013 Solarflare Communications
+ * Author: Ben Hutchings
+ */
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <sys/socket.h>
+#include <sys/ioctl.h>
+
+#include <linux/if.h>
+#include <linux/net_tstamp.h>
+#include <linux/sockios.h>
+
+static int
+lookup_value(const char **names, int size, const char *name)
+{
+	int value;
+
+	for (value = 0; value < size; value++)
+		if (names[value] && strcasecmp(names[value], name) == 0)
+			return value;
+
+	return -1;
+}
+
+static const char *
+lookup_name(const char **names, int size, int value)
+{
+	return (value >= 0 && value < size) ? names[value] : NULL;
+}
+
+static void list_names(FILE *f, const char **names, int size)
+{
+	int value;
+
+	for (value = 0; value < size; value++)
+		if (names[value])
+			fprintf(f, "    %s\n", names[value]);
+}
+
+static const char *tx_types[] = {
+#define TX_TYPE(name) [HWTSTAMP_TX_ ## name] = #name
+	TX_TYPE(OFF),
+	TX_TYPE(ON),
+	TX_TYPE(ONESTEP_SYNC)
+#undef TX_TYPE
+};
+#define N_TX_TYPES ((int)(sizeof(tx_types) / sizeof(tx_types[0])))
+
+static const char *rx_filters[] = {
+#define RX_FILTER(name) [HWTSTAMP_FILTER_ ## name] = #name
+	RX_FILTER(NONE),
+	RX_FILTER(ALL),
+	RX_FILTER(SOME),
+	RX_FILTER(PTP_V1_L4_EVENT),
+	RX_FILTER(PTP_V1_L4_SYNC),
+	RX_FILTER(PTP_V1_L4_DELAY_REQ),
+	RX_FILTER(PTP_V2_L4_EVENT),
+	RX_FILTER(PTP_V2_L4_SYNC),
+	RX_FILTER(PTP_V2_L4_DELAY_REQ),
+	RX_FILTER(PTP_V2_L2_EVENT),
+	RX_FILTER(PTP_V2_L2_SYNC),
+	RX_FILTER(PTP_V2_L2_DELAY_REQ),
+	RX_FILTER(PTP_V2_EVENT),
+	RX_FILTER(PTP_V2_SYNC),
+	RX_FILTER(PTP_V2_DELAY_REQ),
+#undef RX_FILTER
+};
+#define N_RX_FILTERS ((int)(sizeof(rx_filters) / sizeof(rx_filters[0])))
+
+static void usage(void)
+{
+	fputs("Usage: hwtstamp_config if_name [tx_type rx_filter]\n"
+	      "tx_type is any of (case-insensitive):\n",
+	      stderr);
+	list_names(stderr, tx_types, N_TX_TYPES);
+	fputs("rx_filter is any of (case-insensitive):\n", stderr);
+	list_names(stderr, rx_filters, N_RX_FILTERS);
+}
+
+int main(int argc, char **argv)
+{
+	struct ifreq ifr;
+	struct hwtstamp_config config;
+	const char *name;
+	int sock;
+
+	if ((argc != 2 && argc != 4) || (strlen(argv[1]) >= IFNAMSIZ)) {
+		usage();
+		return 2;
+	}
+
+	if (argc == 4) {
+		config.flags = 0;
+		config.tx_type = lookup_value(tx_types, N_TX_TYPES, argv[2]);
+		config.rx_filter = lookup_value(rx_filters, N_RX_FILTERS, argv[3]);
+		if (config.tx_type < 0 || config.rx_filter < 0) {
+			usage();
+			return 2;
+		}
+	}
+
+	sock = socket(AF_INET, SOCK_DGRAM, 0);
+	if (sock < 0) {
+		perror("socket");
+		return 1;
+	}
+
+	strcpy(ifr.ifr_name, argv[1]);
+	ifr.ifr_data = (caddr_t)&config;
+
+	if (ioctl(sock, (argc == 2) ? SIOCGHWTSTAMP : SIOCSHWTSTAMP, &ifr)) {
+		perror("ioctl");
+		return 1;
+	}
+
+	printf("flags = %#x\n", config.flags);
+	name = lookup_name(tx_types, N_TX_TYPES, config.tx_type);
+	if (name)
+		printf("tx_type = %s\n", name);
+	else
+		printf("tx_type = %d\n", config.tx_type);
+	name = lookup_name(rx_filters, N_RX_FILTERS, config.rx_filter);
+	if (name)
+		printf("rx_filter = %s\n", name);
+	else
+		printf("rx_filter = %d\n", config.rx_filter);
+
+	return 0;
+}
diff --git a/tools/testing/selftests/networking/timestamping/rxtimestamp.c b/tools/testing/selftests/networking/timestamping/rxtimestamp.c
new file mode 100644
index 000000000..c6428f1ac
--- /dev/null
+++ b/tools/testing/selftests/networking/timestamping/rxtimestamp.c
@@ -0,0 +1,389 @@
+#include <errno.h>
+#include <error.h>
+#include <getopt.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <sys/time.h>
+#include <sys/socket.h>
+#include <sys/select.h>
+#include <sys/ioctl.h>
+#include <arpa/inet.h>
+#include <net/if.h>
+
+#include <asm/types.h>
+#include <linux/net_tstamp.h>
+#include <linux/errqueue.h>
+
+#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
+
+struct options {
+	int so_timestamp;
+	int so_timestampns;
+	int so_timestamping;
+};
+
+struct tstamps {
+	bool tstamp;
+	bool tstampns;
+	bool swtstamp;
+	bool hwtstamp;
+};
+
+struct socket_type {
+	char *friendly_name;
+	int type;
+	int protocol;
+	bool enabled;
+};
+
+struct test_case {
+	struct options sockopt;
+	struct tstamps expected;
+	bool enabled;
+};
+
+struct sof_flag {
+	int mask;
+	char *name;
+};
+
+static struct sof_flag sof_flags[] = {
+#define SOF_FLAG(f) { f, #f }
+	SOF_FLAG(SOF_TIMESTAMPING_SOFTWARE),
+	SOF_FLAG(SOF_TIMESTAMPING_RX_SOFTWARE),
+	SOF_FLAG(SOF_TIMESTAMPING_RX_HARDWARE),
+};
+
+static struct socket_type socket_types[] = {
+	{ "ip",		SOCK_RAW,	IPPROTO_EGP },
+	{ "udp",	SOCK_DGRAM,	IPPROTO_UDP },
+	{ "tcp",	SOCK_STREAM,	IPPROTO_TCP },
+};
+
+static struct test_case test_cases[] = {
+	{ {}, {} },
+	{
+		{ so_timestamp: 1 },
+		{ tstamp: true }
+	},
+	{
+		{ so_timestampns: 1 },
+		{ tstampns: true }
+	},
+	{
+		{ so_timestamp: 1, so_timestampns: 1 },
+		{ tstampns: true }
+	},
+	{
+		{ so_timestamping: SOF_TIMESTAMPING_RX_SOFTWARE },
+		{}
+	},
+	{
+		/* Loopback device does not support hw timestamps. */
+		{ so_timestamping: SOF_TIMESTAMPING_RX_HARDWARE },
+		{}
+	},
+	{
+		{ so_timestamping: SOF_TIMESTAMPING_SOFTWARE },
+		{}
+	},
+	{
+		{ so_timestamping: SOF_TIMESTAMPING_RX_SOFTWARE
+			| SOF_TIMESTAMPING_RX_HARDWARE },
+		{}
+	},
+	{
+		{ so_timestamping: SOF_TIMESTAMPING_SOFTWARE
+			| SOF_TIMESTAMPING_RX_SOFTWARE },
+		{ swtstamp: true }
+	},
+	{
+		{ so_timestamp: 1, so_timestamping: SOF_TIMESTAMPING_SOFTWARE
+			| SOF_TIMESTAMPING_RX_SOFTWARE },
+		{ tstamp: true, swtstamp: true }
+	},
+};
+
+static struct option long_options[] = {
+	{ "list_tests", no_argument, 0, 'l' },
+	{ "test_num", required_argument, 0, 'n' },
+	{ "op_size", required_argument, 0, 's' },
+	{ "tcp", no_argument, 0, 't' },
+	{ "udp", no_argument, 0, 'u' },
+	{ "ip", no_argument, 0, 'i' },
+	{ NULL, 0, NULL, 0 },
+};
+
+static int next_port = 19999;
+static int op_size = 10 * 1024;
+
+void print_test_case(struct test_case *t)
+{
+	int f = 0;
+
+	printf("sockopts {");
+	if (t->sockopt.so_timestamp)
+		printf(" SO_TIMESTAMP ");
+	if (t->sockopt.so_timestampns)
+		printf(" SO_TIMESTAMPNS ");
+	if (t->sockopt.so_timestamping) {
+		printf(" SO_TIMESTAMPING: {");
+		for (f = 0; f < ARRAY_SIZE(sof_flags); f++)
+			if (t->sockopt.so_timestamping & sof_flags[f].mask)
+				printf(" %s |", sof_flags[f].name);
+		printf("}");
+	}
+	printf("} expected cmsgs: {");
+	if (t->expected.tstamp)
+		printf(" SCM_TIMESTAMP ");
+	if (t->expected.tstampns)
+		printf(" SCM_TIMESTAMPNS ");
+	if (t->expected.swtstamp || t->expected.hwtstamp) {
+		printf(" SCM_TIMESTAMPING {");
+		if (t->expected.swtstamp)
+			printf("0");
+		if (t->expected.swtstamp && t->expected.hwtstamp)
+			printf(",");
+		if (t->expected.hwtstamp)
+			printf("2");
+		printf("}");
+	}
+	printf("}\n");
+}
+
+void do_send(int src)
+{
+	int r;
+	char *buf = malloc(op_size);
+
+	memset(buf, 'z', op_size);
+	r = write(src, buf, op_size);
+	if (r < 0)
+		error(1, errno, "Failed to sendmsg");
+
+	free(buf);
+}
+
+bool do_recv(int rcv, int read_size, struct tstamps expected)
+{
+	const int CMSG_SIZE = 1024;
+
+	struct scm_timestamping *ts;
+	struct tstamps actual = {};
+	char cmsg_buf[CMSG_SIZE];
+	struct iovec recv_iov;
+	struct cmsghdr *cmsg;
+	bool failed = false;
+	struct msghdr hdr;
+	int flags = 0;
+	int r;
+
+	memset(&hdr, 0, sizeof(hdr));
+	hdr.msg_iov = &recv_iov;
+	hdr.msg_iovlen = 1;
+	recv_iov.iov_base = malloc(read_size);
+	recv_iov.iov_len = read_size;
+
+	hdr.msg_control = cmsg_buf;
+	hdr.msg_controllen = sizeof(cmsg_buf);
+
+	r = recvmsg(rcv, &hdr, flags);
+	if (r < 0)
+		error(1, errno, "Failed to recvmsg");
+	if (r != read_size)
+		error(1, 0, "Only received %d bytes of payload.", r);
+
+	if (hdr.msg_flags & (MSG_TRUNC | MSG_CTRUNC))
+		error(1, 0, "Message was truncated.");
+
+	for (cmsg = CMSG_FIRSTHDR(&hdr); cmsg != NULL;
+	     cmsg = CMSG_NXTHDR(&hdr, cmsg)) {
+		if (cmsg->cmsg_level != SOL_SOCKET)
+			error(1, 0, "Unexpected cmsg_level %d",
+			      cmsg->cmsg_level);
+		switch (cmsg->cmsg_type) {
+		case SCM_TIMESTAMP:
+			actual.tstamp = true;
+			break;
+		case SCM_TIMESTAMPNS:
+			actual.tstampns = true;
+			break;
+		case SCM_TIMESTAMPING:
+			ts = (struct scm_timestamping *)CMSG_DATA(cmsg);
+			actual.swtstamp = !!ts->ts[0].tv_sec;
+			if (ts->ts[1].tv_sec != 0)
+				error(0, 0, "ts[1] should not be set.");
+			actual.hwtstamp = !!ts->ts[2].tv_sec;
+			break;
+		default:
+			error(1, 0, "Unexpected cmsg_type %d", cmsg->cmsg_type);
+		}
+	}
+
+#define VALIDATE(field) \
+	do { \
+		if (expected.field != actual.field) { \
+			if (expected.field) \
+				error(0, 0, "Expected " #field " to be set."); \
+			else \
+				error(0, 0, \
+				      "Expected " #field " to not be set."); \
+			failed = true; \
+		} \
+	} while (0)
+
+	VALIDATE(tstamp);
+	VALIDATE(tstampns);
+	VALIDATE(swtstamp);
+	VALIDATE(hwtstamp);
+#undef VALIDATE
+
+	free(recv_iov.iov_base);
+
+	return failed;
+}
+
+void config_so_flags(int rcv, struct options o)
+{
+	int on = 1;
+
+	if (setsockopt(rcv, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on)) < 0)
+		error(1, errno, "Failed to enable SO_REUSEADDR");
+
+	if (o.so_timestamp &&
+	    setsockopt(rcv, SOL_SOCKET, SO_TIMESTAMP,
+		       &o.so_timestamp, sizeof(o.so_timestamp)) < 0)
+		error(1, errno, "Failed to enable SO_TIMESTAMP");
+
+	if (o.so_timestampns &&
+	    setsockopt(rcv, SOL_SOCKET, SO_TIMESTAMPNS,
+		       &o.so_timestampns, sizeof(o.so_timestampns)) < 0)
+		error(1, errno, "Failed to enable SO_TIMESTAMPNS");
+
+	if (o.so_timestamping &&
+	    setsockopt(rcv, SOL_SOCKET, SO_TIMESTAMPING,
+		       &o.so_timestamping, sizeof(o.so_timestamping)) < 0)
+		error(1, errno, "Failed to set SO_TIMESTAMPING");
+}
+
+bool run_test_case(struct socket_type s, struct test_case t)
+{
+	int port = (s.type == SOCK_RAW) ? 0 : next_port++;
+	int read_size = op_size;
+	struct sockaddr_in addr;
+	bool failed = false;
+	int src, dst, rcv;
+
+	src = socket(AF_INET, s.type, s.protocol);
+	if (src < 0)
+		error(1, errno, "Failed to open src socket");
+
+	dst = socket(AF_INET, s.type, s.protocol);
+	if (dst < 0)
+		error(1, errno, "Failed to open dst socket");
+
+	memset(&addr, 0, sizeof(addr));
+	addr.sin_family = AF_INET;
+	addr.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+	addr.sin_port = htons(port);
+
+	if (bind(dst, (struct sockaddr *)&addr, sizeof(addr)) < 0)
+		error(1, errno, "Failed to bind to port %d", port);
+
+	if (s.type == SOCK_STREAM && (listen(dst, 1) < 0))
+		error(1, errno, "Failed to listen");
+
+	if (connect(src, (struct sockaddr *)&addr, sizeof(addr)) < 0)
+		error(1, errno, "Failed to connect");
+
+	if (s.type == SOCK_STREAM) {
+		rcv = accept(dst, NULL, NULL);
+		if (rcv < 0)
+			error(1, errno, "Failed to accept");
+		close(dst);
+	} else {
+		rcv = dst;
+	}
+
+	config_so_flags(rcv, t.sockopt);
+	usleep(20000); /* setsockopt for SO_TIMESTAMPING is asynchronous */
+	do_send(src);
+
+	if (s.type == SOCK_RAW)
+		read_size += 20;  /* for IP header */
+	failed = do_recv(rcv, read_size, t.expected);
+
+	close(rcv);
+	close(src);
+
+	return failed;
+}
+
+int main(int argc, char **argv)
+{
+	bool all_protocols = true;
+	bool all_tests = true;
+	int arg_index = 0;
+	int failures = 0;
+	int s, t, opt;
+
+	while ((opt = getopt_long(argc, argv, "", long_options,
+				  &arg_index)) != -1) {
+		switch (opt) {
+		case 'l':
+			for (t = 0; t < ARRAY_SIZE(test_cases); t++) {
+				printf("%d\t", t);
+				print_test_case(&test_cases[t]);
+			}
+			return 0;
+		case 'n':
+			t = atoi(optarg);
+			if (t >= ARRAY_SIZE(test_cases))
+				error(1, 0, "Invalid test case: %d", t);
+			all_tests = false;
+			test_cases[t].enabled = true;
+			break;
+		case 's':
+			op_size = atoi(optarg);
+			break;
+		case 't':
+			all_protocols = false;
+			socket_types[2].enabled = true;
+			break;
+		case 'u':
+			all_protocols = false;
+			socket_types[1].enabled = true;
+			break;
+		case 'i':
+			all_protocols = false;
+			socket_types[0].enabled = true;
+			break;
+		default:
+			error(1, 0, "Failed to parse parameters.");
+		}
+	}
+
+	for (s = 0; s < ARRAY_SIZE(socket_types); s++) {
+		if (!all_protocols && !socket_types[s].enabled)
+			continue;
+
+		printf("Testing %s...\n", socket_types[s].friendly_name);
+		for (t = 0; t < ARRAY_SIZE(test_cases); t++) {
+			if (!all_tests && !test_cases[t].enabled)
+				continue;
+
+			printf("Starting testcase %d...\n", t);
+			if (run_test_case(socket_types[s], test_cases[t])) {
+				failures++;
+				printf("FAILURE in test case ");
+				print_test_case(&test_cases[t]);
+			}
+		}
+	}
+	if (!failures)
+		printf("PASSED.\n");
+	return failures;
+}
diff --git a/tools/testing/selftests/networking/timestamping/timestamping.c b/tools/testing/selftests/networking/timestamping/timestamping.c
new file mode 100644
index 000000000..900ed4b47
--- /dev/null
+++ b/tools/testing/selftests/networking/timestamping/timestamping.c
@@ -0,0 +1,534 @@
+/*
+ * This program demonstrates how the various time stamping features in
+ * the Linux kernel work. It emulates the behavior of a PTP
+ * implementation in stand-alone master mode by sending PTPv1 Sync
+ * multicasts once every second. It looks for similar packets, but
+ * beyond that doesn't actually implement PTP.
+ *
+ * Outgoing packets are time stamped with SO_TIMESTAMPING with or
+ * without hardware support.
+ *
+ * Incoming packets are time stamped with SO_TIMESTAMPING with or
+ * without hardware support, SIOCGSTAMP[NS] (per-socket time stamp) and
+ * SO_TIMESTAMP[NS].
+ *
+ * Copyright (C) 2009 Intel Corporation.
+ * Author: Patrick Ohly <patrick.ohly@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. * See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <string.h>
+
+#include <sys/time.h>
+#include <sys/socket.h>
+#include <sys/select.h>
+#include <sys/ioctl.h>
+#include <arpa/inet.h>
+#include <net/if.h>
+
+#include <asm/types.h>
+#include <linux/net_tstamp.h>
+#include <linux/errqueue.h>
+
+#ifndef SO_TIMESTAMPING
+# define SO_TIMESTAMPING         37
+# define SCM_TIMESTAMPING        SO_TIMESTAMPING
+#endif
+
+#ifndef SO_TIMESTAMPNS
+# define SO_TIMESTAMPNS 35
+#endif
+
+#ifndef SIOCGSTAMPNS
+# define SIOCGSTAMPNS 0x8907
+#endif
+
+#ifndef SIOCSHWTSTAMP
+# define SIOCSHWTSTAMP 0x89b0
+#endif
+
+static void usage(const char *error)
+{
+	if (error)
+		printf("invalid option: %s\n", error);
+	printf("timestamping interface option*\n\n"
+	       "Options:\n"
+	       "  IP_MULTICAST_LOOP - looping outgoing multicasts\n"
+	       "  SO_TIMESTAMP - normal software time stamping, ms resolution\n"
+	       "  SO_TIMESTAMPNS - more accurate software time stamping\n"
+	       "  SOF_TIMESTAMPING_TX_HARDWARE - hardware time stamping of outgoing packets\n"
+	       "  SOF_TIMESTAMPING_TX_SOFTWARE - software fallback for outgoing packets\n"
+	       "  SOF_TIMESTAMPING_RX_HARDWARE - hardware time stamping of incoming packets\n"
+	       "  SOF_TIMESTAMPING_RX_SOFTWARE - software fallback for incoming packets\n"
+	       "  SOF_TIMESTAMPING_SOFTWARE - request reporting of software time stamps\n"
+	       "  SOF_TIMESTAMPING_RAW_HARDWARE - request reporting of raw HW time stamps\n"
+	       "  SIOCGSTAMP - check last socket time stamp\n"
+	       "  SIOCGSTAMPNS - more accurate socket time stamp\n");
+	exit(1);
+}
+
+static void bail(const char *error)
+{
+	printf("%s: %s\n", error, strerror(errno));
+	exit(1);
+}
+
+static const unsigned char sync[] = {
+	0x00, 0x01, 0x00, 0x01,
+	0x5f, 0x44, 0x46, 0x4c,
+	0x54, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x01, 0x01,
+
+	/* fake uuid */
+	0x00, 0x01,
+	0x02, 0x03, 0x04, 0x05,
+
+	0x00, 0x01, 0x00, 0x37,
+	0x00, 0x00, 0x00, 0x08,
+	0x00, 0x00, 0x00, 0x00,
+	0x49, 0x05, 0xcd, 0x01,
+	0x29, 0xb1, 0x8d, 0xb0,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x01,
+
+	/* fake uuid */
+	0x00, 0x01,
+	0x02, 0x03, 0x04, 0x05,
+
+	0x00, 0x00, 0x00, 0x37,
+	0x00, 0x00, 0x00, 0x04,
+	0x44, 0x46, 0x4c, 0x54,
+	0x00, 0x00, 0xf0, 0x60,
+	0x00, 0x01, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x01,
+	0x00, 0x00, 0xf0, 0x60,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x04,
+	0x44, 0x46, 0x4c, 0x54,
+	0x00, 0x01,
+
+	/* fake uuid */
+	0x00, 0x01,
+	0x02, 0x03, 0x04, 0x05,
+
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00
+};
+
+static void sendpacket(int sock, struct sockaddr *addr, socklen_t addr_len)
+{
+	struct timeval now;
+	int res;
+
+	res = sendto(sock, sync, sizeof(sync), 0,
+		addr, addr_len);
+	gettimeofday(&now, 0);
+	if (res < 0)
+		printf("%s: %s\n", "send", strerror(errno));
+	else
+		printf("%ld.%06ld: sent %d bytes\n",
+		       (long)now.tv_sec, (long)now.tv_usec,
+		       res);
+}
+
+static void printpacket(struct msghdr *msg, int res,
+			char *data,
+			int sock, int recvmsg_flags,
+			int siocgstamp, int siocgstampns)
+{
+	struct sockaddr_in *from_addr = (struct sockaddr_in *)msg->msg_name;
+	struct cmsghdr *cmsg;
+	struct timeval tv;
+	struct timespec ts;
+	struct timeval now;
+
+	gettimeofday(&now, 0);
+
+	printf("%ld.%06ld: received %s data, %d bytes from %s, %zu bytes control messages\n",
+	       (long)now.tv_sec, (long)now.tv_usec,
+	       (recvmsg_flags & MSG_ERRQUEUE) ? "error" : "regular",
+	       res,
+	       inet_ntoa(from_addr->sin_addr),
+	       msg->msg_controllen);
+	for (cmsg = CMSG_FIRSTHDR(msg);
+	     cmsg;
+	     cmsg = CMSG_NXTHDR(msg, cmsg)) {
+		printf("   cmsg len %zu: ", cmsg->cmsg_len);
+		switch (cmsg->cmsg_level) {
+		case SOL_SOCKET:
+			printf("SOL_SOCKET ");
+			switch (cmsg->cmsg_type) {
+			case SO_TIMESTAMP: {
+				struct timeval *stamp =
+					(struct timeval *)CMSG_DATA(cmsg);
+				printf("SO_TIMESTAMP %ld.%06ld",
+				       (long)stamp->tv_sec,
+				       (long)stamp->tv_usec);
+				break;
+			}
+			case SO_TIMESTAMPNS: {
+				struct timespec *stamp =
+					(struct timespec *)CMSG_DATA(cmsg);
+				printf("SO_TIMESTAMPNS %ld.%09ld",
+				       (long)stamp->tv_sec,
+				       (long)stamp->tv_nsec);
+				break;
+			}
+			case SO_TIMESTAMPING: {
+				struct timespec *stamp =
+					(struct timespec *)CMSG_DATA(cmsg);
+				printf("SO_TIMESTAMPING ");
+				printf("SW %ld.%09ld ",
+				       (long)stamp->tv_sec,
+				       (long)stamp->tv_nsec);
+				stamp++;
+				/* skip deprecated HW transformed */
+				stamp++;
+				printf("HW raw %ld.%09ld",
+				       (long)stamp->tv_sec,
+				       (long)stamp->tv_nsec);
+				break;
+			}
+			default:
+				printf("type %d", cmsg->cmsg_type);
+				break;
+			}
+			break;
+		case IPPROTO_IP:
+			printf("IPPROTO_IP ");
+			switch (cmsg->cmsg_type) {
+			case IP_RECVERR: {
+				struct sock_extended_err *err =
+					(struct sock_extended_err *)CMSG_DATA(cmsg);
+				printf("IP_RECVERR ee_errno '%s' ee_origin %d => %s",
+					strerror(err->ee_errno),
+					err->ee_origin,
+#ifdef SO_EE_ORIGIN_TIMESTAMPING
+					err->ee_origin == SO_EE_ORIGIN_TIMESTAMPING ?
+					"bounced packet" : "unexpected origin"
+#else
+					"probably SO_EE_ORIGIN_TIMESTAMPING"
+#endif
+					);
+				if (res < sizeof(sync))
+					printf(" => truncated data?!");
+				else if (!memcmp(sync, data + res - sizeof(sync),
+							sizeof(sync)))
+					printf(" => GOT OUR DATA BACK (HURRAY!)");
+				break;
+			}
+			case IP_PKTINFO: {
+				struct in_pktinfo *pktinfo =
+					(struct in_pktinfo *)CMSG_DATA(cmsg);
+				printf("IP_PKTINFO interface index %u",
+					pktinfo->ipi_ifindex);
+				break;
+			}
+			default:
+				printf("type %d", cmsg->cmsg_type);
+				break;
+			}
+			break;
+		default:
+			printf("level %d type %d",
+				cmsg->cmsg_level,
+				cmsg->cmsg_type);
+			break;
+		}
+		printf("\n");
+	}
+
+	if (siocgstamp) {
+		if (ioctl(sock, SIOCGSTAMP, &tv))
+			printf("   %s: %s\n", "SIOCGSTAMP", strerror(errno));
+		else
+			printf("SIOCGSTAMP %ld.%06ld\n",
+			       (long)tv.tv_sec,
+			       (long)tv.tv_usec);
+	}
+	if (siocgstampns) {
+		if (ioctl(sock, SIOCGSTAMPNS, &ts))
+			printf("   %s: %s\n", "SIOCGSTAMPNS", strerror(errno));
+		else
+			printf("SIOCGSTAMPNS %ld.%09ld\n",
+			       (long)ts.tv_sec,
+			       (long)ts.tv_nsec);
+	}
+}
+
+static void recvpacket(int sock, int recvmsg_flags,
+		       int siocgstamp, int siocgstampns)
+{
+	char data[256];
+	struct msghdr msg;
+	struct iovec entry;
+	struct sockaddr_in from_addr;
+	struct {
+		struct cmsghdr cm;
+		char control[512];
+	} control;
+	int res;
+
+	memset(&msg, 0, sizeof(msg));
+	msg.msg_iov = &entry;
+	msg.msg_iovlen = 1;
+	entry.iov_base = data;
+	entry.iov_len = sizeof(data);
+	msg.msg_name = (caddr_t)&from_addr;
+	msg.msg_namelen = sizeof(from_addr);
+	msg.msg_control = &control;
+	msg.msg_controllen = sizeof(control);
+
+	res = recvmsg(sock, &msg, recvmsg_flags|MSG_DONTWAIT);
+	if (res < 0) {
+		printf("%s %s: %s\n",
+		       "recvmsg",
+		       (recvmsg_flags & MSG_ERRQUEUE) ? "error" : "regular",
+		       strerror(errno));
+	} else {
+		printpacket(&msg, res, data,
+			    sock, recvmsg_flags,
+			    siocgstamp, siocgstampns);
+	}
+}
+
+int main(int argc, char **argv)
+{
+	int so_timestamping_flags = 0;
+	int so_timestamp = 0;
+	int so_timestampns = 0;
+	int siocgstamp = 0;
+	int siocgstampns = 0;
+	int ip_multicast_loop = 0;
+	char *interface;
+	int i;
+	int enabled = 1;
+	int sock;
+	struct ifreq device;
+	struct ifreq hwtstamp;
+	struct hwtstamp_config hwconfig, hwconfig_requested;
+	struct sockaddr_in addr;
+	struct ip_mreq imr;
+	struct in_addr iaddr;
+	int val;
+	socklen_t len;
+	struct timeval next;
+	size_t if_len;
+
+	if (argc < 2)
+		usage(0);
+	interface = argv[1];
+	if_len = strlen(interface);
+	if (if_len >= IFNAMSIZ) {
+		printf("interface name exceeds IFNAMSIZ\n");
+		exit(1);
+	}
+
+	for (i = 2; i < argc; i++) {
+		if (!strcasecmp(argv[i], "SO_TIMESTAMP"))
+			so_timestamp = 1;
+		else if (!strcasecmp(argv[i], "SO_TIMESTAMPNS"))
+			so_timestampns = 1;
+		else if (!strcasecmp(argv[i], "SIOCGSTAMP"))
+			siocgstamp = 1;
+		else if (!strcasecmp(argv[i], "SIOCGSTAMPNS"))
+			siocgstampns = 1;
+		else if (!strcasecmp(argv[i], "IP_MULTICAST_LOOP"))
+			ip_multicast_loop = 1;
+		else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_TX_HARDWARE"))
+			so_timestamping_flags |= SOF_TIMESTAMPING_TX_HARDWARE;
+		else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_TX_SOFTWARE"))
+			so_timestamping_flags |= SOF_TIMESTAMPING_TX_SOFTWARE;
+		else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_RX_HARDWARE"))
+			so_timestamping_flags |= SOF_TIMESTAMPING_RX_HARDWARE;
+		else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_RX_SOFTWARE"))
+			so_timestamping_flags |= SOF_TIMESTAMPING_RX_SOFTWARE;
+		else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_SOFTWARE"))
+			so_timestamping_flags |= SOF_TIMESTAMPING_SOFTWARE;
+		else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_RAW_HARDWARE"))
+			so_timestamping_flags |= SOF_TIMESTAMPING_RAW_HARDWARE;
+		else
+			usage(argv[i]);
+	}
+
+	sock = socket(PF_INET, SOCK_DGRAM, IPPROTO_UDP);
+	if (sock < 0)
+		bail("socket");
+
+	memset(&device, 0, sizeof(device));
+	memcpy(device.ifr_name, interface, if_len + 1);
+	if (ioctl(sock, SIOCGIFADDR, &device) < 0)
+		bail("getting interface IP address");
+
+	memset(&hwtstamp, 0, sizeof(hwtstamp));
+	memcpy(hwtstamp.ifr_name, interface, if_len + 1);
+	hwtstamp.ifr_data = (void *)&hwconfig;
+	memset(&hwconfig, 0, sizeof(hwconfig));
+	hwconfig.tx_type =
+		(so_timestamping_flags & SOF_TIMESTAMPING_TX_HARDWARE) ?
+		HWTSTAMP_TX_ON : HWTSTAMP_TX_OFF;
+	hwconfig.rx_filter =
+		(so_timestamping_flags & SOF_TIMESTAMPING_RX_HARDWARE) ?
+		HWTSTAMP_FILTER_PTP_V1_L4_SYNC : HWTSTAMP_FILTER_NONE;
+	hwconfig_requested = hwconfig;
+	if (ioctl(sock, SIOCSHWTSTAMP, &hwtstamp) < 0) {
+		if ((errno == EINVAL || errno == ENOTSUP) &&
+		    hwconfig_requested.tx_type == HWTSTAMP_TX_OFF &&
+		    hwconfig_requested.rx_filter == HWTSTAMP_FILTER_NONE)
+			printf("SIOCSHWTSTAMP: disabling hardware time stamping not possible\n");
+		else
+			bail("SIOCSHWTSTAMP");
+	}
+	printf("SIOCSHWTSTAMP: tx_type %d requested, got %d; rx_filter %d requested, got %d\n",
+	       hwconfig_requested.tx_type, hwconfig.tx_type,
+	       hwconfig_requested.rx_filter, hwconfig.rx_filter);
+
+	/* bind to PTP port */
+	addr.sin_family = AF_INET;
+	addr.sin_addr.s_addr = htonl(INADDR_ANY);
+	addr.sin_port = htons(319 /* PTP event port */);
+	if (bind(sock,
+		 (struct sockaddr *)&addr,
+		 sizeof(struct sockaddr_in)) < 0)
+		bail("bind");
+
+	/* set multicast group for outgoing packets */
+	inet_aton("224.0.1.130", &iaddr); /* alternate PTP domain 1 */
+	addr.sin_addr = iaddr;
+	imr.imr_multiaddr.s_addr = iaddr.s_addr;
+	imr.imr_interface.s_addr =
+		((struct sockaddr_in *)&device.ifr_addr)->sin_addr.s_addr;
+	if (setsockopt(sock, IPPROTO_IP, IP_MULTICAST_IF,
+		       &imr.imr_interface.s_addr, sizeof(struct in_addr)) < 0)
+		bail("set multicast");
+
+	/* join multicast group, loop our own packet */
+	if (setsockopt(sock, IPPROTO_IP, IP_ADD_MEMBERSHIP,
+		       &imr, sizeof(struct ip_mreq)) < 0)
+		bail("join multicast group");
+
+	if (setsockopt(sock, IPPROTO_IP, IP_MULTICAST_LOOP,
+		       &ip_multicast_loop, sizeof(enabled)) < 0) {
+		bail("loop multicast");
+	}
+
+	/* set socket options for time stamping */
+	if (so_timestamp &&
+		setsockopt(sock, SOL_SOCKET, SO_TIMESTAMP,
+			   &enabled, sizeof(enabled)) < 0)
+		bail("setsockopt SO_TIMESTAMP");
+
+	if (so_timestampns &&
+		setsockopt(sock, SOL_SOCKET, SO_TIMESTAMPNS,
+			   &enabled, sizeof(enabled)) < 0)
+		bail("setsockopt SO_TIMESTAMPNS");
+
+	if (so_timestamping_flags &&
+		setsockopt(sock, SOL_SOCKET, SO_TIMESTAMPING,
+			   &so_timestamping_flags,
+			   sizeof(so_timestamping_flags)) < 0)
+		bail("setsockopt SO_TIMESTAMPING");
+
+	/* request IP_PKTINFO for debugging purposes */
+	if (setsockopt(sock, SOL_IP, IP_PKTINFO,
+		       &enabled, sizeof(enabled)) < 0)
+		printf("%s: %s\n", "setsockopt IP_PKTINFO", strerror(errno));
+
+	/* verify socket options */
+	len = sizeof(val);
+	if (getsockopt(sock, SOL_SOCKET, SO_TIMESTAMP, &val, &len) < 0)
+		printf("%s: %s\n", "getsockopt SO_TIMESTAMP", strerror(errno));
+	else
+		printf("SO_TIMESTAMP %d\n", val);
+
+	if (getsockopt(sock, SOL_SOCKET, SO_TIMESTAMPNS, &val, &len) < 0)
+		printf("%s: %s\n", "getsockopt SO_TIMESTAMPNS",
+		       strerror(errno));
+	else
+		printf("SO_TIMESTAMPNS %d\n", val);
+
+	if (getsockopt(sock, SOL_SOCKET, SO_TIMESTAMPING, &val, &len) < 0) {
+		printf("%s: %s\n", "getsockopt SO_TIMESTAMPING",
+		       strerror(errno));
+	} else {
+		printf("SO_TIMESTAMPING %d\n", val);
+		if (val != so_timestamping_flags)
+			printf("   not the expected value %d\n",
+			       so_timestamping_flags);
+	}
+
+	/* send packets forever every five seconds */
+	gettimeofday(&next, 0);
+	next.tv_sec = (next.tv_sec + 1) / 5 * 5;
+	next.tv_usec = 0;
+	while (1) {
+		struct timeval now;
+		struct timeval delta;
+		long delta_us;
+		int res;
+		fd_set readfs, errorfs;
+
+		gettimeofday(&now, 0);
+		delta_us = (long)(next.tv_sec - now.tv_sec) * 1000000 +
+			(long)(next.tv_usec - now.tv_usec);
+		if (delta_us > 0) {
+			/* continue waiting for timeout or data */
+			delta.tv_sec = delta_us / 1000000;
+			delta.tv_usec = delta_us % 1000000;
+
+			FD_ZERO(&readfs);
+			FD_ZERO(&errorfs);
+			FD_SET(sock, &readfs);
+			FD_SET(sock, &errorfs);
+			printf("%ld.%06ld: select %ldus\n",
+			       (long)now.tv_sec, (long)now.tv_usec,
+			       delta_us);
+			res = select(sock + 1, &readfs, 0, &errorfs, &delta);
+			gettimeofday(&now, 0);
+			printf("%ld.%06ld: select returned: %d, %s\n",
+			       (long)now.tv_sec, (long)now.tv_usec,
+			       res,
+			       res < 0 ? strerror(errno) : "success");
+			if (res > 0) {
+				if (FD_ISSET(sock, &readfs))
+					printf("ready for reading\n");
+				if (FD_ISSET(sock, &errorfs))
+					printf("has error\n");
+				recvpacket(sock, 0,
+					   siocgstamp,
+					   siocgstampns);
+				recvpacket(sock, MSG_ERRQUEUE,
+					   siocgstamp,
+					   siocgstampns);
+			}
+		} else {
+			/* write one packet */
+			sendpacket(sock,
+				   (struct sockaddr *)&addr,
+				   sizeof(addr));
+			next.tv_sec += 5;
+			continue;
+		}
+	}
+
+	return 0;
+}
diff --git a/tools/testing/selftests/networking/timestamping/txtimestamp.c b/tools/testing/selftests/networking/timestamping/txtimestamp.c
new file mode 100644
index 000000000..81a98a240
--- /dev/null
+++ b/tools/testing/selftests/networking/timestamping/txtimestamp.c
@@ -0,0 +1,564 @@
+/*
+ * Copyright 2014 Google Inc.
+ * Author: willemb@google.com (Willem de Bruijn)
+ *
+ * Test software tx timestamping, including
+ *
+ * - SCHED, SND and ACK timestamps
+ * - RAW, UDP and TCP
+ * - IPv4 and IPv6
+ * - various packet sizes (to test GSO and TSO)
+ *
+ * Consult the command line arguments for help on running
+ * the various testcases.
+ *
+ * This test requires a dummy TCP server.
+ * A simple `nc6 [-u] -l -p $DESTPORT` will do
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. * See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#define _GNU_SOURCE
+
+#include <arpa/inet.h>
+#include <asm/types.h>
+#include <error.h>
+#include <errno.h>
+#include <inttypes.h>
+#include <linux/errqueue.h>
+#include <linux/if_ether.h>
+#include <linux/net_tstamp.h>
+#include <netdb.h>
+#include <net/if.h>
+#include <netinet/in.h>
+#include <netinet/ip.h>
+#include <netinet/udp.h>
+#include <netinet/tcp.h>
+#include <netpacket/packet.h>
+#include <poll.h>
+#include <stdarg.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/select.h>
+#include <sys/socket.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <time.h>
+#include <unistd.h>
+
+/* command line parameters */
+static int cfg_proto = SOCK_STREAM;
+static int cfg_ipproto = IPPROTO_TCP;
+static int cfg_num_pkts = 4;
+static int do_ipv4 = 1;
+static int do_ipv6 = 1;
+static int cfg_payload_len = 10;
+static int cfg_poll_timeout = 100;
+static bool cfg_show_payload;
+static bool cfg_do_pktinfo;
+static bool cfg_loop_nodata;
+static bool cfg_no_delay;
+static uint16_t dest_port = 9000;
+
+static struct sockaddr_in daddr;
+static struct sockaddr_in6 daddr6;
+static struct timespec ts_prev;
+
+static void __print_timestamp(const char *name, struct timespec *cur,
+			      uint32_t key, int payload_len)
+{
+	if (!(cur->tv_sec | cur->tv_nsec))
+		return;
+
+	fprintf(stderr, "  %s: %lu s %lu us (seq=%u, len=%u)",
+			name, cur->tv_sec, cur->tv_nsec / 1000,
+			key, payload_len);
+
+	if ((ts_prev.tv_sec | ts_prev.tv_nsec)) {
+		int64_t cur_ms, prev_ms;
+
+		cur_ms = (long) cur->tv_sec * 1000 * 1000;
+		cur_ms += cur->tv_nsec / 1000;
+
+		prev_ms = (long) ts_prev.tv_sec * 1000 * 1000;
+		prev_ms += ts_prev.tv_nsec / 1000;
+
+		fprintf(stderr, "  (%+" PRId64 " us)", cur_ms - prev_ms);
+	}
+
+	ts_prev = *cur;
+	fprintf(stderr, "\n");
+}
+
+static void print_timestamp_usr(void)
+{
+	struct timespec ts;
+	struct timeval tv;	/* avoid dependency on -lrt */
+
+	gettimeofday(&tv, NULL);
+	ts.tv_sec = tv.tv_sec;
+	ts.tv_nsec = tv.tv_usec * 1000;
+
+	__print_timestamp("  USR", &ts, 0, 0);
+}
+
+static void print_timestamp(struct scm_timestamping *tss, int tstype,
+			    int tskey, int payload_len)
+{
+	const char *tsname;
+
+	switch (tstype) {
+	case SCM_TSTAMP_SCHED:
+		tsname = "  ENQ";
+		break;
+	case SCM_TSTAMP_SND:
+		tsname = "  SND";
+		break;
+	case SCM_TSTAMP_ACK:
+		tsname = "  ACK";
+		break;
+	default:
+		error(1, 0, "unknown timestamp type: %u",
+		tstype);
+	}
+	__print_timestamp(tsname, &tss->ts[0], tskey, payload_len);
+}
+
+/* TODO: convert to check_and_print payload once API is stable */
+static void print_payload(char *data, int len)
+{
+	int i;
+
+	if (!len)
+		return;
+
+	if (len > 70)
+		len = 70;
+
+	fprintf(stderr, "payload: ");
+	for (i = 0; i < len; i++)
+		fprintf(stderr, "%02hhx ", data[i]);
+	fprintf(stderr, "\n");
+}
+
+static void print_pktinfo(int family, int ifindex, void *saddr, void *daddr)
+{
+	char sa[INET6_ADDRSTRLEN], da[INET6_ADDRSTRLEN];
+
+	fprintf(stderr, "         pktinfo: ifindex=%u src=%s dst=%s\n",
+		ifindex,
+		saddr ? inet_ntop(family, saddr, sa, sizeof(sa)) : "unknown",
+		daddr ? inet_ntop(family, daddr, da, sizeof(da)) : "unknown");
+}
+
+static void __poll(int fd)
+{
+	struct pollfd pollfd;
+	int ret;
+
+	memset(&pollfd, 0, sizeof(pollfd));
+	pollfd.fd = fd;
+	ret = poll(&pollfd, 1, cfg_poll_timeout);
+	if (ret != 1)
+		error(1, errno, "poll");
+}
+
+static void __recv_errmsg_cmsg(struct msghdr *msg, int payload_len)
+{
+	struct sock_extended_err *serr = NULL;
+	struct scm_timestamping *tss = NULL;
+	struct cmsghdr *cm;
+	int batch = 0;
+
+	for (cm = CMSG_FIRSTHDR(msg);
+	     cm && cm->cmsg_len;
+	     cm = CMSG_NXTHDR(msg, cm)) {
+		if (cm->cmsg_level == SOL_SOCKET &&
+		    cm->cmsg_type == SCM_TIMESTAMPING) {
+			tss = (void *) CMSG_DATA(cm);
+		} else if ((cm->cmsg_level == SOL_IP &&
+			    cm->cmsg_type == IP_RECVERR) ||
+			   (cm->cmsg_level == SOL_IPV6 &&
+			    cm->cmsg_type == IPV6_RECVERR)) {
+			serr = (void *) CMSG_DATA(cm);
+			if (serr->ee_errno != ENOMSG ||
+			    serr->ee_origin != SO_EE_ORIGIN_TIMESTAMPING) {
+				fprintf(stderr, "unknown ip error %d %d\n",
+						serr->ee_errno,
+						serr->ee_origin);
+				serr = NULL;
+			}
+		} else if (cm->cmsg_level == SOL_IP &&
+			   cm->cmsg_type == IP_PKTINFO) {
+			struct in_pktinfo *info = (void *) CMSG_DATA(cm);
+			print_pktinfo(AF_INET, info->ipi_ifindex,
+				      &info->ipi_spec_dst, &info->ipi_addr);
+		} else if (cm->cmsg_level == SOL_IPV6 &&
+			   cm->cmsg_type == IPV6_PKTINFO) {
+			struct in6_pktinfo *info6 = (void *) CMSG_DATA(cm);
+			print_pktinfo(AF_INET6, info6->ipi6_ifindex,
+				      NULL, &info6->ipi6_addr);
+		} else
+			fprintf(stderr, "unknown cmsg %d,%d\n",
+					cm->cmsg_level, cm->cmsg_type);
+
+		if (serr && tss) {
+			print_timestamp(tss, serr->ee_info, serr->ee_data,
+					payload_len);
+			serr = NULL;
+			tss = NULL;
+			batch++;
+		}
+	}
+
+	if (batch > 1)
+		fprintf(stderr, "batched %d timestamps\n", batch);
+}
+
+static int recv_errmsg(int fd)
+{
+	static char ctrl[1024 /* overprovision*/];
+	static struct msghdr msg;
+	struct iovec entry;
+	static char *data;
+	int ret = 0;
+
+	data = malloc(cfg_payload_len);
+	if (!data)
+		error(1, 0, "malloc");
+
+	memset(&msg, 0, sizeof(msg));
+	memset(&entry, 0, sizeof(entry));
+	memset(ctrl, 0, sizeof(ctrl));
+
+	entry.iov_base = data;
+	entry.iov_len = cfg_payload_len;
+	msg.msg_iov = &entry;
+	msg.msg_iovlen = 1;
+	msg.msg_name = NULL;
+	msg.msg_namelen = 0;
+	msg.msg_control = ctrl;
+	msg.msg_controllen = sizeof(ctrl);
+
+	ret = recvmsg(fd, &msg, MSG_ERRQUEUE);
+	if (ret == -1 && errno != EAGAIN)
+		error(1, errno, "recvmsg");
+
+	if (ret >= 0) {
+		__recv_errmsg_cmsg(&msg, ret);
+		if (cfg_show_payload)
+			print_payload(data, cfg_payload_len);
+	}
+
+	free(data);
+	return ret == -1;
+}
+
+static void do_test(int family, unsigned int opt)
+{
+	char *buf;
+	int fd, i, val = 1, total_len;
+
+	if (family == AF_INET6 && cfg_proto != SOCK_STREAM) {
+		/* due to lack of checksum generation code */
+		fprintf(stderr, "test: skipping datagram over IPv6\n");
+		return;
+	}
+
+	total_len = cfg_payload_len;
+	if (cfg_proto == SOCK_RAW) {
+		total_len += sizeof(struct udphdr);
+		if (cfg_ipproto == IPPROTO_RAW)
+			total_len += sizeof(struct iphdr);
+	}
+
+	buf = malloc(total_len);
+	if (!buf)
+		error(1, 0, "malloc");
+
+	fd = socket(family, cfg_proto, cfg_ipproto);
+	if (fd < 0)
+		error(1, errno, "socket");
+
+	if (cfg_proto == SOCK_STREAM) {
+		if (setsockopt(fd, IPPROTO_TCP, TCP_NODELAY,
+			       (char*) &val, sizeof(val)))
+			error(1, 0, "setsockopt no nagle");
+
+		if (family == PF_INET) {
+			if (connect(fd, (void *) &daddr, sizeof(daddr)))
+				error(1, errno, "connect ipv4");
+		} else {
+			if (connect(fd, (void *) &daddr6, sizeof(daddr6)))
+				error(1, errno, "connect ipv6");
+		}
+	}
+
+	if (cfg_do_pktinfo) {
+		if (family == AF_INET6) {
+			if (setsockopt(fd, SOL_IPV6, IPV6_RECVPKTINFO,
+				       &val, sizeof(val)))
+				error(1, errno, "setsockopt pktinfo ipv6");
+		} else {
+			if (setsockopt(fd, SOL_IP, IP_PKTINFO,
+				       &val, sizeof(val)))
+				error(1, errno, "setsockopt pktinfo ipv4");
+		}
+	}
+
+	opt |= SOF_TIMESTAMPING_SOFTWARE |
+	       SOF_TIMESTAMPING_OPT_CMSG |
+	       SOF_TIMESTAMPING_OPT_ID;
+	if (cfg_loop_nodata)
+		opt |= SOF_TIMESTAMPING_OPT_TSONLY;
+
+	if (setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPING,
+		       (char *) &opt, sizeof(opt)))
+		error(1, 0, "setsockopt timestamping");
+
+	for (i = 0; i < cfg_num_pkts; i++) {
+		memset(&ts_prev, 0, sizeof(ts_prev));
+		memset(buf, 'a' + i, total_len);
+
+		if (cfg_proto == SOCK_RAW) {
+			struct udphdr *udph;
+			int off = 0;
+
+			if (cfg_ipproto == IPPROTO_RAW) {
+				struct iphdr *iph = (void *) buf;
+
+				memset(iph, 0, sizeof(*iph));
+				iph->ihl      = 5;
+				iph->version  = 4;
+				iph->ttl      = 2;
+				iph->daddr    = daddr.sin_addr.s_addr;
+				iph->protocol = IPPROTO_UDP;
+				/* kernel writes saddr, csum, len */
+
+				off = sizeof(*iph);
+			}
+
+			udph = (void *) buf + off;
+			udph->source = ntohs(9000); 	/* random spoof */
+			udph->dest   = ntohs(dest_port);
+			udph->len    = ntohs(sizeof(*udph) + cfg_payload_len);
+			udph->check  = 0;	/* not allowed for IPv6 */
+		}
+
+		print_timestamp_usr();
+		if (cfg_proto != SOCK_STREAM) {
+			if (family == PF_INET)
+				val = sendto(fd, buf, total_len, 0, (void *) &daddr, sizeof(daddr));
+			else
+				val = sendto(fd, buf, total_len, 0, (void *) &daddr6, sizeof(daddr6));
+		} else {
+			val = send(fd, buf, cfg_payload_len, 0);
+		}
+		if (val != total_len)
+			error(1, errno, "send");
+
+		/* wait for all errors to be queued, else ACKs arrive OOO */
+		if (!cfg_no_delay)
+			usleep(50 * 1000);
+
+		__poll(fd);
+
+		while (!recv_errmsg(fd)) {}
+	}
+
+	if (close(fd))
+		error(1, errno, "close");
+
+	free(buf);
+	usleep(400 * 1000);
+}
+
+static void __attribute__((noreturn)) usage(const char *filepath)
+{
+	fprintf(stderr, "\nUsage: %s [options] hostname\n"
+			"\nwhere options are:\n"
+			"  -4:   only IPv4\n"
+			"  -6:   only IPv6\n"
+			"  -h:   show this message\n"
+			"  -c N: number of packets for each test\n"
+			"  -D:   no delay between packets\n"
+			"  -F:   poll() waits forever for an event\n"
+			"  -I:   request PKTINFO\n"
+			"  -l N: send N bytes at a time\n"
+			"  -n:   set no-payload option\n"
+			"  -r:   use raw\n"
+			"  -R:   use raw (IP_HDRINCL)\n"
+			"  -p N: connect to port N\n"
+			"  -u:   use udp\n"
+			"  -x:   show payload (up to 70 bytes)\n",
+			filepath);
+	exit(1);
+}
+
+static void parse_opt(int argc, char **argv)
+{
+	int proto_count = 0;
+	char c;
+
+	while ((c = getopt(argc, argv, "46c:DFhIl:np:rRux")) != -1) {
+		switch (c) {
+		case '4':
+			do_ipv6 = 0;
+			break;
+		case '6':
+			do_ipv4 = 0;
+			break;
+		case 'c':
+			cfg_num_pkts = strtoul(optarg, NULL, 10);
+			break;
+		case 'D':
+			cfg_no_delay = true;
+			break;
+		case 'F':
+			cfg_poll_timeout = -1;
+			break;
+		case 'I':
+			cfg_do_pktinfo = true;
+			break;
+		case 'n':
+			cfg_loop_nodata = true;
+			break;
+		case 'r':
+			proto_count++;
+			cfg_proto = SOCK_RAW;
+			cfg_ipproto = IPPROTO_UDP;
+			break;
+		case 'R':
+			proto_count++;
+			cfg_proto = SOCK_RAW;
+			cfg_ipproto = IPPROTO_RAW;
+			break;
+		case 'u':
+			proto_count++;
+			cfg_proto = SOCK_DGRAM;
+			cfg_ipproto = IPPROTO_UDP;
+			break;
+		case 'l':
+			cfg_payload_len = strtoul(optarg, NULL, 10);
+			break;
+		case 'p':
+			dest_port = strtoul(optarg, NULL, 10);
+			break;
+		case 'x':
+			cfg_show_payload = true;
+			break;
+		case 'h':
+		default:
+			usage(argv[0]);
+		}
+	}
+
+	if (!cfg_payload_len)
+		error(1, 0, "payload may not be nonzero");
+	if (cfg_proto != SOCK_STREAM && cfg_payload_len > 1472)
+		error(1, 0, "udp packet might exceed expected MTU");
+	if (!do_ipv4 && !do_ipv6)
+		error(1, 0, "pass -4 or -6, not both");
+	if (proto_count > 1)
+		error(1, 0, "pass -r, -R or -u, not multiple");
+
+	if (optind != argc - 1)
+		error(1, 0, "missing required hostname argument");
+}
+
+static void resolve_hostname(const char *hostname)
+{
+	struct addrinfo *addrs, *cur;
+	int have_ipv4 = 0, have_ipv6 = 0;
+
+	if (getaddrinfo(hostname, NULL, NULL, &addrs))
+		error(1, errno, "getaddrinfo");
+
+	cur = addrs;
+	while (cur && !have_ipv4 && !have_ipv6) {
+		if (!have_ipv4 && cur->ai_family == AF_INET) {
+			memcpy(&daddr, cur->ai_addr, sizeof(daddr));
+			daddr.sin_port = htons(dest_port);
+			have_ipv4 = 1;
+		}
+		else if (!have_ipv6 && cur->ai_family == AF_INET6) {
+			memcpy(&daddr6, cur->ai_addr, sizeof(daddr6));
+			daddr6.sin6_port = htons(dest_port);
+			have_ipv6 = 1;
+		}
+		cur = cur->ai_next;
+	}
+	if (addrs)
+		freeaddrinfo(addrs);
+
+	do_ipv4 &= have_ipv4;
+	do_ipv6 &= have_ipv6;
+}
+
+static void do_main(int family)
+{
+	fprintf(stderr, "family:       %s\n",
+			family == PF_INET ? "INET" : "INET6");
+
+	fprintf(stderr, "test SND\n");
+	do_test(family, SOF_TIMESTAMPING_TX_SOFTWARE);
+
+	fprintf(stderr, "test ENQ\n");
+	do_test(family, SOF_TIMESTAMPING_TX_SCHED);
+
+	fprintf(stderr, "test ENQ + SND\n");
+	do_test(family, SOF_TIMESTAMPING_TX_SCHED |
+			SOF_TIMESTAMPING_TX_SOFTWARE);
+
+	if (cfg_proto == SOCK_STREAM) {
+		fprintf(stderr, "\ntest ACK\n");
+		do_test(family, SOF_TIMESTAMPING_TX_ACK);
+
+		fprintf(stderr, "\ntest SND + ACK\n");
+		do_test(family, SOF_TIMESTAMPING_TX_SOFTWARE |
+				SOF_TIMESTAMPING_TX_ACK);
+
+		fprintf(stderr, "\ntest ENQ + SND + ACK\n");
+		do_test(family, SOF_TIMESTAMPING_TX_SCHED |
+				SOF_TIMESTAMPING_TX_SOFTWARE |
+				SOF_TIMESTAMPING_TX_ACK);
+	}
+}
+
+const char *sock_names[] = { NULL, "TCP", "UDP", "RAW" };
+
+int main(int argc, char **argv)
+{
+	if (argc == 1)
+		usage(argv[0]);
+
+	parse_opt(argc, argv);
+	resolve_hostname(argv[argc - 1]);
+
+	fprintf(stderr, "protocol:     %s\n", sock_names[cfg_proto]);
+	fprintf(stderr, "payload:      %u\n", cfg_payload_len);
+	fprintf(stderr, "server port:  %u\n", dest_port);
+	fprintf(stderr, "\n");
+
+	if (do_ipv4)
+		do_main(PF_INET);
+	if (do_ipv6)
+		do_main(PF_INET6);
+
+	return 0;
+}
diff --git a/tools/testing/selftests/nsfs/.gitignore b/tools/testing/selftests/nsfs/.gitignore
new file mode 100644
index 000000000..2ab2c824c
--- /dev/null
+++ b/tools/testing/selftests/nsfs/.gitignore
@@ -0,0 +1,2 @@
+owner
+pidns
diff --git a/tools/testing/selftests/nsfs/Makefile b/tools/testing/selftests/nsfs/Makefile
new file mode 100644
index 000000000..9ff7c7f80
--- /dev/null
+++ b/tools/testing/selftests/nsfs/Makefile
@@ -0,0 +1,5 @@
+TEST_GEN_PROGS := owner pidns
+
+CFLAGS := -Wall -Werror
+
+include ../lib.mk
diff --git a/tools/testing/selftests/nsfs/config b/tools/testing/selftests/nsfs/config
new file mode 100644
index 000000000..598d0a225
--- /dev/null
+++ b/tools/testing/selftests/nsfs/config
@@ -0,0 +1,3 @@
+CONFIG_USER_NS=y
+CONFIG_UTS_NS=y
+CONFIG_PID_NS=y
diff --git a/tools/testing/selftests/nsfs/owner.c b/tools/testing/selftests/nsfs/owner.c
new file mode 100644
index 000000000..96a976c74
--- /dev/null
+++ b/tools/testing/selftests/nsfs/owner.c
@@ -0,0 +1,92 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <sched.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <sys/ioctl.h>
+#include <sys/prctl.h>
+#include <sys/wait.h>
+
+#define NSIO    0xb7
+#define NS_GET_USERNS   _IO(NSIO, 0x1)
+
+#define pr_err(fmt, ...) \
+		({ \
+			fprintf(stderr, "%s:%d:" fmt ": %m\n", \
+				__func__, __LINE__, ##__VA_ARGS__); \
+			1; \
+		})
+
+int main(int argc, char *argvp[])
+{
+	int pfd[2], ns, uns, init_uns;
+	struct stat st1, st2;
+	char path[128];
+	pid_t pid;
+	char c;
+
+	if (pipe(pfd))
+		return 1;
+
+	pid = fork();
+	if (pid < 0)
+		return pr_err("fork");
+	if (pid == 0) {
+		prctl(PR_SET_PDEATHSIG, SIGKILL);
+		if (unshare(CLONE_NEWUTS | CLONE_NEWUSER))
+			return pr_err("unshare");
+		close(pfd[0]);
+		close(pfd[1]);
+		while (1)
+			sleep(1);
+		return 0;
+	}
+	close(pfd[1]);
+	if (read(pfd[0], &c, 1) != 0)
+		return pr_err("Unable to read from pipe");
+	close(pfd[0]);
+
+	snprintf(path, sizeof(path), "/proc/%d/ns/uts", pid);
+	ns = open(path, O_RDONLY);
+	if (ns < 0)
+		return pr_err("Unable to open %s", path);
+
+	uns = ioctl(ns, NS_GET_USERNS);
+	if (uns < 0)
+		return pr_err("Unable to get an owning user namespace");
+
+	if (fstat(uns, &st1))
+		return pr_err("fstat");
+
+	snprintf(path, sizeof(path), "/proc/%d/ns/user", pid);
+	if (stat(path, &st2))
+		return pr_err("stat");
+
+	if (st1.st_ino != st2.st_ino)
+		return pr_err("NS_GET_USERNS returned a wrong namespace");
+
+	init_uns = ioctl(uns, NS_GET_USERNS);
+	if (uns < 0)
+		return pr_err("Unable to get an owning user namespace");
+
+	if (ioctl(init_uns, NS_GET_USERNS) >= 0 || errno != EPERM)
+		return pr_err("Don't get EPERM");
+
+	if (unshare(CLONE_NEWUSER))
+		return pr_err("unshare");
+
+	if (ioctl(ns, NS_GET_USERNS) >= 0 || errno != EPERM)
+		return pr_err("Don't get EPERM");
+	if (ioctl(init_uns, NS_GET_USERNS) >= 0 || errno != EPERM)
+		return pr_err("Don't get EPERM");
+
+	kill(pid, SIGKILL);
+	wait(NULL);
+	return 0;
+}
diff --git a/tools/testing/selftests/nsfs/pidns.c b/tools/testing/selftests/nsfs/pidns.c
new file mode 100644
index 000000000..e0d86e166
--- /dev/null
+++ b/tools/testing/selftests/nsfs/pidns.c
@@ -0,0 +1,79 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <sched.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <sys/ioctl.h>
+#include <sys/prctl.h>
+#include <sys/wait.h>
+
+#define pr_err(fmt, ...) \
+		({ \
+			fprintf(stderr, "%s:%d:" fmt ": %m\n", \
+				__func__, __LINE__, ##__VA_ARGS__); \
+			1; \
+		})
+
+#define NSIO	0xb7
+#define NS_GET_USERNS   _IO(NSIO, 0x1)
+#define NS_GET_PARENT   _IO(NSIO, 0x2)
+
+#define __stack_aligned__	__attribute__((aligned(16)))
+struct cr_clone_arg {
+	char stack[128] __stack_aligned__;
+	char stack_ptr[0];
+};
+
+static int child(void *args)
+{
+	prctl(PR_SET_PDEATHSIG, SIGKILL);
+	while (1)
+		sleep(1);
+	exit(0);
+}
+
+int main(int argc, char *argv[])
+{
+	char *ns_strs[] = {"pid", "user"};
+	char path[] = "/proc/0123456789/ns/pid";
+	struct cr_clone_arg ca;
+	struct stat st1, st2;
+	int ns, pns, i;
+	pid_t pid;
+
+	pid = clone(child, ca.stack_ptr, CLONE_NEWUSER | CLONE_NEWPID | SIGCHLD, NULL);
+	if (pid < 0)
+		return pr_err("clone");
+
+	for (i = 0; i < 2; i++) {
+		snprintf(path, sizeof(path), "/proc/%d/ns/%s", pid, ns_strs[i]);
+		ns = open(path, O_RDONLY);
+		if (ns < 0)
+			return pr_err("Unable to open %s", path);
+
+		pns = ioctl(ns, NS_GET_PARENT);
+		if (pns < 0)
+			return pr_err("Unable to get a parent pidns");
+
+		snprintf(path, sizeof(path), "/proc/self/ns/%s", ns_strs[i]);
+		if (stat(path, &st2))
+			return pr_err("Unable to stat %s", path);
+		if (fstat(pns, &st1))
+			return pr_err("Unable to stat the parent pidns");
+		if (st1.st_ino != st2.st_ino)
+			return pr_err("NS_GET_PARENT returned a wrong namespace");
+
+		if (ioctl(pns, NS_GET_PARENT) >= 0 || errno != EPERM)
+			return pr_err("Don't get EPERM");
+	}
+
+	kill(pid, SIGKILL);
+	wait(NULL);
+	return 0;
+}
diff --git a/tools/testing/selftests/ntb/ntb_test.sh b/tools/testing/selftests/ntb/ntb_test.sh
new file mode 100755
index 000000000..17ca36403
--- /dev/null
+++ b/tools/testing/selftests/ntb/ntb_test.sh
@@ -0,0 +1,590 @@
+#!/bin/bash
+# Copyright (c) 2016 Microsemi. All Rights Reserved.
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation; either version 2 of
+# the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it would be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# Author: Logan Gunthorpe <logang@deltatee.com>
+
+REMOTE_HOST=
+LIST_DEVS=FALSE
+
+DEBUGFS=${DEBUGFS-/sys/kernel/debug}
+
+PERF_RUN_ORDER=32
+MAX_MW_SIZE=0
+RUN_DMA_TESTS=
+DONT_CLEANUP=
+MW_SIZE=65536
+
+function show_help()
+{
+	echo "Usage: $0 [OPTIONS] LOCAL_DEV REMOTE_DEV"
+	echo "Run tests on a pair of NTB endpoints."
+	echo
+	echo "If the NTB device loops back to the same host then,"
+	echo "just specifying the two PCI ids on the command line is"
+	echo "sufficient. Otherwise, if the NTB link spans two hosts"
+	echo "use the -r option to specify the hostname for the remote"
+	echo "device. SSH will then be used to test the remote side."
+	echo "An SSH key between the root users of the host would then"
+	echo "be highly recommended."
+	echo
+	echo "Options:"
+	echo "  -C              don't cleanup ntb modules on exit"
+	echo "  -h              show this help message"
+	echo "  -l              list available local and remote PCI ids"
+	echo "  -r REMOTE_HOST  specify the remote's hostname to connect"
+	echo "                  to for the test (using ssh)"
+	echo "  -m MW_SIZE      memory window size for ntb_tool"
+	echo "                  (default: $MW_SIZE)"
+	echo "  -d              run dma tests for ntb_perf"
+	echo "  -p ORDER        total data order for ntb_perf"
+	echo "                  (default: $PERF_RUN_ORDER)"
+	echo "  -w MAX_MW_SIZE  maxmium memory window size for ntb_perf"
+	echo
+}
+
+function parse_args()
+{
+	OPTIND=0
+	while getopts "b:Cdhlm:r:p:w:" opt; do
+		case "$opt" in
+		C)  DONT_CLEANUP=1 ;;
+		d)  RUN_DMA_TESTS=1 ;;
+		h)  show_help; exit 0 ;;
+		l)  LIST_DEVS=TRUE ;;
+		m)  MW_SIZE=${OPTARG} ;;
+		r)  REMOTE_HOST=${OPTARG} ;;
+		p)  PERF_RUN_ORDER=${OPTARG} ;;
+		w)  MAX_MW_SIZE=${OPTARG} ;;
+		\?)
+		    echo "Invalid option: -$OPTARG" >&2
+		    exit 1
+		    ;;
+		esac
+	done
+}
+
+parse_args "$@"
+shift $((OPTIND-1))
+LOCAL_DEV=$1
+shift
+parse_args "$@"
+shift $((OPTIND-1))
+REMOTE_DEV=$1
+shift
+parse_args "$@"
+
+set -e
+
+function _modprobe()
+{
+	modprobe "$@"
+
+	if [[ "$REMOTE_HOST" != "" ]]; then
+		ssh "$REMOTE_HOST" modprobe "$@"
+	fi
+}
+
+function split_remote()
+{
+	VPATH=$1
+	REMOTE=
+
+	if [[ "$VPATH" == *":/"* ]]; then
+		REMOTE=${VPATH%%:*}
+		VPATH=${VPATH#*:}
+	fi
+}
+
+function read_file()
+{
+	split_remote $1
+	if [[ "$REMOTE" != "" ]]; then
+		ssh "$REMOTE" cat "$VPATH"
+	else
+		cat "$VPATH"
+	fi
+}
+
+function write_file()
+{
+	split_remote $2
+	VALUE=$1
+
+	if [[ "$REMOTE" != "" ]]; then
+		ssh "$REMOTE" "echo \"$VALUE\" > \"$VPATH\""
+	else
+		echo "$VALUE" > "$VPATH"
+	fi
+}
+
+function check_file()
+{
+	split_remote $1
+
+	if [[ "$REMOTE" != "" ]]; then
+		ssh "$REMOTE" "[[ -e ${VPATH} ]]"
+	else
+		[[ -e ${VPATH} ]]
+	fi
+}
+
+function subdirname()
+{
+	echo $(basename $(dirname $1)) 2> /dev/null
+}
+
+function find_pidx()
+{
+	PORT=$1
+	PPATH=$2
+
+	for ((i = 0; i < 64; i++)); do
+		PEER_DIR="$PPATH/peer$i"
+
+		check_file ${PEER_DIR} || break
+
+		PEER_PORT=$(read_file "${PEER_DIR}/port")
+		if [[ ${PORT} -eq $PEER_PORT ]]; then
+			echo $i
+			return 0
+		fi
+	done
+
+	return 1
+}
+
+function port_test()
+{
+	LOC=$1
+	REM=$2
+
+	echo "Running port tests on: $(basename $LOC) / $(basename $REM)"
+
+	LOCAL_PORT=$(read_file "$LOC/port")
+	REMOTE_PORT=$(read_file "$REM/port")
+
+	LOCAL_PIDX=$(find_pidx ${REMOTE_PORT} "$LOC")
+	REMOTE_PIDX=$(find_pidx ${LOCAL_PORT} "$REM")
+
+	echo "Local port ${LOCAL_PORT} with index ${REMOTE_PIDX} on remote host"
+	echo "Peer port ${REMOTE_PORT} with index ${LOCAL_PIDX} on local host"
+
+	echo "  Passed"
+}
+
+function link_test()
+{
+	LOC=$1
+	REM=$2
+	EXP=0
+
+	echo "Running link tests on: $(subdirname $LOC) / $(subdirname $REM)"
+
+	if ! write_file "N" "$LOC/../link" 2> /dev/null; then
+		echo "  Unsupported"
+		return
+	fi
+
+	write_file "N" "$LOC/link_event"
+
+	if [[ $(read_file "$REM/link") != "N" ]]; then
+		echo "Expected link to be down in $REM/link" >&2
+		exit -1
+	fi
+
+	write_file "Y" "$LOC/../link"
+
+	echo "  Passed"
+}
+
+function doorbell_test()
+{
+	LOC=$1
+	REM=$2
+	EXP=0
+
+	echo "Running db tests on: $(basename $LOC) / $(basename $REM)"
+
+	DB_VALID_MASK=$(read_file "$LOC/db_valid_mask")
+
+	write_file "c $DB_VALID_MASK" "$REM/db"
+
+	for ((i = 0; i < 64; i++)); do
+		DB=$(read_file "$REM/db")
+		if [[ "$DB" -ne "$EXP" ]]; then
+			echo "Doorbell doesn't match expected value $EXP " \
+			     "in $REM/db" >&2
+			exit -1
+		fi
+
+		let "MASK = (1 << $i) & $DB_VALID_MASK" || true
+		let "EXP = $EXP | $MASK" || true
+
+		write_file "s $MASK" "$LOC/peer_db"
+	done
+
+	write_file "c $DB_VALID_MASK" "$REM/db_mask"
+	write_file $DB_VALID_MASK "$REM/db_event"
+	write_file "s $DB_VALID_MASK" "$REM/db_mask"
+
+	write_file "c $DB_VALID_MASK" "$REM/db"
+
+	echo "  Passed"
+}
+
+function get_files_count()
+{
+	NAME=$1
+	LOC=$2
+
+	split_remote $LOC
+
+	if [[ "$REMOTE" == "" ]]; then
+		echo $(ls -1 "$VPATH"/${NAME}* 2>/dev/null | wc -l)
+	else
+		echo $(ssh "$REMOTE" "ls -1 \"$VPATH\"/${NAME}* | \
+		       wc -l" 2> /dev/null)
+	fi
+}
+
+function scratchpad_test()
+{
+	LOC=$1
+	REM=$2
+
+	echo "Running spad tests on: $(subdirname $LOC) / $(subdirname $REM)"
+
+	CNT=$(get_files_count "spad" "$LOC")
+
+	if [[ $CNT -eq 0 ]]; then
+		echo "  Unsupported"
+		return
+	fi
+
+	for ((i = 0; i < $CNT; i++)); do
+		VAL=$RANDOM
+		write_file "$VAL" "$LOC/spad$i"
+		RVAL=$(read_file "$REM/../spad$i")
+
+		if [[ "$VAL" -ne "$RVAL" ]]; then
+			echo "Scratchpad $i value $RVAL doesn't match $VAL" >&2
+			exit -1
+		fi
+	done
+
+	echo "  Passed"
+}
+
+function message_test()
+{
+	LOC=$1
+	REM=$2
+
+	echo "Running msg tests on: $(subdirname $LOC) / $(subdirname $REM)"
+
+	CNT=$(get_files_count "msg" "$LOC")
+
+	if [[ $CNT -eq 0 ]]; then
+		echo "  Unsupported"
+		return
+	fi
+
+	MSG_OUTBITS_MASK=$(read_file "$LOC/../msg_inbits")
+	MSG_INBITS_MASK=$(read_file "$REM/../msg_inbits")
+
+	write_file "c $MSG_OUTBITS_MASK" "$LOC/../msg_sts"
+	write_file "c $MSG_INBITS_MASK" "$REM/../msg_sts"
+
+	for ((i = 0; i < $CNT; i++)); do
+		VAL=$RANDOM
+		write_file "$VAL" "$LOC/msg$i"
+		RVAL=$(read_file "$REM/../msg$i")
+
+		if [[ "$VAL" -ne "${RVAL%%<-*}" ]]; then
+			echo "Message $i value $RVAL doesn't match $VAL" >&2
+			exit -1
+		fi
+	done
+
+	echo "  Passed"
+}
+
+function get_number()
+{
+	KEY=$1
+
+	sed -n "s/^\(${KEY}\)[ \t]*\(0x[0-9a-fA-F]*\)\(\[p\]\)\?$/\2/p"
+}
+
+function mw_alloc()
+{
+	IDX=$1
+	LOC=$2
+	REM=$3
+
+	write_file $MW_SIZE "$LOC/mw_trans$IDX"
+
+	INB_MW=$(read_file "$LOC/mw_trans$IDX")
+	MW_ALIGNED_SIZE=$(echo "$INB_MW" | get_number "Window Size")
+	MW_DMA_ADDR=$(echo "$INB_MW" | get_number "DMA Address")
+
+	write_file "$MW_DMA_ADDR:$(($MW_ALIGNED_SIZE))" "$REM/peer_mw_trans$IDX"
+
+	if [[ $MW_SIZE -ne $MW_ALIGNED_SIZE ]]; then
+		echo "MW $IDX size aligned to $MW_ALIGNED_SIZE"
+	fi
+}
+
+function write_mw()
+{
+	split_remote $2
+
+	if [[ "$REMOTE" != "" ]]; then
+		ssh "$REMOTE" \
+			dd if=/dev/urandom "of=$VPATH" 2> /dev/null || true
+	else
+		dd if=/dev/urandom "of=$VPATH" 2> /dev/null || true
+	fi
+}
+
+function mw_check()
+{
+	IDX=$1
+	LOC=$2
+	REM=$3
+
+	write_mw "$LOC/mw$IDX"
+
+	split_remote "$LOC/mw$IDX"
+	if [[ "$REMOTE" == "" ]]; then
+		A=$VPATH
+	else
+		A=/tmp/ntb_test.$$.A
+		ssh "$REMOTE" cat "$VPATH" > "$A"
+	fi
+
+	split_remote "$REM/peer_mw$IDX"
+	if [[ "$REMOTE" == "" ]]; then
+		B=$VPATH
+	else
+		B=/tmp/ntb_test.$$.B
+		ssh "$REMOTE" cat "$VPATH" > "$B"
+	fi
+
+	cmp -n $MW_ALIGNED_SIZE "$A" "$B"
+	if [[ $? != 0 ]]; then
+		echo "Memory window $MW did not match!" >&2
+	fi
+
+	if [[ "$A" == "/tmp/*" ]]; then
+		rm "$A"
+	fi
+
+	if [[ "$B" == "/tmp/*" ]]; then
+		rm "$B"
+	fi
+}
+
+function mw_free()
+{
+	IDX=$1
+	LOC=$2
+	REM=$3
+
+	write_file "$MW_DMA_ADDR:0" "$REM/peer_mw_trans$IDX"
+
+	write_file 0 "$LOC/mw_trans$IDX"
+}
+
+function mw_test()
+{
+	LOC=$1
+	REM=$2
+
+	CNT=$(get_files_count "mw_trans" "$LOC")
+
+	for ((i = 0; i < $CNT; i++)); do
+		echo "Running mw$i tests on: $(subdirname $LOC) / " \
+		     "$(subdirname $REM)"
+
+		mw_alloc $i $LOC $REM
+
+		mw_check $i $LOC $REM
+
+		mw_free $i $LOC  $REM
+
+		echo "  Passed"
+	done
+
+}
+
+function pingpong_test()
+{
+	LOC=$1
+	REM=$2
+
+	echo "Running ping pong tests on: $(basename $LOC) / $(basename $REM)"
+
+	LOC_START=$(read_file "$LOC/count")
+	REM_START=$(read_file "$REM/count")
+
+	sleep 7
+
+	LOC_END=$(read_file "$LOC/count")
+	REM_END=$(read_file "$REM/count")
+
+	if [[ $LOC_START == $LOC_END ]] || [[ $REM_START == $REM_END ]]; then
+		echo "Ping pong counter not incrementing!" >&2
+		exit 1
+	fi
+
+	echo "  Passed"
+}
+
+function perf_test()
+{
+	USE_DMA=$1
+
+	if [[ $USE_DMA == "1" ]]; then
+		WITH="with"
+	else
+		WITH="without"
+	fi
+
+	_modprobe ntb_perf total_order=$PERF_RUN_ORDER \
+		max_mw_size=$MAX_MW_SIZE use_dma=$USE_DMA
+
+	echo "Running local perf test $WITH DMA"
+	write_file "$LOCAL_PIDX" "$LOCAL_PERF/run"
+	echo -n "  "
+	read_file "$LOCAL_PERF/run"
+	echo "  Passed"
+
+	echo "Running remote perf test $WITH DMA"
+	write_file "$REMOTE_PIDX" "$REMOTE_PERF/run"
+	echo -n "  "
+	read_file "$REMOTE_PERF/run"
+	echo "  Passed"
+
+	_modprobe -r ntb_perf
+}
+
+function ntb_tool_tests()
+{
+	LOCAL_TOOL="$DEBUGFS/ntb_tool/$LOCAL_DEV"
+	REMOTE_TOOL="$REMOTE_HOST:$DEBUGFS/ntb_tool/$REMOTE_DEV"
+
+	echo "Starting ntb_tool tests..."
+
+	_modprobe ntb_tool
+
+	port_test "$LOCAL_TOOL" "$REMOTE_TOOL"
+
+	LOCAL_PEER_TOOL="$LOCAL_TOOL/peer$LOCAL_PIDX"
+	REMOTE_PEER_TOOL="$REMOTE_TOOL/peer$REMOTE_PIDX"
+
+	link_test "$LOCAL_PEER_TOOL" "$REMOTE_PEER_TOOL"
+	link_test "$REMOTE_PEER_TOOL" "$LOCAL_PEER_TOOL"
+
+	#Ensure the link is up on both sides before continuing
+	write_file "Y" "$LOCAL_PEER_TOOL/link_event"
+	write_file "Y" "$REMOTE_PEER_TOOL/link_event"
+
+	doorbell_test "$LOCAL_TOOL" "$REMOTE_TOOL"
+	doorbell_test "$REMOTE_TOOL" "$LOCAL_TOOL"
+
+	scratchpad_test "$LOCAL_PEER_TOOL" "$REMOTE_PEER_TOOL"
+	scratchpad_test "$REMOTE_PEER_TOOL" "$LOCAL_PEER_TOOL"
+
+	message_test "$LOCAL_PEER_TOOL" "$REMOTE_PEER_TOOL"
+	message_test "$REMOTE_PEER_TOOL" "$LOCAL_PEER_TOOL"
+
+	mw_test "$LOCAL_PEER_TOOL" "$REMOTE_PEER_TOOL"
+	mw_test "$REMOTE_PEER_TOOL" "$LOCAL_PEER_TOOL"
+
+	_modprobe -r ntb_tool
+}
+
+function ntb_pingpong_tests()
+{
+	LOCAL_PP="$DEBUGFS/ntb_pingpong/$LOCAL_DEV"
+	REMOTE_PP="$REMOTE_HOST:$DEBUGFS/ntb_pingpong/$REMOTE_DEV"
+
+	echo "Starting ntb_pingpong tests..."
+
+	_modprobe ntb_pingpong
+
+	pingpong_test $LOCAL_PP $REMOTE_PP
+
+	_modprobe -r ntb_pingpong
+}
+
+function ntb_perf_tests()
+{
+	LOCAL_PERF="$DEBUGFS/ntb_perf/$LOCAL_DEV"
+	REMOTE_PERF="$REMOTE_HOST:$DEBUGFS/ntb_perf/$REMOTE_DEV"
+
+	echo "Starting ntb_perf tests..."
+
+	perf_test 0
+
+	if [[ $RUN_DMA_TESTS ]]; then
+		perf_test 1
+	fi
+}
+
+function cleanup()
+{
+	set +e
+	_modprobe -r ntb_tool 2> /dev/null
+	_modprobe -r ntb_perf 2> /dev/null
+	_modprobe -r ntb_pingpong 2> /dev/null
+	_modprobe -r ntb_transport 2> /dev/null
+	set -e
+}
+
+cleanup
+
+if ! [[ $$DONT_CLEANUP ]]; then
+	trap cleanup EXIT
+fi
+
+if [ "$(id -u)" != "0" ]; then
+	echo "This script must be run as root" 1>&2
+	exit 1
+fi
+
+if [[ "$LIST_DEVS" == TRUE ]]; then
+	echo "Local Devices:"
+	ls -1 /sys/bus/ntb/devices
+	echo
+
+	if [[ "$REMOTE_HOST" != "" ]]; then
+		echo "Remote Devices:"
+		ssh $REMOTE_HOST ls -1 /sys/bus/ntb/devices
+	fi
+
+	exit 0
+fi
+
+if [[ "$LOCAL_DEV" == $"" ]] || [[ "$REMOTE_DEV" == $"" ]]; then
+	show_help
+	exit 1
+fi
+
+ntb_tool_tests
+echo
+ntb_pingpong_tests
+echo
+ntb_perf_tests
+echo
diff --git a/tools/testing/selftests/powerpc/Makefile b/tools/testing/selftests/powerpc/Makefile
new file mode 100644
index 000000000..201b59855
--- /dev/null
+++ b/tools/testing/selftests/powerpc/Makefile
@@ -0,0 +1,74 @@
+# SPDX-License-Identifier: GPL-2.0
+# Makefile for powerpc selftests
+
+# ARCH can be overridden by the user for cross compiling
+ARCH ?= $(shell uname -m)
+ARCH := $(shell echo $(ARCH) | sed -e s/ppc.*/powerpc/)
+
+ifeq ($(ARCH),powerpc)
+
+GIT_VERSION = $(shell git describe --always --long --dirty || echo "unknown")
+
+CFLAGS := -std=gnu99 -O2 -Wall -Werror -DGIT_VERSION='"$(GIT_VERSION)"' -I$(CURDIR)/include $(CFLAGS)
+
+export CFLAGS
+
+SUB_DIRS = alignment		\
+	   benchmarks		\
+	   cache_shape		\
+	   copyloops		\
+	   dscr			\
+	   mm			\
+	   pmu			\
+	   signal		\
+	   primitives		\
+	   stringloops		\
+	   switch_endian	\
+	   syscalls		\
+	   tm			\
+	   vphn         \
+	   math		\
+	   ptrace
+
+endif
+
+all: $(SUB_DIRS)
+
+$(SUB_DIRS):
+	BUILD_TARGET=$(OUTPUT)/$@; mkdir -p $$BUILD_TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -k -C $@ all
+
+include ../lib.mk
+
+override define RUN_TESTS
+	@for TARGET in $(SUB_DIRS); do \
+		BUILD_TARGET=$(OUTPUT)/$$TARGET;	\
+		$(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET run_tests;\
+	done;
+endef
+
+override define INSTALL_RULE
+	@for TARGET in $(SUB_DIRS); do \
+		BUILD_TARGET=$(OUTPUT)/$$TARGET;	\
+		$(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET install;\
+	done;
+endef
+
+override define EMIT_TESTS
+	@for TARGET in $(SUB_DIRS); do \
+		BUILD_TARGET=$(OUTPUT)/$$TARGET;	\
+		$(MAKE) OUTPUT=$$BUILD_TARGET -s -C $$TARGET emit_tests;\
+	done;
+endef
+
+override define CLEAN
+	@for TARGET in $(SUB_DIRS); do \
+		BUILD_TARGET=$(OUTPUT)/$$TARGET;	\
+		$(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET clean; \
+	done;
+	rm -f tags
+endef
+
+tags:
+	find . -name '*.c' -o -name '*.h' | xargs ctags
+
+.PHONY: tags $(SUB_DIRS)
diff --git a/tools/testing/selftests/powerpc/alignment/.gitignore b/tools/testing/selftests/powerpc/alignment/.gitignore
new file mode 100644
index 000000000..6d4fd0145
--- /dev/null
+++ b/tools/testing/selftests/powerpc/alignment/.gitignore
@@ -0,0 +1,2 @@
+copy_first_unaligned
+alignment_handler
diff --git a/tools/testing/selftests/powerpc/alignment/Makefile b/tools/testing/selftests/powerpc/alignment/Makefile
new file mode 100644
index 000000000..d056486f4
--- /dev/null
+++ b/tools/testing/selftests/powerpc/alignment/Makefile
@@ -0,0 +1,6 @@
+TEST_GEN_PROGS := copy_first_unaligned alignment_handler
+
+top_srcdir = ../../../../..
+include ../../lib.mk
+
+$(TEST_GEN_PROGS): ../harness.c ../utils.c
diff --git a/tools/testing/selftests/powerpc/alignment/alignment_handler.c b/tools/testing/selftests/powerpc/alignment/alignment_handler.c
new file mode 100644
index 000000000..4f8335e0c
--- /dev/null
+++ b/tools/testing/selftests/powerpc/alignment/alignment_handler.c
@@ -0,0 +1,575 @@
+/*
+ * Test the powerpc alignment handler on POWER8/POWER9
+ *
+ * Copyright (C) 2017 IBM Corporation (Michael Neuling, Andrew Donnellan)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+/*
+ * This selftest exercises the powerpc alignment fault handler.
+ *
+ * We create two sets of source and destination buffers, one in regular memory,
+ * the other cache-inhibited (we use /dev/fb0 for this).
+ *
+ * We initialise the source buffers, then use whichever set of load/store
+ * instructions is under test to copy bytes from the source buffers to the
+ * destination buffers. For the regular buffers, these instructions will
+ * execute normally. For the cache-inhibited buffers, these instructions
+ * will trap and cause an alignment fault, and the alignment fault handler
+ * will emulate the particular instruction under test. We then compare the
+ * destination buffers to ensure that the native and emulated cases give the
+ * same result.
+ *
+ * TODO:
+ *   - Any FIXMEs below
+ *   - Test VSX regs < 32 and > 32
+ *   - Test all loads and stores
+ *   - Check update forms do update register
+ *   - Test alignment faults over page boundary
+ *
+ * Some old binutils may not support all the instructions.
+ */
+
+
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+#include <getopt.h>
+#include <setjmp.h>
+#include <signal.h>
+
+#include <asm/cputable.h>
+
+#include "utils.h"
+
+int bufsize;
+int debug;
+int testing;
+volatile int gotsig;
+
+void sighandler(int sig, siginfo_t *info, void *ctx)
+{
+	ucontext_t *ucp = ctx;
+
+	if (!testing) {
+		signal(sig, SIG_DFL);
+		kill(0, sig);
+	}
+	gotsig = sig;
+#ifdef __powerpc64__
+	ucp->uc_mcontext.gp_regs[PT_NIP] += 4;
+#else
+	ucp->uc_mcontext.uc_regs->gregs[PT_NIP] += 4;
+#endif
+}
+
+#define XFORM(reg, n)  " " #reg " ,%"#n",%2 ;"
+#define DFORM(reg, n)  " " #reg " ,0(%"#n") ;"
+
+#define TEST(name, ld_op, st_op, form, ld_reg, st_reg)		\
+	void test_##name(char *s, char *d)			\
+	{							\
+		asm volatile(					\
+			#ld_op form(ld_reg, 0)			\
+			#st_op form(st_reg, 1)			\
+			:: "r"(s), "r"(d), "r"(0)		\
+			: "memory", "vs0", "vs32", "r31");	\
+	}							\
+	rc |= do_test(#name, test_##name)
+
+#define LOAD_VSX_XFORM_TEST(op) TEST(op, op, stxvd2x, XFORM, 32, 32)
+#define STORE_VSX_XFORM_TEST(op) TEST(op, lxvd2x, op, XFORM, 32, 32)
+#define LOAD_VSX_DFORM_TEST(op) TEST(op, op, stxv, DFORM, 32, 32)
+#define STORE_VSX_DFORM_TEST(op) TEST(op, lxv, op, DFORM, 32, 32)
+#define LOAD_VMX_XFORM_TEST(op) TEST(op, op, stxvd2x, XFORM, 0, 32)
+#define STORE_VMX_XFORM_TEST(op) TEST(op, lxvd2x, op, XFORM, 32, 0)
+#define LOAD_VMX_DFORM_TEST(op) TEST(op, op, stxv, DFORM, 0, 32)
+#define STORE_VMX_DFORM_TEST(op) TEST(op, lxv, op, DFORM, 32, 0)
+
+#define LOAD_XFORM_TEST(op) TEST(op, op, stdx, XFORM, 31, 31)
+#define STORE_XFORM_TEST(op) TEST(op, ldx, op, XFORM, 31, 31)
+#define LOAD_DFORM_TEST(op) TEST(op, op, std, DFORM, 31, 31)
+#define STORE_DFORM_TEST(op) TEST(op, ld, op, DFORM, 31, 31)
+
+#define LOAD_FLOAT_DFORM_TEST(op)  TEST(op, op, stfd, DFORM, 0, 0)
+#define STORE_FLOAT_DFORM_TEST(op) TEST(op, lfd, op, DFORM, 0, 0)
+#define LOAD_FLOAT_XFORM_TEST(op)  TEST(op, op, stfdx, XFORM, 0, 0)
+#define STORE_FLOAT_XFORM_TEST(op) TEST(op, lfdx, op, XFORM, 0, 0)
+
+
+/* FIXME: Unimplemented tests: */
+// STORE_DFORM_TEST(stq)   /* FIXME: need two registers for quad */
+// STORE_DFORM_TEST(stswi) /* FIXME: string instruction */
+
+// STORE_XFORM_TEST(stwat) /* AMO can't emulate or run on CI */
+// STORE_XFORM_TEST(stdat) /* ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ */
+
+
+/* preload byte by byte */
+void preload_data(void *dst, int offset, int width)
+{
+	char *c = dst;
+	int i;
+
+	c += offset;
+
+	for (i = 0 ; i < width ; i++)
+		c[i] = i;
+}
+
+int test_memcpy(void *dst, void *src, int size, int offset,
+		void (*test_func)(char *, char *))
+{
+	char *s, *d;
+
+	s = src;
+	s += offset;
+	d = dst;
+	d += offset;
+
+	assert(size == 16);
+	gotsig = 0;
+	testing = 1;
+
+	test_func(s, d); /* run the actual test */
+
+	testing = 0;
+	if (gotsig) {
+		if (debug)
+			printf("  Got signal %i\n", gotsig);
+		return 1;
+	}
+	return 0;
+}
+
+void dumpdata(char *s1, char *s2, int n, char *test_name)
+{
+	int i;
+
+	printf("  %s: unexpected result:\n", test_name);
+	printf("    mem:");
+	for (i = 0; i < n; i++)
+		printf(" %02x", s1[i]);
+	printf("\n");
+	printf("    ci: ");
+	for (i = 0; i < n; i++)
+		printf(" %02x", s2[i]);
+	printf("\n");
+}
+
+int test_memcmp(void *s1, void *s2, int n, int offset, char *test_name)
+{
+	char *s1c, *s2c;
+
+	s1c = s1;
+	s1c += offset;
+	s2c = s2;
+	s2c += offset;
+
+	if (memcmp(s1c, s2c, n)) {
+		if (debug) {
+			printf("\n  Compare failed. Offset:%i length:%i\n",
+			       offset, n);
+			dumpdata(s1c, s2c, n, test_name);
+		}
+		return 1;
+	}
+	return 0;
+}
+
+/*
+ * Do two memcpy tests using the same instructions. One cachable
+ * memory and the other doesn't.
+ */
+int do_test(char *test_name, void (*test_func)(char *, char *))
+{
+	int offset, width, fd, rc, r;
+	void *mem0, *mem1, *ci0, *ci1;
+
+	printf("\tDoing %s:\t", test_name);
+
+	fd = open("/dev/fb0", O_RDWR);
+	if (fd < 0) {
+		printf("\n");
+		perror("Can't open /dev/fb0 now?");
+		return 1;
+	}
+
+	ci0 = mmap(NULL, bufsize, PROT_WRITE, MAP_SHARED,
+		   fd, 0x0);
+	ci1 = mmap(NULL, bufsize, PROT_WRITE, MAP_SHARED,
+		   fd, bufsize);
+	if ((ci0 == MAP_FAILED) || (ci1 == MAP_FAILED)) {
+		printf("\n");
+		perror("mmap failed");
+		SKIP_IF(1);
+	}
+
+	rc = posix_memalign(&mem0, bufsize, bufsize);
+	if (rc) {
+		printf("\n");
+		return rc;
+	}
+
+	rc = posix_memalign(&mem1, bufsize, bufsize);
+	if (rc) {
+		printf("\n");
+		free(mem0);
+		return rc;
+	}
+
+	rc = 0;
+	/* offset = 0 no alignment fault, so skip */
+	for (offset = 1; offset < 16; offset++) {
+		width = 16; /* vsx == 16 bytes */
+		r = 0;
+
+		/* load pattern into memory byte by byte */
+		preload_data(ci0, offset, width);
+		preload_data(mem0, offset, width); // FIXME: remove??
+		memcpy(ci0, mem0, bufsize);
+		memcpy(ci1, mem1, bufsize); /* initialise output to the same */
+
+		/* sanity check */
+		test_memcmp(mem0, ci0, width, offset, test_name);
+
+		r |= test_memcpy(ci1,  ci0,  width, offset, test_func);
+		r |= test_memcpy(mem1, mem0, width, offset, test_func);
+		if (r && !debug) {
+			printf("FAILED: Got signal");
+			rc = 1;
+			break;
+		}
+
+		r |= test_memcmp(mem1, ci1, width, offset, test_name);
+		if (r && !debug) {
+			printf("FAILED: Wrong Data");
+			rc = 1;
+			break;
+		}
+	}
+
+	if (rc == 0)
+		printf("PASSED");
+
+	printf("\n");
+
+	munmap(ci0, bufsize);
+	munmap(ci1, bufsize);
+	free(mem0);
+	free(mem1);
+	close(fd);
+
+	return rc;
+}
+
+static bool can_open_fb0(void)
+{
+	int fd;
+
+	fd = open("/dev/fb0", O_RDWR);
+	if (fd < 0)
+		return false;
+
+	close(fd);
+	return true;
+}
+
+int test_alignment_handler_vsx_206(void)
+{
+	int rc = 0;
+
+	SKIP_IF(!can_open_fb0());
+	SKIP_IF(!have_hwcap(PPC_FEATURE_ARCH_2_06));
+
+	printf("VSX: 2.06B\n");
+	LOAD_VSX_XFORM_TEST(lxvd2x);
+	LOAD_VSX_XFORM_TEST(lxvw4x);
+	LOAD_VSX_XFORM_TEST(lxsdx);
+	LOAD_VSX_XFORM_TEST(lxvdsx);
+	STORE_VSX_XFORM_TEST(stxvd2x);
+	STORE_VSX_XFORM_TEST(stxvw4x);
+	STORE_VSX_XFORM_TEST(stxsdx);
+	return rc;
+}
+
+int test_alignment_handler_vsx_207(void)
+{
+	int rc = 0;
+
+	SKIP_IF(!can_open_fb0());
+	SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_2_07));
+
+	printf("VSX: 2.07B\n");
+	LOAD_VSX_XFORM_TEST(lxsspx);
+	LOAD_VSX_XFORM_TEST(lxsiwax);
+	LOAD_VSX_XFORM_TEST(lxsiwzx);
+	STORE_VSX_XFORM_TEST(stxsspx);
+	STORE_VSX_XFORM_TEST(stxsiwx);
+	return rc;
+}
+
+int test_alignment_handler_vsx_300(void)
+{
+	int rc = 0;
+
+	SKIP_IF(!can_open_fb0());
+
+	SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_3_00));
+	printf("VSX: 3.00B\n");
+	LOAD_VMX_DFORM_TEST(lxsd);
+	LOAD_VSX_XFORM_TEST(lxsibzx);
+	LOAD_VSX_XFORM_TEST(lxsihzx);
+	LOAD_VMX_DFORM_TEST(lxssp);
+	LOAD_VSX_DFORM_TEST(lxv);
+	LOAD_VSX_XFORM_TEST(lxvb16x);
+	LOAD_VSX_XFORM_TEST(lxvh8x);
+	LOAD_VSX_XFORM_TEST(lxvx);
+	LOAD_VSX_XFORM_TEST(lxvwsx);
+	LOAD_VSX_XFORM_TEST(lxvl);
+	LOAD_VSX_XFORM_TEST(lxvll);
+	STORE_VMX_DFORM_TEST(stxsd);
+	STORE_VSX_XFORM_TEST(stxsibx);
+	STORE_VSX_XFORM_TEST(stxsihx);
+	STORE_VMX_DFORM_TEST(stxssp);
+	STORE_VSX_DFORM_TEST(stxv);
+	STORE_VSX_XFORM_TEST(stxvb16x);
+	STORE_VSX_XFORM_TEST(stxvh8x);
+	STORE_VSX_XFORM_TEST(stxvx);
+	STORE_VSX_XFORM_TEST(stxvl);
+	STORE_VSX_XFORM_TEST(stxvll);
+	return rc;
+}
+
+int test_alignment_handler_integer(void)
+{
+	int rc = 0;
+
+	SKIP_IF(!can_open_fb0());
+
+	printf("Integer\n");
+	LOAD_DFORM_TEST(lbz);
+	LOAD_DFORM_TEST(lbzu);
+	LOAD_XFORM_TEST(lbzx);
+	LOAD_XFORM_TEST(lbzux);
+	LOAD_DFORM_TEST(lhz);
+	LOAD_DFORM_TEST(lhzu);
+	LOAD_XFORM_TEST(lhzx);
+	LOAD_XFORM_TEST(lhzux);
+	LOAD_DFORM_TEST(lha);
+	LOAD_DFORM_TEST(lhau);
+	LOAD_XFORM_TEST(lhax);
+	LOAD_XFORM_TEST(lhaux);
+	LOAD_XFORM_TEST(lhbrx);
+	LOAD_DFORM_TEST(lwz);
+	LOAD_DFORM_TEST(lwzu);
+	LOAD_XFORM_TEST(lwzx);
+	LOAD_XFORM_TEST(lwzux);
+	LOAD_DFORM_TEST(lwa);
+	LOAD_XFORM_TEST(lwax);
+	LOAD_XFORM_TEST(lwaux);
+	LOAD_XFORM_TEST(lwbrx);
+	LOAD_DFORM_TEST(ld);
+	LOAD_DFORM_TEST(ldu);
+	LOAD_XFORM_TEST(ldx);
+	LOAD_XFORM_TEST(ldux);
+	STORE_DFORM_TEST(stb);
+	STORE_XFORM_TEST(stbx);
+	STORE_DFORM_TEST(stbu);
+	STORE_XFORM_TEST(stbux);
+	STORE_DFORM_TEST(sth);
+	STORE_XFORM_TEST(sthx);
+	STORE_DFORM_TEST(sthu);
+	STORE_XFORM_TEST(sthux);
+	STORE_XFORM_TEST(sthbrx);
+	STORE_DFORM_TEST(stw);
+	STORE_XFORM_TEST(stwx);
+	STORE_DFORM_TEST(stwu);
+	STORE_XFORM_TEST(stwux);
+	STORE_XFORM_TEST(stwbrx);
+	STORE_DFORM_TEST(std);
+	STORE_XFORM_TEST(stdx);
+	STORE_DFORM_TEST(stdu);
+	STORE_XFORM_TEST(stdux);
+
+#ifdef __BIG_ENDIAN__
+	LOAD_DFORM_TEST(lmw);
+	STORE_DFORM_TEST(stmw);
+#endif
+
+	return rc;
+}
+
+int test_alignment_handler_integer_206(void)
+{
+	int rc = 0;
+
+	SKIP_IF(!can_open_fb0());
+	SKIP_IF(!have_hwcap(PPC_FEATURE_ARCH_2_06));
+
+	printf("Integer: 2.06\n");
+
+	LOAD_XFORM_TEST(ldbrx);
+	STORE_XFORM_TEST(stdbrx);
+
+	return rc;
+}
+
+int test_alignment_handler_vmx(void)
+{
+	int rc = 0;
+
+	SKIP_IF(!can_open_fb0());
+	SKIP_IF(!have_hwcap(PPC_FEATURE_HAS_ALTIVEC));
+
+	printf("VMX\n");
+	LOAD_VMX_XFORM_TEST(lvx);
+
+	/*
+	 * FIXME: These loads only load part of the register, so our
+	 * testing method doesn't work. Also they don't take alignment
+	 * faults, so it's kinda pointless anyway
+	 *
+	 LOAD_VMX_XFORM_TEST(lvebx)
+	 LOAD_VMX_XFORM_TEST(lvehx)
+	 LOAD_VMX_XFORM_TEST(lvewx)
+	 LOAD_VMX_XFORM_TEST(lvxl)
+	*/
+	STORE_VMX_XFORM_TEST(stvx);
+	STORE_VMX_XFORM_TEST(stvebx);
+	STORE_VMX_XFORM_TEST(stvehx);
+	STORE_VMX_XFORM_TEST(stvewx);
+	STORE_VMX_XFORM_TEST(stvxl);
+	return rc;
+}
+
+int test_alignment_handler_fp(void)
+{
+	int rc = 0;
+
+	SKIP_IF(!can_open_fb0());
+
+	printf("Floating point\n");
+	LOAD_FLOAT_DFORM_TEST(lfd);
+	LOAD_FLOAT_XFORM_TEST(lfdx);
+	LOAD_FLOAT_DFORM_TEST(lfdu);
+	LOAD_FLOAT_XFORM_TEST(lfdux);
+	LOAD_FLOAT_DFORM_TEST(lfs);
+	LOAD_FLOAT_XFORM_TEST(lfsx);
+	LOAD_FLOAT_DFORM_TEST(lfsu);
+	LOAD_FLOAT_XFORM_TEST(lfsux);
+	STORE_FLOAT_DFORM_TEST(stfd);
+	STORE_FLOAT_XFORM_TEST(stfdx);
+	STORE_FLOAT_DFORM_TEST(stfdu);
+	STORE_FLOAT_XFORM_TEST(stfdux);
+	STORE_FLOAT_DFORM_TEST(stfs);
+	STORE_FLOAT_XFORM_TEST(stfsx);
+	STORE_FLOAT_DFORM_TEST(stfsu);
+	STORE_FLOAT_XFORM_TEST(stfsux);
+	STORE_FLOAT_XFORM_TEST(stfiwx);
+
+	return rc;
+}
+
+int test_alignment_handler_fp_205(void)
+{
+	int rc = 0;
+
+	SKIP_IF(!can_open_fb0());
+	SKIP_IF(!have_hwcap(PPC_FEATURE_ARCH_2_05));
+
+	printf("Floating point: 2.05\n");
+
+	LOAD_FLOAT_DFORM_TEST(lfdp);
+	LOAD_FLOAT_XFORM_TEST(lfdpx);
+	LOAD_FLOAT_XFORM_TEST(lfiwax);
+	STORE_FLOAT_DFORM_TEST(stfdp);
+	STORE_FLOAT_XFORM_TEST(stfdpx);
+
+	return rc;
+}
+
+int test_alignment_handler_fp_206(void)
+{
+	int rc = 0;
+
+	SKIP_IF(!can_open_fb0());
+	SKIP_IF(!have_hwcap(PPC_FEATURE_ARCH_2_06));
+
+	printf("Floating point: 2.06\n");
+
+	LOAD_FLOAT_XFORM_TEST(lfiwzx);
+
+	return rc;
+}
+
+void usage(char *prog)
+{
+	printf("Usage: %s [options]\n", prog);
+	printf("  -d	Enable debug error output\n");
+	printf("\n");
+	printf("This test requires a POWER8 or POWER9 CPU and a usable ");
+	printf("framebuffer at /dev/fb0.\n");
+}
+
+int main(int argc, char *argv[])
+{
+
+	struct sigaction sa;
+	int rc = 0;
+	int option = 0;
+
+	while ((option = getopt(argc, argv, "d")) != -1) {
+		switch (option) {
+		case 'd':
+			debug++;
+			break;
+		default:
+			usage(argv[0]);
+			exit(1);
+		}
+	}
+
+	bufsize = getpagesize();
+
+	sa.sa_sigaction = sighandler;
+	sigemptyset(&sa.sa_mask);
+	sa.sa_flags = SA_SIGINFO;
+	if (sigaction(SIGSEGV, &sa, NULL) == -1
+	    || sigaction(SIGBUS, &sa, NULL) == -1
+	    || sigaction(SIGILL, &sa, NULL) == -1) {
+		perror("sigaction");
+		exit(1);
+	}
+
+	rc |= test_harness(test_alignment_handler_vsx_206,
+			   "test_alignment_handler_vsx_206");
+	rc |= test_harness(test_alignment_handler_vsx_207,
+			   "test_alignment_handler_vsx_207");
+	rc |= test_harness(test_alignment_handler_vsx_300,
+			   "test_alignment_handler_vsx_300");
+	rc |= test_harness(test_alignment_handler_integer,
+			   "test_alignment_handler_integer");
+	rc |= test_harness(test_alignment_handler_integer_206,
+			   "test_alignment_handler_integer_206");
+	rc |= test_harness(test_alignment_handler_vmx,
+			   "test_alignment_handler_vmx");
+	rc |= test_harness(test_alignment_handler_fp,
+			   "test_alignment_handler_fp");
+	rc |= test_harness(test_alignment_handler_fp_205,
+			   "test_alignment_handler_fp_205");
+	rc |= test_harness(test_alignment_handler_fp_206,
+			   "test_alignment_handler_fp_206");
+	return rc;
+}
diff --git a/tools/testing/selftests/powerpc/alignment/copy_first_unaligned.c b/tools/testing/selftests/powerpc/alignment/copy_first_unaligned.c
new file mode 100644
index 000000000..5a9589987
--- /dev/null
+++ b/tools/testing/selftests/powerpc/alignment/copy_first_unaligned.c
@@ -0,0 +1,72 @@
+/*
+ * Copyright 2016, Chris Smart, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Calls to copy_first which are not 128-byte aligned should be
+ * caught and sent a SIGBUS.
+ *
+ */
+
+#include <signal.h>
+#include <string.h>
+#include <unistd.h>
+#include "utils.h"
+#include "instructions.h"
+
+unsigned int expected_instruction = PPC_INST_COPY_FIRST;
+unsigned int instruction_mask = 0xfc2007fe;
+
+void signal_action_handler(int signal_num, siginfo_t *info, void *ptr)
+{
+	ucontext_t *ctx = ptr;
+#ifdef __powerpc64__
+	unsigned int *pc = (unsigned int *)ctx->uc_mcontext.gp_regs[PT_NIP];
+#else
+	unsigned int *pc = (unsigned int *)ctx->uc_mcontext.uc_regs->gregs[PT_NIP];
+#endif
+
+	/*
+	 * Check that the signal was on the correct instruction, using a
+	 * mask because the compiler assigns the register at RB.
+	 */
+	if ((*pc & instruction_mask) == expected_instruction)
+		_exit(0); /* We hit the right instruction */
+
+	_exit(1);
+}
+
+void setup_signal_handler(void)
+{
+	struct sigaction signal_action;
+
+	memset(&signal_action, 0, sizeof(signal_action));
+	signal_action.sa_sigaction = signal_action_handler;
+	signal_action.sa_flags = SA_SIGINFO;
+	sigaction(SIGBUS, &signal_action, NULL);
+}
+
+char cacheline_buf[128] __cacheline_aligned;
+
+int test_copy_first_unaligned(void)
+{
+	/* Only run this test on a P9 or later */
+	SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_3_00));
+
+	/* Register our signal handler with SIGBUS */
+	setup_signal_handler();
+
+	/* +1 makes buf unaligned */
+	copy_first(cacheline_buf+1);
+
+	/* We should not get here */
+	return 1;
+}
+
+int main(int argc, char *argv[])
+{
+	return test_harness(test_copy_first_unaligned, "test_copy_first_unaligned");
+}
diff --git a/tools/testing/selftests/powerpc/benchmarks/.gitignore b/tools/testing/selftests/powerpc/benchmarks/.gitignore
new file mode 100644
index 000000000..9161679b1
--- /dev/null
+++ b/tools/testing/selftests/powerpc/benchmarks/.gitignore
@@ -0,0 +1,7 @@
+gettimeofday
+context_switch
+fork
+exec_target
+mmap_bench
+futex_bench
+null_syscall
diff --git a/tools/testing/selftests/powerpc/benchmarks/Makefile b/tools/testing/selftests/powerpc/benchmarks/Makefile
new file mode 100644
index 000000000..d40300a65
--- /dev/null
+++ b/tools/testing/selftests/powerpc/benchmarks/Makefile
@@ -0,0 +1,18 @@
+# SPDX-License-Identifier: GPL-2.0
+TEST_GEN_PROGS := gettimeofday context_switch fork mmap_bench futex_bench null_syscall
+TEST_GEN_FILES := exec_target
+
+CFLAGS += -O2
+
+top_srcdir = ../../../../..
+include ../../lib.mk
+
+$(TEST_GEN_PROGS): ../harness.c
+
+$(OUTPUT)/context_switch: ../utils.c
+$(OUTPUT)/context_switch: CFLAGS += -maltivec -mvsx -mabi=altivec
+$(OUTPUT)/context_switch: LDLIBS += -lpthread
+
+$(OUTPUT)/fork: LDLIBS += -lpthread
+
+$(OUTPUT)/exec_target: CFLAGS += -static -nostartfiles
diff --git a/tools/testing/selftests/powerpc/benchmarks/context_switch.c b/tools/testing/selftests/powerpc/benchmarks/context_switch.c
new file mode 100644
index 000000000..9ec767469
--- /dev/null
+++ b/tools/testing/selftests/powerpc/benchmarks/context_switch.c
@@ -0,0 +1,506 @@
+/*
+ * Context switch microbenchmark.
+ *
+ * Copyright (C) 2015 Anton Blanchard <anton@au.ibm.com>, IBM
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#define _GNU_SOURCE
+#include <errno.h>
+#include <sched.h>
+#include <string.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <getopt.h>
+#include <signal.h>
+#include <assert.h>
+#include <pthread.h>
+#include <limits.h>
+#include <sys/time.h>
+#include <sys/syscall.h>
+#include <sys/sysinfo.h>
+#include <sys/types.h>
+#include <sys/shm.h>
+#include <linux/futex.h>
+#ifdef __powerpc__
+#include <altivec.h>
+#endif
+#include "utils.h"
+
+static unsigned int timeout = 30;
+
+static int touch_vdso;
+struct timeval tv;
+
+static int touch_fp = 1;
+double fp;
+
+static int touch_vector = 1;
+vector int a, b, c;
+
+#ifdef __powerpc__
+static int touch_altivec = 1;
+
+/*
+ * Note: LTO (Link Time Optimisation) doesn't play well with this function
+ * attribute. Be very careful enabling LTO for this test.
+ */
+static void __attribute__((__target__("no-vsx"))) altivec_touch_fn(void)
+{
+	c = a + b;
+}
+#endif
+
+static void touch(void)
+{
+	if (touch_vdso)
+		gettimeofday(&tv, NULL);
+
+	if (touch_fp)
+		fp += 0.1;
+
+#ifdef __powerpc__
+	if (touch_altivec)
+		altivec_touch_fn();
+#endif
+
+	if (touch_vector)
+		c = a + b;
+
+	asm volatile("# %0 %1 %2": : "r"(&tv), "r"(&fp), "r"(&c));
+}
+
+static void start_thread_on(void *(*fn)(void *), void *arg, unsigned long cpu)
+{
+	int rc;
+	pthread_t tid;
+	cpu_set_t cpuset;
+	pthread_attr_t attr;
+
+	CPU_ZERO(&cpuset);
+	CPU_SET(cpu, &cpuset);
+
+	rc = pthread_attr_init(&attr);
+	if (rc) {
+		errno = rc;
+		perror("pthread_attr_init");
+		exit(1);
+	}
+
+	rc = pthread_attr_setaffinity_np(&attr, sizeof(cpu_set_t), &cpuset);
+	if (rc)	{
+		errno = rc;
+		perror("pthread_attr_setaffinity_np");
+		exit(1);
+	}
+
+	rc = pthread_create(&tid, &attr, fn, arg);
+	if (rc) {
+		errno = rc;
+		perror("pthread_create");
+		exit(1);
+	}
+}
+
+static void start_process_on(void *(*fn)(void *), void *arg, unsigned long cpu)
+{
+	int pid, ncpus;
+	cpu_set_t *cpuset;
+	size_t size;
+
+	pid = fork();
+	if (pid == -1) {
+		perror("fork");
+		exit(1);
+	}
+
+	if (pid)
+		return;
+
+	ncpus = get_nprocs();
+	size = CPU_ALLOC_SIZE(ncpus);
+	cpuset = CPU_ALLOC(ncpus);
+	if (!cpuset) {
+		perror("malloc");
+		exit(1);
+	}
+	CPU_ZERO_S(size, cpuset);
+	CPU_SET_S(cpu, size, cpuset);
+
+	if (sched_setaffinity(0, size, cpuset)) {
+		perror("sched_setaffinity");
+		CPU_FREE(cpuset);
+		exit(1);
+	}
+
+	CPU_FREE(cpuset);
+	fn(arg);
+
+	exit(0);
+}
+
+static unsigned long iterations;
+static unsigned long iterations_prev;
+
+static void sigalrm_handler(int junk)
+{
+	unsigned long i = iterations;
+
+	printf("%ld\n", i - iterations_prev);
+	iterations_prev = i;
+
+	if (--timeout == 0)
+		kill(0, SIGUSR1);
+
+	alarm(1);
+}
+
+static void sigusr1_handler(int junk)
+{
+	exit(0);
+}
+
+struct actions {
+	void (*setup)(int, int);
+	void *(*thread1)(void *);
+	void *(*thread2)(void *);
+};
+
+#define READ 0
+#define WRITE 1
+
+static int pipe_fd1[2];
+static int pipe_fd2[2];
+
+static void pipe_setup(int cpu1, int cpu2)
+{
+	if (pipe(pipe_fd1) || pipe(pipe_fd2))
+		exit(1);
+}
+
+static void *pipe_thread1(void *arg)
+{
+	signal(SIGALRM, sigalrm_handler);
+	alarm(1);
+
+	while (1) {
+		assert(read(pipe_fd1[READ], &c, 1) == 1);
+		touch();
+
+		assert(write(pipe_fd2[WRITE], &c, 1) == 1);
+		touch();
+
+		iterations += 2;
+	}
+
+	return NULL;
+}
+
+static void *pipe_thread2(void *arg)
+{
+	while (1) {
+		assert(write(pipe_fd1[WRITE], &c, 1) == 1);
+		touch();
+
+		assert(read(pipe_fd2[READ], &c, 1) == 1);
+		touch();
+	}
+
+	return NULL;
+}
+
+static struct actions pipe_actions = {
+	.setup = pipe_setup,
+	.thread1 = pipe_thread1,
+	.thread2 = pipe_thread2,
+};
+
+static void yield_setup(int cpu1, int cpu2)
+{
+	if (cpu1 != cpu2) {
+		fprintf(stderr, "Both threads must be on the same CPU for yield test\n");
+		exit(1);
+	}
+}
+
+static void *yield_thread1(void *arg)
+{
+	signal(SIGALRM, sigalrm_handler);
+	alarm(1);
+
+	while (1) {
+		sched_yield();
+		touch();
+
+		iterations += 2;
+	}
+
+	return NULL;
+}
+
+static void *yield_thread2(void *arg)
+{
+	while (1) {
+		sched_yield();
+		touch();
+	}
+
+	return NULL;
+}
+
+static struct actions yield_actions = {
+	.setup = yield_setup,
+	.thread1 = yield_thread1,
+	.thread2 = yield_thread2,
+};
+
+static long sys_futex(void *addr1, int op, int val1, struct timespec *timeout,
+		      void *addr2, int val3)
+{
+	return syscall(SYS_futex, addr1, op, val1, timeout, addr2, val3);
+}
+
+static unsigned long cmpxchg(unsigned long *p, unsigned long expected,
+			     unsigned long desired)
+{
+	unsigned long exp = expected;
+
+	__atomic_compare_exchange_n(p, &exp, desired, 0,
+				    __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
+	return exp;
+}
+
+static unsigned long xchg(unsigned long *p, unsigned long val)
+{
+	return __atomic_exchange_n(p, val, __ATOMIC_SEQ_CST);
+}
+
+static int processes;
+
+static int mutex_lock(unsigned long *m)
+{
+	int c;
+	int flags = FUTEX_WAIT;
+	if (!processes)
+		flags |= FUTEX_PRIVATE_FLAG;
+
+	c = cmpxchg(m, 0, 1);
+	if (!c)
+		return 0;
+
+	if (c == 1)
+		c = xchg(m, 2);
+
+	while (c) {
+		sys_futex(m, flags, 2, NULL, NULL, 0);
+		c = xchg(m, 2);
+	}
+
+	return 0;
+}
+
+static int mutex_unlock(unsigned long *m)
+{
+	int flags = FUTEX_WAKE;
+	if (!processes)
+		flags |= FUTEX_PRIVATE_FLAG;
+
+	if (*m == 2)
+		*m = 0;
+	else if (xchg(m, 0) == 1)
+		return 0;
+
+	sys_futex(m, flags, 1, NULL, NULL, 0);
+
+	return 0;
+}
+
+static unsigned long *m1, *m2;
+
+static void futex_setup(int cpu1, int cpu2)
+{
+	if (!processes) {
+		static unsigned long _m1, _m2;
+		m1 = &_m1;
+		m2 = &_m2;
+	} else {
+		int shmid;
+		void *shmaddr;
+
+		shmid = shmget(IPC_PRIVATE, getpagesize(), SHM_R | SHM_W);
+		if (shmid < 0) {
+			perror("shmget");
+			exit(1);
+		}
+
+		shmaddr = shmat(shmid, NULL, 0);
+		if (shmaddr == (char *)-1) {
+			perror("shmat");
+			shmctl(shmid, IPC_RMID, NULL);
+			exit(1);
+		}
+
+		shmctl(shmid, IPC_RMID, NULL);
+
+		m1 = shmaddr;
+		m2 = shmaddr + sizeof(*m1);
+	}
+
+	*m1 = 0;
+	*m2 = 0;
+
+	mutex_lock(m1);
+	mutex_lock(m2);
+}
+
+static void *futex_thread1(void *arg)
+{
+	signal(SIGALRM, sigalrm_handler);
+	alarm(1);
+
+	while (1) {
+		mutex_lock(m2);
+		mutex_unlock(m1);
+
+		iterations += 2;
+	}
+
+	return NULL;
+}
+
+static void *futex_thread2(void *arg)
+{
+	while (1) {
+		mutex_unlock(m2);
+		mutex_lock(m1);
+	}
+
+	return NULL;
+}
+
+static struct actions futex_actions = {
+	.setup = futex_setup,
+	.thread1 = futex_thread1,
+	.thread2 = futex_thread2,
+};
+
+static struct option options[] = {
+	{ "test", required_argument, 0, 't' },
+	{ "process", no_argument, &processes, 1 },
+	{ "timeout", required_argument, 0, 's' },
+	{ "vdso", no_argument, &touch_vdso, 1 },
+	{ "no-fp", no_argument, &touch_fp, 0 },
+#ifdef __powerpc__
+	{ "no-altivec", no_argument, &touch_altivec, 0 },
+#endif
+	{ "no-vector", no_argument, &touch_vector, 0 },
+	{ 0, },
+};
+
+static void usage(void)
+{
+	fprintf(stderr, "Usage: context_switch2 <options> CPU1 CPU2\n\n");
+	fprintf(stderr, "\t\t--test=X\tpipe, futex or yield (default)\n");
+	fprintf(stderr, "\t\t--process\tUse processes (default threads)\n");
+	fprintf(stderr, "\t\t--timeout=X\tDuration in seconds to run (default 30)\n");
+	fprintf(stderr, "\t\t--vdso\t\ttouch VDSO\n");
+	fprintf(stderr, "\t\t--no-fp\t\tDon't touch FP\n");
+#ifdef __powerpc__
+	fprintf(stderr, "\t\t--no-altivec\tDon't touch altivec\n");
+#endif
+	fprintf(stderr, "\t\t--no-vector\tDon't touch vector\n");
+}
+
+int main(int argc, char *argv[])
+{
+	signed char c;
+	struct actions *actions = &yield_actions;
+	int cpu1;
+	int cpu2;
+	static void (*start_fn)(void *(*fn)(void *), void *arg, unsigned long cpu);
+
+	while (1) {
+		int option_index = 0;
+
+		c = getopt_long(argc, argv, "", options, &option_index);
+
+		if (c == -1)
+			break;
+
+		switch (c) {
+		case 0:
+			if (options[option_index].flag != 0)
+				break;
+
+			usage();
+			exit(1);
+			break;
+
+		case 't':
+			if (!strcmp(optarg, "pipe")) {
+				actions = &pipe_actions;
+			} else if (!strcmp(optarg, "yield")) {
+				actions = &yield_actions;
+			} else if (!strcmp(optarg, "futex")) {
+				actions = &futex_actions;
+			} else {
+				usage();
+				exit(1);
+			}
+			break;
+
+		case 's':
+			timeout = atoi(optarg);
+			break;
+
+		default:
+			usage();
+			exit(1);
+		}
+	}
+
+	if (processes)
+		start_fn = start_process_on;
+	else
+		start_fn = start_thread_on;
+
+	if (((argc - optind) != 2)) {
+		cpu1 = cpu2 = pick_online_cpu();
+	} else {
+		cpu1 = atoi(argv[optind++]);
+		cpu2 = atoi(argv[optind++]);
+	}
+
+	printf("Using %s with ", processes ? "processes" : "threads");
+
+	if (actions == &pipe_actions)
+		printf("pipe");
+	else if (actions == &yield_actions)
+		printf("yield");
+	else
+		printf("futex");
+
+	printf(" on cpus %d/%d touching FP:%s altivec:%s vector:%s vdso:%s\n",
+	       cpu1, cpu2, touch_fp ?  "yes" : "no", touch_altivec ? "yes" : "no",
+	       touch_vector ? "yes" : "no", touch_vdso ? "yes" : "no");
+
+	/* Create a new process group so we can signal everyone for exit */
+	setpgid(getpid(), getpid());
+
+	signal(SIGUSR1, sigusr1_handler);
+
+	actions->setup(cpu1, cpu2);
+
+	start_fn(actions->thread1, NULL, cpu1);
+	start_fn(actions->thread2, NULL, cpu2);
+
+	while (1)
+		sleep(3600);
+
+	return 0;
+}
diff --git a/tools/testing/selftests/powerpc/benchmarks/exec_target.c b/tools/testing/selftests/powerpc/benchmarks/exec_target.c
new file mode 100644
index 000000000..c14b0fc1e
--- /dev/null
+++ b/tools/testing/selftests/powerpc/benchmarks/exec_target.c
@@ -0,0 +1,16 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+/*
+ * Part of fork context switch microbenchmark.
+ *
+ * Copyright 2018, Anton Blanchard, IBM Corp.
+ */
+
+#define _GNU_SOURCE
+#include <unistd.h>
+#include <sys/syscall.h>
+
+void _start(void)
+{
+	syscall(SYS_exit, 0);
+}
diff --git a/tools/testing/selftests/powerpc/benchmarks/fork.c b/tools/testing/selftests/powerpc/benchmarks/fork.c
new file mode 100644
index 000000000..d312e638c
--- /dev/null
+++ b/tools/testing/selftests/powerpc/benchmarks/fork.c
@@ -0,0 +1,325 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+/*
+ * Context switch microbenchmark.
+ *
+ * Copyright 2018, Anton Blanchard, IBM Corp.
+ */
+
+#define _GNU_SOURCE
+#include <assert.h>
+#include <errno.h>
+#include <getopt.h>
+#include <limits.h>
+#include <linux/futex.h>
+#include <pthread.h>
+#include <sched.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/shm.h>
+#include <sys/syscall.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+static unsigned int timeout = 30;
+
+static void set_cpu(int cpu)
+{
+	cpu_set_t cpuset;
+
+	if (cpu == -1)
+		return;
+
+	CPU_ZERO(&cpuset);
+	CPU_SET(cpu, &cpuset);
+
+	if (sched_setaffinity(0, sizeof(cpuset), &cpuset)) {
+		perror("sched_setaffinity");
+		exit(1);
+	}
+}
+
+static void start_process_on(void *(*fn)(void *), void *arg, int cpu)
+{
+	int pid;
+
+	pid = fork();
+	if (pid == -1) {
+		perror("fork");
+		exit(1);
+	}
+
+	if (pid)
+		return;
+
+	set_cpu(cpu);
+
+	fn(arg);
+
+	exit(0);
+}
+
+static int cpu;
+static int do_fork = 0;
+static int do_vfork = 0;
+static int do_exec = 0;
+static char *exec_file;
+static int exec_target = 0;
+static unsigned long iterations;
+static unsigned long iterations_prev;
+
+static void run_exec(void)
+{
+	char *const argv[] = { "./exec_target", NULL };
+
+	if (execve("./exec_target", argv, NULL) == -1) {
+		perror("execve");
+		exit(1);
+	}
+}
+
+static void bench_fork(void)
+{
+	while (1) {
+		pid_t pid = fork();
+		if (pid == -1) {
+			perror("fork");
+			exit(1);
+		}
+		if (pid == 0) {
+			if (do_exec)
+				run_exec();
+			_exit(0);
+		}
+		pid = waitpid(pid, NULL, 0);
+		if (pid == -1) {
+			perror("waitpid");
+			exit(1);
+		}
+		iterations++;
+	}
+}
+
+static void bench_vfork(void)
+{
+	while (1) {
+		pid_t pid = vfork();
+		if (pid == -1) {
+			perror("fork");
+			exit(1);
+		}
+		if (pid == 0) {
+			if (do_exec)
+				run_exec();
+			_exit(0);
+		}
+		pid = waitpid(pid, NULL, 0);
+		if (pid == -1) {
+			perror("waitpid");
+			exit(1);
+		}
+		iterations++;
+	}
+}
+
+static void *null_fn(void *arg)
+{
+	pthread_exit(NULL);
+}
+
+static void bench_thread(void)
+{
+	pthread_t tid;
+	cpu_set_t cpuset;
+	pthread_attr_t attr;
+	int rc;
+
+	rc = pthread_attr_init(&attr);
+	if (rc) {
+		errno = rc;
+		perror("pthread_attr_init");
+		exit(1);
+	}
+
+	if (cpu != -1) {
+		CPU_ZERO(&cpuset);
+		CPU_SET(cpu, &cpuset);
+
+		rc = pthread_attr_setaffinity_np(&attr, sizeof(cpu_set_t), &cpuset);
+		if (rc) {
+			errno = rc;
+			perror("pthread_attr_setaffinity_np");
+			exit(1);
+		}
+	}
+
+	while (1) {
+		rc = pthread_create(&tid, &attr, null_fn, NULL);
+		if (rc) {
+			errno = rc;
+			perror("pthread_create");
+			exit(1);
+		}
+		rc = pthread_join(tid, NULL);
+		if (rc) {
+			errno = rc;
+			perror("pthread_join");
+			exit(1);
+		}
+		iterations++;
+	}
+}
+
+static void sigalrm_handler(int junk)
+{
+	unsigned long i = iterations;
+
+	printf("%ld\n", i - iterations_prev);
+	iterations_prev = i;
+
+	if (--timeout == 0)
+		kill(0, SIGUSR1);
+
+	alarm(1);
+}
+
+static void sigusr1_handler(int junk)
+{
+	exit(0);
+}
+
+static void *bench_proc(void *arg)
+{
+	signal(SIGALRM, sigalrm_handler);
+	alarm(1);
+
+	if (do_fork)
+		bench_fork();
+	else if (do_vfork)
+		bench_vfork();
+	else
+		bench_thread();
+
+	return NULL;
+}
+
+static struct option options[] = {
+	{ "fork", no_argument, &do_fork, 1 },
+	{ "vfork", no_argument, &do_vfork, 1 },
+	{ "exec", no_argument, &do_exec, 1 },
+	{ "timeout", required_argument, 0, 's' },
+	{ "exec-target", no_argument, &exec_target, 1 },
+	{ NULL },
+};
+
+static void usage(void)
+{
+	fprintf(stderr, "Usage: fork <options> CPU\n\n");
+	fprintf(stderr, "\t\t--fork\tUse fork() (default threads)\n");
+	fprintf(stderr, "\t\t--vfork\tUse vfork() (default threads)\n");
+	fprintf(stderr, "\t\t--exec\tAlso exec() (default no exec)\n");
+	fprintf(stderr, "\t\t--timeout=X\tDuration in seconds to run (default 30)\n");
+	fprintf(stderr, "\t\t--exec-target\tInternal option for exec workload\n");
+}
+
+int main(int argc, char *argv[])
+{
+	signed char c;
+
+	while (1) {
+		int option_index = 0;
+
+		c = getopt_long(argc, argv, "", options, &option_index);
+
+		if (c == -1)
+			break;
+
+		switch (c) {
+		case 0:
+			if (options[option_index].flag != 0)
+				break;
+
+			usage();
+			exit(1);
+			break;
+
+		case 's':
+			timeout = atoi(optarg);
+			break;
+
+		default:
+			usage();
+			exit(1);
+		}
+	}
+
+	if (do_fork && do_vfork) {
+		usage();
+		exit(1);
+	}
+	if (do_exec && !do_fork && !do_vfork) {
+		usage();
+		exit(1);
+	}
+
+	if (do_exec) {
+		char *dirname = strdup(argv[0]);
+		int i;
+		i = strlen(dirname) - 1;
+		while (i) {
+			if (dirname[i] == '/') {
+				dirname[i] = '\0';
+				if (chdir(dirname) == -1) {
+					perror("chdir");
+					exit(1);
+				}
+				break;
+			}
+			i--;
+		}
+	}
+
+	if (exec_target) {
+		exit(0);
+	}
+
+	if (((argc - optind) != 1)) {
+		cpu = -1;
+	} else {
+		cpu = atoi(argv[optind++]);
+	}
+
+	if (do_exec)
+		exec_file = argv[0];
+
+	set_cpu(cpu);
+
+	printf("Using ");
+	if (do_fork)
+		printf("fork");
+	else if (do_vfork)
+		printf("vfork");
+	else
+		printf("clone");
+
+	if (do_exec)
+		printf(" + exec");
+
+	printf(" on cpu %d\n", cpu);
+
+	/* Create a new process group so we can signal everyone for exit */
+	setpgid(getpid(), getpid());
+
+	signal(SIGUSR1, sigusr1_handler);
+
+	start_process_on(bench_proc, NULL, cpu);
+
+	while (1)
+		sleep(3600);
+
+	return 0;
+}
diff --git a/tools/testing/selftests/powerpc/benchmarks/futex_bench.c b/tools/testing/selftests/powerpc/benchmarks/futex_bench.c
new file mode 100644
index 000000000..d58e4dc50
--- /dev/null
+++ b/tools/testing/selftests/powerpc/benchmarks/futex_bench.c
@@ -0,0 +1,43 @@
+/*
+ * Copyright 2016, Anton Blanchard, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#define _GNU_SOURCE
+
+#include <stdio.h>
+#include <sys/syscall.h>
+#include <time.h>
+#include <unistd.h>
+#include <linux/futex.h>
+
+#include "utils.h"
+
+#define ITERATIONS 100000000
+
+#define futex(A, B, C, D, E, F)	 syscall(__NR_futex, A, B, C, D, E, F)
+
+int test_futex(void)
+{
+	struct timespec ts_start, ts_end;
+	unsigned long i = ITERATIONS;
+
+	clock_gettime(CLOCK_MONOTONIC, &ts_start);
+
+	while (i--) {
+		unsigned int addr = 0;
+		futex(&addr, FUTEX_WAKE, 1, NULL, NULL, 0);
+	}
+
+	clock_gettime(CLOCK_MONOTONIC, &ts_end);
+
+	printf("time = %.6f\n", ts_end.tv_sec - ts_start.tv_sec + (ts_end.tv_nsec - ts_start.tv_nsec) / 1e9);
+
+	return 0;
+}
+
+int main(void)
+{
+	test_harness_set_timeout(300);
+	return test_harness(test_futex, "futex_bench");
+}
diff --git a/tools/testing/selftests/powerpc/benchmarks/gettimeofday.c b/tools/testing/selftests/powerpc/benchmarks/gettimeofday.c
new file mode 100644
index 000000000..3af3c21e8
--- /dev/null
+++ b/tools/testing/selftests/powerpc/benchmarks/gettimeofday.c
@@ -0,0 +1,31 @@
+/*
+ * Copyright 2015, Anton Blanchard, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <sys/time.h>
+#include <stdio.h>
+
+#include "utils.h"
+
+static int test_gettimeofday(void)
+{
+	int i;
+
+	struct timeval tv_start, tv_end;
+
+	gettimeofday(&tv_start, NULL);
+
+	for(i = 0; i < 100000000; i++) {
+		gettimeofday(&tv_end, NULL);
+	}
+
+	printf("time = %.6f\n", tv_end.tv_sec - tv_start.tv_sec + (tv_end.tv_usec - tv_start.tv_usec) * 1e-6);
+
+	return 0;
+}
+
+int main(void)
+{
+	return test_harness(test_gettimeofday, "gettimeofday");
+}
diff --git a/tools/testing/selftests/powerpc/benchmarks/mmap_bench.c b/tools/testing/selftests/powerpc/benchmarks/mmap_bench.c
new file mode 100644
index 000000000..033de0560
--- /dev/null
+++ b/tools/testing/selftests/powerpc/benchmarks/mmap_bench.c
@@ -0,0 +1,90 @@
+/*
+ * Copyright 2016, Anton Blanchard, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+#include <time.h>
+#include <getopt.h>
+
+#include "utils.h"
+
+#define ITERATIONS 5000000
+
+#define MEMSIZE (1UL << 27)
+#define PAGE_SIZE (1UL << 16)
+#define CHUNK_COUNT (MEMSIZE/PAGE_SIZE)
+
+static int pg_fault;
+static int iterations = ITERATIONS;
+
+static struct option options[] = {
+	{ "pgfault", no_argument, &pg_fault, 1 },
+	{ "iterations", required_argument, 0, 'i' },
+	{ 0, },
+};
+
+static void usage(void)
+{
+	printf("mmap_bench <--pgfault> <--iterations count>\n");
+}
+
+int test_mmap(void)
+{
+	struct timespec ts_start, ts_end;
+	unsigned long i = iterations;
+
+	clock_gettime(CLOCK_MONOTONIC, &ts_start);
+
+	while (i--) {
+		char *c = mmap(NULL, MEMSIZE, PROT_READ|PROT_WRITE,
+			       MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
+		FAIL_IF(c == MAP_FAILED);
+		if (pg_fault) {
+			int count;
+			for (count = 0; count < CHUNK_COUNT; count++)
+				c[count << 16] = 'c';
+		}
+		munmap(c, MEMSIZE);
+	}
+
+	clock_gettime(CLOCK_MONOTONIC, &ts_end);
+
+	printf("time = %.6f\n", ts_end.tv_sec - ts_start.tv_sec + (ts_end.tv_nsec - ts_start.tv_nsec) / 1e9);
+
+	return 0;
+}
+
+int main(int argc, char *argv[])
+{
+	signed char c;
+	while (1) {
+		int option_index = 0;
+
+		c = getopt_long(argc, argv, "", options, &option_index);
+
+		if (c == -1)
+			break;
+
+		switch (c) {
+		case 0:
+			if (options[option_index].flag != 0)
+				break;
+
+			usage();
+			exit(1);
+			break;
+		case 'i':
+			iterations = atoi(optarg);
+			break;
+		default:
+			usage();
+			exit(1);
+		}
+	}
+
+	test_harness_set_timeout(300);
+	return test_harness(test_mmap, "mmap_bench");
+}
diff --git a/tools/testing/selftests/powerpc/benchmarks/null_syscall.c b/tools/testing/selftests/powerpc/benchmarks/null_syscall.c
new file mode 100644
index 000000000..ecc14d68e
--- /dev/null
+++ b/tools/testing/selftests/powerpc/benchmarks/null_syscall.c
@@ -0,0 +1,157 @@
+/*
+ * Test null syscall performance
+ *
+ * Copyright (C) 2009-2015 Anton Blanchard, IBM
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#define NR_LOOPS 10000000
+
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <time.h>
+#include <sys/types.h>
+#include <sys/time.h>
+#include <signal.h>
+
+static volatile int soak_done;
+unsigned long long clock_frequency;
+unsigned long long timebase_frequency;
+double timebase_multiplier;
+
+static inline unsigned long long mftb(void)
+{
+	unsigned long low;
+
+	asm volatile("mftb %0" : "=r" (low));
+
+	return low;
+}
+
+static void sigalrm_handler(int unused)
+{
+	soak_done = 1;
+}
+
+/*
+ * Use a timer instead of busy looping on clock_gettime() so we don't
+ * pollute profiles with glibc and VDSO hits.
+ */
+static void cpu_soak_usecs(unsigned long usecs)
+{
+	struct itimerval val;
+
+	memset(&val, 0, sizeof(val));
+	val.it_value.tv_usec = usecs;
+
+	signal(SIGALRM, sigalrm_handler);
+	setitimer(ITIMER_REAL, &val, NULL);
+
+	while (1) {
+		if (soak_done)
+			break;
+	}
+
+	signal(SIGALRM, SIG_DFL);
+}
+
+/*
+ * This only works with recent kernels where cpufreq modifies
+ * /proc/cpuinfo dynamically.
+ */
+static void get_proc_frequency(void)
+{
+	FILE *f;
+	char line[128];
+	char *p, *end;
+	unsigned long v;
+	double d;
+	char *override;
+
+	/* Try to get out of low power/low frequency mode */
+	cpu_soak_usecs(0.25 * 1000000);
+
+	f = fopen("/proc/cpuinfo", "r");
+	if (f == NULL)
+		return;
+
+	timebase_frequency = 0;
+
+	while (fgets(line, sizeof(line), f) != NULL) {
+		if (strncmp(line, "timebase", 8) == 0) {
+			p = strchr(line, ':');
+			if (p != NULL) {
+				v = strtoull(p + 1, &end, 0);
+				if (end != p + 1)
+					timebase_frequency = v;
+			}
+		}
+
+		if (((strncmp(line, "clock", 5) == 0) ||
+		     (strncmp(line, "cpu MHz", 7) == 0))) {
+			p = strchr(line, ':');
+			if (p != NULL) {
+				d = strtod(p + 1, &end);
+				if (end != p + 1) {
+					/* Find fastest clock frequency */
+					if ((d * 1000000ULL) > clock_frequency)
+						clock_frequency = d * 1000000ULL;
+				}
+			}
+		}
+	}
+
+	fclose(f);
+
+	override = getenv("FREQUENCY");
+	if (override)
+		clock_frequency = strtoull(override, NULL, 10);
+
+	if (timebase_frequency)
+		timebase_multiplier = (double)clock_frequency
+					/ timebase_frequency;
+	else
+		timebase_multiplier = 1;
+}
+
+static void do_null_syscall(unsigned long nr)
+{
+	unsigned long i;
+
+	for (i = 0; i < nr; i++)
+		getppid();
+}
+
+#define TIME(A, STR) \
+
+int main(void)
+{
+	unsigned long tb_start, tb_now;
+	struct timespec tv_start, tv_now;
+	unsigned long long elapsed_ns, elapsed_tb;
+
+	get_proc_frequency();
+
+	clock_gettime(CLOCK_MONOTONIC, &tv_start);
+	tb_start = mftb();
+
+	do_null_syscall(NR_LOOPS);
+
+	clock_gettime(CLOCK_MONOTONIC, &tv_now);
+	tb_now = mftb();
+
+	elapsed_ns = (tv_now.tv_sec - tv_start.tv_sec) * 1000000000ULL +
+			(tv_now.tv_nsec - tv_start.tv_nsec);
+	elapsed_tb = tb_now - tb_start;
+
+	printf("%10.2f ns %10.2f cycles\n", (float)elapsed_ns / NR_LOOPS,
+			(float)elapsed_tb * timebase_multiplier / NR_LOOPS);
+
+	return 0;
+}
diff --git a/tools/testing/selftests/powerpc/cache_shape/.gitignore b/tools/testing/selftests/powerpc/cache_shape/.gitignore
new file mode 100644
index 000000000..ec1848434
--- /dev/null
+++ b/tools/testing/selftests/powerpc/cache_shape/.gitignore
@@ -0,0 +1 @@
+cache_shape
diff --git a/tools/testing/selftests/powerpc/cache_shape/Makefile b/tools/testing/selftests/powerpc/cache_shape/Makefile
new file mode 100644
index 000000000..689f6c8eb
--- /dev/null
+++ b/tools/testing/selftests/powerpc/cache_shape/Makefile
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: GPL-2.0
+TEST_GEN_PROGS := cache_shape
+
+top_srcdir = ../../../../..
+include ../../lib.mk
+
+$(TEST_GEN_PROGS): ../harness.c ../utils.c
diff --git a/tools/testing/selftests/powerpc/cache_shape/cache_shape.c b/tools/testing/selftests/powerpc/cache_shape/cache_shape.c
new file mode 100644
index 000000000..29ec07eba
--- /dev/null
+++ b/tools/testing/selftests/powerpc/cache_shape/cache_shape.c
@@ -0,0 +1,125 @@
+/*
+ * Copyright 2017, Michael Ellerman, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <elf.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <link.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "utils.h"
+
+#ifndef AT_L1I_CACHESIZE
+#define AT_L1I_CACHESIZE	40
+#define AT_L1I_CACHEGEOMETRY	41
+#define AT_L1D_CACHESIZE	42
+#define AT_L1D_CACHEGEOMETRY	43
+#define AT_L2_CACHESIZE		44
+#define AT_L2_CACHEGEOMETRY	45
+#define AT_L3_CACHESIZE		46
+#define AT_L3_CACHEGEOMETRY	47
+#endif
+
+static void print_size(const char *label, uint32_t val)
+{
+	printf("%s cache size: %#10x %10dB %10dK\n", label, val, val, val / 1024);
+}
+
+static void print_geo(const char *label, uint32_t val)
+{
+	uint16_t assoc;
+
+	printf("%s line size:  %#10x       ", label, val & 0xFFFF);
+
+	assoc = val >> 16;
+	if (assoc)
+		printf("%u-way", assoc);
+	else
+		printf("fully");
+
+	printf(" associative\n");
+}
+
+static int test_cache_shape()
+{
+	static char buffer[4096];
+	ElfW(auxv_t) *p;
+	int found;
+
+	FAIL_IF(read_auxv(buffer, sizeof(buffer)));
+
+	found = 0;
+
+	p = find_auxv_entry(AT_L1I_CACHESIZE, buffer);
+	if (p) {
+		found++;
+		print_size("L1I ", (uint32_t)p->a_un.a_val);
+	}
+
+	p = find_auxv_entry(AT_L1I_CACHEGEOMETRY, buffer);
+	if (p) {
+		found++;
+		print_geo("L1I ", (uint32_t)p->a_un.a_val);
+	}
+
+	p = find_auxv_entry(AT_L1D_CACHESIZE, buffer);
+	if (p) {
+		found++;
+		print_size("L1D ", (uint32_t)p->a_un.a_val);
+	}
+
+	p = find_auxv_entry(AT_L1D_CACHEGEOMETRY, buffer);
+	if (p) {
+		found++;
+		print_geo("L1D ", (uint32_t)p->a_un.a_val);
+	}
+
+	p = find_auxv_entry(AT_L2_CACHESIZE, buffer);
+	if (p) {
+		found++;
+		print_size("L2  ", (uint32_t)p->a_un.a_val);
+	}
+
+	p = find_auxv_entry(AT_L2_CACHEGEOMETRY, buffer);
+	if (p) {
+		found++;
+		print_geo("L2  ", (uint32_t)p->a_un.a_val);
+	}
+
+	p = find_auxv_entry(AT_L3_CACHESIZE, buffer);
+	if (p) {
+		found++;
+		print_size("L3  ", (uint32_t)p->a_un.a_val);
+	}
+
+	p = find_auxv_entry(AT_L3_CACHEGEOMETRY, buffer);
+	if (p) {
+		found++;
+		print_geo("L3  ", (uint32_t)p->a_un.a_val);
+	}
+
+	/* If we found none we're probably on a system where they don't exist */
+	SKIP_IF(found == 0);
+
+	/* But if we found any, we expect to find them all */
+	FAIL_IF(found != 8);
+
+	return 0;
+}
+
+int main(void)
+{
+	return test_harness(test_cache_shape, "cache_shape");
+}
diff --git a/tools/testing/selftests/powerpc/copyloops/.gitignore b/tools/testing/selftests/powerpc/copyloops/.gitignore
new file mode 100644
index 000000000..ce12cd0e2
--- /dev/null
+++ b/tools/testing/selftests/powerpc/copyloops/.gitignore
@@ -0,0 +1,13 @@
+copyuser_64_t0
+copyuser_64_t1
+copyuser_64_t2
+copyuser_power7_t0
+copyuser_power7_t1
+memcpy_64_t0
+memcpy_64_t1
+memcpy_64_t2
+memcpy_power7_t0
+memcpy_power7_t1
+copyuser_64_exc_t0
+copyuser_64_exc_t1
+copyuser_64_exc_t2
diff --git a/tools/testing/selftests/powerpc/copyloops/Makefile b/tools/testing/selftests/powerpc/copyloops/Makefile
new file mode 100644
index 000000000..44574f381
--- /dev/null
+++ b/tools/testing/selftests/powerpc/copyloops/Makefile
@@ -0,0 +1,53 @@
+# SPDX-License-Identifier: GPL-2.0
+# The loops are all 64-bit code
+CFLAGS += -m64
+CFLAGS += -I$(CURDIR)
+CFLAGS += -D SELFTEST
+CFLAGS += -maltivec
+CFLAGS += -mcpu=power4
+
+# Use our CFLAGS for the implicit .S rule & set the asm machine type
+ASFLAGS = $(CFLAGS) -Wa,-mpower4
+
+TEST_GEN_PROGS := copyuser_64_t0 copyuser_64_t1 copyuser_64_t2 \
+		copyuser_p7_t0 copyuser_p7_t1 \
+		memcpy_64_t0 memcpy_64_t1 memcpy_64_t2 \
+		memcpy_p7_t0 memcpy_p7_t1 \
+		copyuser_64_exc_t0 copyuser_64_exc_t1 copyuser_64_exc_t2
+
+EXTRA_SOURCES := validate.c ../harness.c stubs.S
+
+top_srcdir = ../../../../..
+include ../../lib.mk
+
+$(OUTPUT)/copyuser_64_t%:	copyuser_64.S $(EXTRA_SOURCES)
+	$(CC) $(CPPFLAGS) $(CFLAGS) \
+		-D COPY_LOOP=test___copy_tofrom_user_base \
+		-D SELFTEST_CASE=$(subst copyuser_64_t,,$(notdir $@)) \
+		-o $@ $^
+
+$(OUTPUT)/copyuser_p7_t%:	copyuser_power7.S $(EXTRA_SOURCES)
+	$(CC) $(CPPFLAGS) $(CFLAGS) \
+		-D COPY_LOOP=test___copy_tofrom_user_power7 \
+		-D SELFTEST_CASE=$(subst copyuser_p7_t,,$(notdir $@)) \
+		-o $@ $^
+
+# Strictly speaking, we only need the memcpy_64 test cases for big-endian
+$(OUTPUT)/memcpy_64_t%:	memcpy_64.S $(EXTRA_SOURCES)
+	$(CC) $(CPPFLAGS) $(CFLAGS) \
+		-D COPY_LOOP=test_memcpy \
+		-D SELFTEST_CASE=$(subst memcpy_64_t,,$(notdir $@)) \
+		-o $@ $^
+
+$(OUTPUT)/memcpy_p7_t%:	memcpy_power7.S $(EXTRA_SOURCES)
+	$(CC) $(CPPFLAGS) $(CFLAGS) \
+		-D COPY_LOOP=test_memcpy_power7 \
+		-D SELFTEST_CASE=$(subst memcpy_p7_t,,$(notdir $@)) \
+		-o $@ $^
+
+$(OUTPUT)/copyuser_64_exc_t%: copyuser_64.S exc_validate.c ../harness.c \
+		copy_tofrom_user_reference.S stubs.S
+	$(CC) $(CPPFLAGS) $(CFLAGS) \
+		-D COPY_LOOP=test___copy_tofrom_user_base \
+		-D SELFTEST_CASE=$(subst copyuser_64_exc_t,,$(notdir $@)) \
+		-o $@ $^
diff --git a/tools/testing/selftests/powerpc/copyloops/asm/asm-compat.h b/tools/testing/selftests/powerpc/copyloops/asm/asm-compat.h
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/tools/testing/selftests/powerpc/copyloops/asm/asm-compat.h
diff --git a/tools/testing/selftests/powerpc/copyloops/asm/export.h b/tools/testing/selftests/powerpc/copyloops/asm/export.h
new file mode 100644
index 000000000..0bab35f67
--- /dev/null
+++ b/tools/testing/selftests/powerpc/copyloops/asm/export.h
@@ -0,0 +1,2 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#define EXPORT_SYMBOL(x)
diff --git a/tools/testing/selftests/powerpc/copyloops/asm/feature-fixups.h b/tools/testing/selftests/powerpc/copyloops/asm/feature-fixups.h
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/tools/testing/selftests/powerpc/copyloops/asm/feature-fixups.h
diff --git a/tools/testing/selftests/powerpc/copyloops/asm/ppc_asm.h b/tools/testing/selftests/powerpc/copyloops/asm/ppc_asm.h
new file mode 100644
index 000000000..0605df807
--- /dev/null
+++ b/tools/testing/selftests/powerpc/copyloops/asm/ppc_asm.h
@@ -0,0 +1,47 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __SELFTESTS_POWERPC_PPC_ASM_H
+#define __SELFTESTS_POWERPC_PPC_ASM_H
+#include <ppc-asm.h>
+
+#define CONFIG_ALTIVEC
+
+#define r1	1
+
+#define R14 r14
+#define R15 r15
+#define R16 r16
+#define R17 r17
+#define R18 r18
+#define R19 r19
+#define R20 r20
+#define R21 r21
+#define R22 r22
+#define R29 r29
+#define R30 r30
+#define R31 r31
+
+#define STACKFRAMESIZE	256
+#define STK_REG(i)	(112 + ((i)-14)*8)
+
+#define _GLOBAL(A) FUNC_START(test_ ## A)
+#define _GLOBAL_TOC(A) _GLOBAL(A)
+
+#define PPC_MTOCRF(A, B)	mtocrf A, B
+
+#define EX_TABLE(x, y)			\
+	.section __ex_table,"a";	\
+	.8byte	x, y;			\
+	.previous
+
+#define BEGIN_FTR_SECTION		.if test_feature
+#define FTR_SECTION_ELSE		.else
+#define ALT_FTR_SECTION_END_IFCLR(x)	.endif
+#define ALT_FTR_SECTION_END_IFSET(x)	.endif
+#define ALT_FTR_SECTION_END(x, y)	.endif
+#define END_FTR_SECTION_IFCLR(x)	.endif
+#define END_FTR_SECTION_IFSET(x)	.endif
+
+/* Default to taking the first of any alternative feature sections */
+test_feature = 1
+
+#endif /* __SELFTESTS_POWERPC_PPC_ASM_H */
diff --git a/tools/testing/selftests/powerpc/copyloops/asm/processor.h b/tools/testing/selftests/powerpc/copyloops/asm/processor.h
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/tools/testing/selftests/powerpc/copyloops/asm/processor.h
diff --git a/tools/testing/selftests/powerpc/copyloops/copy_tofrom_user_reference.S b/tools/testing/selftests/powerpc/copyloops/copy_tofrom_user_reference.S
new file mode 100644
index 000000000..3363b8640
--- /dev/null
+++ b/tools/testing/selftests/powerpc/copyloops/copy_tofrom_user_reference.S
@@ -0,0 +1,24 @@
+#include <asm/ppc_asm.h>
+
+_GLOBAL(copy_tofrom_user_reference)
+	cmpdi	r5,0
+	beq	4f
+
+	mtctr	r5
+
+1:	lbz	r6,0(r4)
+2:	stb	r6,0(r3)
+	addi	r3,r3,1
+	addi	r4,r4,1
+	bdnz	1b
+
+3:	mfctr	r3
+	blr
+
+4:	mr	r3,r5
+	blr
+
+.section __ex_table,"a"
+	.llong	1b,3b
+	.llong	2b,3b
+.text
diff --git a/tools/testing/selftests/powerpc/copyloops/copyuser_64.S b/tools/testing/selftests/powerpc/copyloops/copyuser_64.S
new file mode 120000
index 000000000..f1c418a25
--- /dev/null
+++ b/tools/testing/selftests/powerpc/copyloops/copyuser_64.S
@@ -0,0 +1 @@
+../../../../../arch/powerpc/lib/copyuser_64.S
+\ No newline at end of file
diff --git a/tools/testing/selftests/powerpc/copyloops/copyuser_power7.S b/tools/testing/selftests/powerpc/copyloops/copyuser_power7.S
new file mode 120000
index 000000000..478689598
--- /dev/null
+++ b/tools/testing/selftests/powerpc/copyloops/copyuser_power7.S
@@ -0,0 +1 @@
+../../../../../arch/powerpc/lib/copyuser_power7.S
+\ No newline at end of file
diff --git a/tools/testing/selftests/powerpc/copyloops/exc_validate.c b/tools/testing/selftests/powerpc/copyloops/exc_validate.c
new file mode 100644
index 000000000..c896ea9a7
--- /dev/null
+++ b/tools/testing/selftests/powerpc/copyloops/exc_validate.c
@@ -0,0 +1,124 @@
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <signal.h>
+#include <unistd.h>
+#include <sys/mman.h>
+
+#include "utils.h"
+
+extern char __start___ex_table[];
+extern char __stop___ex_table[];
+
+#if defined(__powerpc64__)
+#define UCONTEXT_NIA(UC)	(UC)->uc_mcontext.gp_regs[PT_NIP]
+#elif defined(__powerpc__)
+#define UCONTEXT_NIA(UC)	(UC)->uc_mcontext.uc_regs->gregs[PT_NIP]
+#else
+#error implement UCONTEXT_NIA
+#endif
+
+static void segv_handler(int signr, siginfo_t *info, void *ptr)
+{
+	ucontext_t *uc = (ucontext_t *)ptr;
+	unsigned long addr = (unsigned long)info->si_addr;
+	unsigned long *ip = &UCONTEXT_NIA(uc);
+	unsigned long *ex_p = (unsigned long *)__start___ex_table;
+
+	while (ex_p < (unsigned long *)__stop___ex_table) {
+		unsigned long insn, fixup;
+
+		insn = *ex_p++;
+		fixup = *ex_p++;
+
+		if (insn == *ip) {
+			*ip = fixup;
+			return;
+		}
+	}
+
+	printf("No exception table match for NIA %lx ADDR %lx\n", *ip, addr);
+	abort();
+}
+
+static void setup_segv_handler(void)
+{
+	struct sigaction action;
+
+	memset(&action, 0, sizeof(action));
+	action.sa_sigaction = segv_handler;
+	action.sa_flags = SA_SIGINFO;
+	sigaction(SIGSEGV, &action, NULL);
+}
+
+unsigned long COPY_LOOP(void *to, const void *from, unsigned long size);
+unsigned long test_copy_tofrom_user_reference(void *to, const void *from, unsigned long size);
+
+static int total_passed;
+static int total_failed;
+
+static void do_one_test(char *dstp, char *srcp, unsigned long len)
+{
+	unsigned long got, expected;
+
+	got = COPY_LOOP(dstp, srcp, len);
+	expected = test_copy_tofrom_user_reference(dstp, srcp, len);
+
+	if (got != expected) {
+		total_failed++;
+		printf("FAIL from=%p to=%p len=%ld returned %ld, expected %ld\n",
+		       srcp, dstp, len, got, expected);
+		//abort();
+	} else
+		total_passed++;
+}
+
+//#define MAX_LEN 512
+#define MAX_LEN 16
+
+int test_copy_exception(void)
+{
+	int page_size;
+	static char *p, *q;
+	unsigned long src, dst, len;
+
+	page_size = getpagesize();
+	p = mmap(NULL, page_size * 2, PROT_READ|PROT_WRITE,
+		MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
+
+	if (p == MAP_FAILED) {
+		perror("mmap");
+		exit(1);
+	}
+
+	memset(p, 0, page_size);
+
+	setup_segv_handler();
+
+	if (mprotect(p + page_size, page_size, PROT_NONE)) {
+		perror("mprotect");
+		exit(1);
+	}
+
+	q = p + page_size - MAX_LEN;
+
+	for (src = 0; src < MAX_LEN; src++) {
+		for (dst = 0; dst < MAX_LEN; dst++) {
+			for (len = 0; len < MAX_LEN+1; len++) {
+				// printf("from=%p to=%p len=%ld\n", q+dst, q+src, len);
+				do_one_test(q+dst, q+src, len);
+			}
+		}
+	}
+
+	printf("Totals:\n");
+	printf("  Pass: %d\n", total_passed);
+	printf("  Fail: %d\n", total_failed);
+
+	return 0;
+}
+
+int main(void)
+{
+	return test_harness(test_copy_exception, str(COPY_LOOP));
+}
diff --git a/tools/testing/selftests/powerpc/copyloops/memcpy_64.S b/tools/testing/selftests/powerpc/copyloops/memcpy_64.S
new file mode 120000
index 000000000..cce33fb6f
--- /dev/null
+++ b/tools/testing/selftests/powerpc/copyloops/memcpy_64.S
@@ -0,0 +1 @@
+../../../../../arch/powerpc/lib/memcpy_64.S
+\ No newline at end of file
diff --git a/tools/testing/selftests/powerpc/copyloops/memcpy_power7.S b/tools/testing/selftests/powerpc/copyloops/memcpy_power7.S
new file mode 120000
index 000000000..0d6fbfaf3
--- /dev/null
+++ b/tools/testing/selftests/powerpc/copyloops/memcpy_power7.S
@@ -0,0 +1 @@
+../../../../../arch/powerpc/lib/memcpy_power7.S
+\ No newline at end of file
diff --git a/tools/testing/selftests/powerpc/copyloops/stubs.S b/tools/testing/selftests/powerpc/copyloops/stubs.S
new file mode 100644
index 000000000..ec8bcf2bf
--- /dev/null
+++ b/tools/testing/selftests/powerpc/copyloops/stubs.S
@@ -0,0 +1,19 @@
+#include <asm/ppc_asm.h>
+
+FUNC_START(enter_vmx_usercopy)
+	li	r3,1
+	blr
+
+FUNC_START(exit_vmx_usercopy)
+	li	r3,0
+	blr
+
+FUNC_START(enter_vmx_ops)
+	li	r3,1
+	blr
+
+FUNC_START(exit_vmx_ops)
+	blr
+
+FUNC_START(__copy_tofrom_user_base)
+	blr
diff --git a/tools/testing/selftests/powerpc/copyloops/validate.c b/tools/testing/selftests/powerpc/copyloops/validate.c
new file mode 100644
index 000000000..0f6873618
--- /dev/null
+++ b/tools/testing/selftests/powerpc/copyloops/validate.c
@@ -0,0 +1,100 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <malloc.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdbool.h>
+
+#include "utils.h"
+
+#define MAX_LEN 8192
+#define MAX_OFFSET 16
+#define MIN_REDZONE 128
+#define BUFLEN (MAX_LEN+MAX_OFFSET+2*MIN_REDZONE)
+#define POISON 0xa5
+
+unsigned long COPY_LOOP(void *to, const void *from, unsigned long size);
+
+static void do_one(char *src, char *dst, unsigned long src_off,
+		   unsigned long dst_off, unsigned long len, void *redzone,
+		   void *fill)
+{
+	char *srcp, *dstp;
+	unsigned long ret;
+	unsigned long i;
+
+	srcp = src + MIN_REDZONE + src_off;
+	dstp = dst + MIN_REDZONE + dst_off;
+
+	memset(src, POISON, BUFLEN);
+	memset(dst, POISON, BUFLEN);
+	memcpy(srcp, fill, len);
+
+	ret = COPY_LOOP(dstp, srcp, len);
+	if (ret && ret != (unsigned long)dstp) {
+		printf("(%p,%p,%ld) returned %ld\n", dstp, srcp, len, ret);
+		abort();
+	}
+
+	if (memcmp(dstp, srcp, len)) {
+		printf("(%p,%p,%ld) miscompare\n", dstp, srcp, len);
+		printf("src: ");
+		for (i = 0; i < len; i++)
+			printf("%02x ", srcp[i]);
+		printf("\ndst: ");
+		for (i = 0; i < len; i++)
+			printf("%02x ", dstp[i]);
+		printf("\n");
+		abort();
+	}
+
+	if (memcmp(dst, redzone, dstp - dst)) {
+		printf("(%p,%p,%ld) redzone before corrupted\n",
+		       dstp, srcp, len);
+		abort();
+	}
+
+	if (memcmp(dstp+len, redzone, dst+BUFLEN-(dstp+len))) {
+		printf("(%p,%p,%ld) redzone after corrupted\n",
+		       dstp, srcp, len);
+		abort();
+	}
+}
+
+int test_copy_loop(void)
+{
+	char *src, *dst, *redzone, *fill;
+	unsigned long len, src_off, dst_off;
+	unsigned long i;
+
+	src = memalign(BUFLEN, BUFLEN);
+	dst = memalign(BUFLEN, BUFLEN);
+	redzone = malloc(BUFLEN);
+	fill = malloc(BUFLEN);
+
+	if (!src || !dst || !redzone || !fill) {
+		fprintf(stderr, "malloc failed\n");
+		exit(1);
+	}
+
+	memset(redzone, POISON, BUFLEN);
+
+	/* Fill with sequential bytes */
+	for (i = 0; i < BUFLEN; i++)
+		fill[i] = i & 0xff;
+
+	for (len = 1; len < MAX_LEN; len++) {
+		for (src_off = 0; src_off < MAX_OFFSET; src_off++) {
+			for (dst_off = 0; dst_off < MAX_OFFSET; dst_off++) {
+				do_one(src, dst, src_off, dst_off, len,
+				       redzone, fill);
+			}
+		}
+	}
+
+	return 0;
+}
+
+int main(void)
+{
+	return test_harness(test_copy_loop, str(COPY_LOOP));
+}
diff --git a/tools/testing/selftests/powerpc/dscr/.gitignore b/tools/testing/selftests/powerpc/dscr/.gitignore
new file mode 100644
index 000000000..b585c6c15
--- /dev/null
+++ b/tools/testing/selftests/powerpc/dscr/.gitignore
@@ -0,0 +1,7 @@
+dscr_default_test
+dscr_explicit_test
+dscr_inherit_exec_test
+dscr_inherit_test
+dscr_sysfs_test
+dscr_sysfs_thread_test
+dscr_user_test
diff --git a/tools/testing/selftests/powerpc/dscr/Makefile b/tools/testing/selftests/powerpc/dscr/Makefile
new file mode 100644
index 000000000..5df476364
--- /dev/null
+++ b/tools/testing/selftests/powerpc/dscr/Makefile
@@ -0,0 +1,11 @@
+# SPDX-License-Identifier: GPL-2.0
+TEST_GEN_PROGS := dscr_default_test dscr_explicit_test dscr_user_test	\
+	      dscr_inherit_test dscr_inherit_exec_test dscr_sysfs_test	\
+	      dscr_sysfs_thread_test
+
+top_srcdir = ../../../../..
+include ../../lib.mk
+
+$(OUTPUT)/dscr_default_test: LDLIBS += -lpthread
+
+$(TEST_GEN_PROGS): ../harness.c
diff --git a/tools/testing/selftests/powerpc/dscr/dscr.h b/tools/testing/selftests/powerpc/dscr/dscr.h
new file mode 100644
index 000000000..cdb840bc5
--- /dev/null
+++ b/tools/testing/selftests/powerpc/dscr/dscr.h
@@ -0,0 +1,125 @@
+/*
+ * POWER Data Stream Control Register (DSCR)
+ *
+ * This header file contains helper functions and macros
+ * required for all the DSCR related test cases.
+ *
+ * Copyright 2012, Anton Blanchard, IBM Corporation.
+ * Copyright 2015, Anshuman Khandual, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+#ifndef _SELFTESTS_POWERPC_DSCR_DSCR_H
+#define _SELFTESTS_POWERPC_DSCR_DSCR_H
+
+#include <unistd.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <fcntl.h>
+#include <dirent.h>
+#include <pthread.h>
+#include <sched.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/wait.h>
+
+#include "utils.h"
+
+#define THREADS		100	/* Max threads */
+#define COUNT		100	/* Max iterations */
+#define DSCR_MAX	16	/* Max DSCR value */
+#define LEN_MAX		100	/* Max name length */
+
+#define DSCR_DEFAULT	"/sys/devices/system/cpu/dscr_default"
+#define CPU_PATH	"/sys/devices/system/cpu/"
+
+#define rmb()  asm volatile("lwsync":::"memory")
+#define wmb()  asm volatile("lwsync":::"memory")
+
+#define READ_ONCE(x) (*(volatile typeof(x) *)&(x))
+
+/* Prilvilege state DSCR access */
+inline unsigned long get_dscr(void)
+{
+	unsigned long ret;
+
+	asm volatile("mfspr %0,%1" : "=r" (ret) : "i" (SPRN_DSCR_PRIV));
+
+	return ret;
+}
+
+inline void set_dscr(unsigned long val)
+{
+	asm volatile("mtspr %1,%0" : : "r" (val), "i" (SPRN_DSCR_PRIV));
+}
+
+/* Problem state DSCR access */
+inline unsigned long get_dscr_usr(void)
+{
+	unsigned long ret;
+
+	asm volatile("mfspr %0,%1" : "=r" (ret) : "i" (SPRN_DSCR));
+
+	return ret;
+}
+
+inline void set_dscr_usr(unsigned long val)
+{
+	asm volatile("mtspr %1,%0" : : "r" (val), "i" (SPRN_DSCR));
+}
+
+/* Default DSCR access */
+unsigned long get_default_dscr(void)
+{
+	int fd = -1, ret;
+	char buf[16];
+	unsigned long val;
+
+	if (fd == -1) {
+		fd = open(DSCR_DEFAULT, O_RDONLY);
+		if (fd == -1) {
+			perror("open() failed");
+			exit(1);
+		}
+	}
+	memset(buf, 0, sizeof(buf));
+	lseek(fd, 0, SEEK_SET);
+	ret = read(fd, buf, sizeof(buf));
+	if (ret == -1) {
+		perror("read() failed");
+		exit(1);
+	}
+	sscanf(buf, "%lx", &val);
+	close(fd);
+	return val;
+}
+
+void set_default_dscr(unsigned long val)
+{
+	int fd = -1, ret;
+	char buf[16];
+
+	if (fd == -1) {
+		fd = open(DSCR_DEFAULT, O_RDWR);
+		if (fd == -1) {
+			perror("open() failed");
+			exit(1);
+		}
+	}
+	sprintf(buf, "%lx\n", val);
+	ret = write(fd, buf, strlen(buf));
+	if (ret == -1) {
+		perror("write() failed");
+		exit(1);
+	}
+	close(fd);
+}
+
+double uniform_deviate(int seed)
+{
+	return seed * (1.0 / (RAND_MAX + 1.0));
+}
+#endif	/* _SELFTESTS_POWERPC_DSCR_DSCR_H */
diff --git a/tools/testing/selftests/powerpc/dscr/dscr_default_test.c b/tools/testing/selftests/powerpc/dscr/dscr_default_test.c
new file mode 100644
index 000000000..9e1a37e93
--- /dev/null
+++ b/tools/testing/selftests/powerpc/dscr/dscr_default_test.c
@@ -0,0 +1,127 @@
+/*
+ * POWER Data Stream Control Register (DSCR) default test
+ *
+ * This test modifies the system wide default DSCR through
+ * it's sysfs interface and then verifies that all threads
+ * see the correct changed DSCR value immediately.
+ *
+ * Copyright 2012, Anton Blanchard, IBM Corporation.
+ * Copyright 2015, Anshuman Khandual, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+#include "dscr.h"
+
+static unsigned long dscr;		/* System DSCR default */
+static unsigned long sequence;
+static unsigned long result[THREADS];
+
+static void *do_test(void *in)
+{
+	unsigned long thread = (unsigned long)in;
+	unsigned long i;
+
+	for (i = 0; i < COUNT; i++) {
+		unsigned long d, cur_dscr, cur_dscr_usr;
+		unsigned long s1, s2;
+
+		s1 = READ_ONCE(sequence);
+		if (s1 & 1)
+			continue;
+		rmb();
+
+		d = dscr;
+		cur_dscr = get_dscr();
+		cur_dscr_usr = get_dscr_usr();
+
+		rmb();
+		s2 = sequence;
+
+		if (s1 != s2)
+			continue;
+
+		if (cur_dscr != d) {
+			fprintf(stderr, "thread %ld kernel DSCR should be %ld "
+				"but is %ld\n", thread, d, cur_dscr);
+			result[thread] = 1;
+			pthread_exit(&result[thread]);
+		}
+
+		if (cur_dscr_usr != d) {
+			fprintf(stderr, "thread %ld user DSCR should be %ld "
+				"but is %ld\n", thread, d, cur_dscr_usr);
+			result[thread] = 1;
+			pthread_exit(&result[thread]);
+		}
+	}
+	result[thread] = 0;
+	pthread_exit(&result[thread]);
+}
+
+int dscr_default(void)
+{
+	pthread_t threads[THREADS];
+	unsigned long i, *status[THREADS];
+	unsigned long orig_dscr_default;
+
+	orig_dscr_default = get_default_dscr();
+
+	/* Initial DSCR default */
+	dscr = 1;
+	set_default_dscr(dscr);
+
+	/* Spawn all testing threads */
+	for (i = 0; i < THREADS; i++) {
+		if (pthread_create(&threads[i], NULL, do_test, (void *)i)) {
+			perror("pthread_create() failed");
+			goto fail;
+		}
+	}
+
+	srand(getpid());
+
+	/* Keep changing the DSCR default */
+	for (i = 0; i < COUNT; i++) {
+		double ret = uniform_deviate(rand());
+
+		if (ret < 0.0001) {
+			sequence++;
+			wmb();
+
+			dscr++;
+			if (dscr > DSCR_MAX)
+				dscr = 0;
+
+			set_default_dscr(dscr);
+
+			wmb();
+			sequence++;
+		}
+	}
+
+	/* Individual testing thread exit status */
+	for (i = 0; i < THREADS; i++) {
+		if (pthread_join(threads[i], (void **)&(status[i]))) {
+			perror("pthread_join() failed");
+			goto fail;
+		}
+
+		if (*status[i]) {
+			printf("%ldth thread failed to join with %ld status\n",
+								i, *status[i]);
+			goto fail;
+		}
+	}
+	set_default_dscr(orig_dscr_default);
+	return 0;
+fail:
+	set_default_dscr(orig_dscr_default);
+	return 1;
+}
+
+int main(int argc, char *argv[])
+{
+	return test_harness(dscr_default, "dscr_default_test");
+}
diff --git a/tools/testing/selftests/powerpc/dscr/dscr_explicit_test.c b/tools/testing/selftests/powerpc/dscr/dscr_explicit_test.c
new file mode 100644
index 000000000..ad9c3ec26
--- /dev/null
+++ b/tools/testing/selftests/powerpc/dscr/dscr_explicit_test.c
@@ -0,0 +1,71 @@
+/*
+ * POWER Data Stream Control Register (DSCR) explicit test
+ *
+ * This test modifies the DSCR value using mtspr instruction and
+ * verifies the change with mfspr instruction. It uses both the
+ * privilege state SPR and the problem state SPR for this purpose.
+ *
+ * When using the privilege state SPR, the instructions such as
+ * mfspr or mtspr are priviledged and the kernel emulates them
+ * for us. Instructions using problem state SPR can be exuecuted
+ * directly without any emulation if the HW supports them. Else
+ * they also get emulated by the kernel.
+ *
+ * Copyright 2012, Anton Blanchard, IBM Corporation.
+ * Copyright 2015, Anshuman Khandual, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+#include "dscr.h"
+
+int dscr_explicit(void)
+{
+	unsigned long i, dscr = 0;
+
+	srand(getpid());
+	set_dscr(dscr);
+
+	for (i = 0; i < COUNT; i++) {
+		unsigned long cur_dscr, cur_dscr_usr;
+		double ret = uniform_deviate(rand());
+
+		if (ret < 0.001) {
+			dscr++;
+			if (dscr > DSCR_MAX)
+				dscr = 0;
+
+			set_dscr(dscr);
+		}
+
+		cur_dscr = get_dscr();
+		if (cur_dscr != dscr) {
+			fprintf(stderr, "Kernel DSCR should be %ld but "
+					"is %ld\n", dscr, cur_dscr);
+			return 1;
+		}
+
+		ret = uniform_deviate(rand());
+		if (ret < 0.001) {
+			dscr++;
+			if (dscr > DSCR_MAX)
+				dscr = 0;
+
+			set_dscr_usr(dscr);
+		}
+
+		cur_dscr_usr = get_dscr_usr();
+		if (cur_dscr_usr != dscr) {
+			fprintf(stderr, "User DSCR should be %ld but "
+					"is %ld\n", dscr, cur_dscr_usr);
+			return 1;
+		}
+	}
+	return 0;
+}
+
+int main(int argc, char *argv[])
+{
+	return test_harness(dscr_explicit, "dscr_explicit_test");
+}
diff --git a/tools/testing/selftests/powerpc/dscr/dscr_inherit_exec_test.c b/tools/testing/selftests/powerpc/dscr/dscr_inherit_exec_test.c
new file mode 100644
index 000000000..c8c240acc
--- /dev/null
+++ b/tools/testing/selftests/powerpc/dscr/dscr_inherit_exec_test.c
@@ -0,0 +1,109 @@
+/*
+ * POWER Data Stream Control Register (DSCR) fork exec test
+ *
+ * This testcase modifies the DSCR using mtspr, forks & execs and
+ * verifies that the child is using the changed DSCR using mfspr.
+ *
+ * When using the privilege state SPR, the instructions such as
+ * mfspr or mtspr are privileged and the kernel emulates them
+ * for us. Instructions using problem state SPR can be executed
+ * directly without any emulation if the HW supports them. Else
+ * they also get emulated by the kernel.
+ *
+ * Copyright 2012, Anton Blanchard, IBM Corporation.
+ * Copyright 2015, Anshuman Khandual, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+#include "dscr.h"
+
+static char *prog;
+
+static void do_exec(unsigned long parent_dscr)
+{
+	unsigned long cur_dscr, cur_dscr_usr;
+
+	cur_dscr = get_dscr();
+	cur_dscr_usr = get_dscr_usr();
+
+	if (cur_dscr != parent_dscr) {
+		fprintf(stderr, "Parent DSCR %ld was not inherited "
+				"over exec (kernel value)\n", parent_dscr);
+		exit(1);
+	}
+
+	if (cur_dscr_usr != parent_dscr) {
+		fprintf(stderr, "Parent DSCR %ld was not inherited "
+				"over exec (user value)\n", parent_dscr);
+		exit(1);
+	}
+	exit(0);
+}
+
+int dscr_inherit_exec(void)
+{
+	unsigned long i, dscr = 0;
+	pid_t pid;
+
+	for (i = 0; i < COUNT; i++) {
+		dscr++;
+		if (dscr > DSCR_MAX)
+			dscr = 0;
+
+		if (dscr == get_default_dscr())
+			continue;
+
+		if (i % 2 == 0)
+			set_dscr_usr(dscr);
+		else
+			set_dscr(dscr);
+
+		pid = fork();
+		if (pid == -1) {
+			perror("fork() failed");
+			exit(1);
+		} else if (pid) {
+			int status;
+
+			if (waitpid(pid, &status, 0) == -1) {
+				perror("waitpid() failed");
+				exit(1);
+			}
+
+			if (!WIFEXITED(status)) {
+				fprintf(stderr, "Child didn't exit cleanly\n");
+				exit(1);
+			}
+
+			if (WEXITSTATUS(status) != 0) {
+				fprintf(stderr, "Child didn't exit cleanly\n");
+				return 1;
+			}
+		} else {
+			char dscr_str[16];
+
+			sprintf(dscr_str, "%ld", dscr);
+			execlp(prog, prog, "exec", dscr_str, NULL);
+			exit(1);
+		}
+	}
+	return 0;
+}
+
+int main(int argc, char *argv[])
+{
+	if (argc == 3 && !strcmp(argv[1], "exec")) {
+		unsigned long parent_dscr;
+
+		parent_dscr = atoi(argv[2]);
+		do_exec(parent_dscr);
+	} else if (argc != 1) {
+		fprintf(stderr, "Usage: %s\n", argv[0]);
+		exit(1);
+	}
+
+	prog = argv[0];
+	return test_harness(dscr_inherit_exec, "dscr_inherit_exec_test");
+}
diff --git a/tools/testing/selftests/powerpc/dscr/dscr_inherit_test.c b/tools/testing/selftests/powerpc/dscr/dscr_inherit_test.c
new file mode 100644
index 000000000..3e5a6d195
--- /dev/null
+++ b/tools/testing/selftests/powerpc/dscr/dscr_inherit_test.c
@@ -0,0 +1,87 @@
+/*
+ * POWER Data Stream Control Register (DSCR) fork test
+ *
+ * This testcase modifies the DSCR using mtspr, forks and then
+ * verifies that the child process has the correct changed DSCR
+ * value using mfspr.
+ *
+ * When using the privilege state SPR, the instructions such as
+ * mfspr or mtspr are priviledged and the kernel emulates them
+ * for us. Instructions using problem state SPR can be exuecuted
+ * directly without any emulation if the HW supports them. Else
+ * they also get emulated by the kernel.
+ *
+ * Copyright 2012, Anton Blanchard, IBM Corporation.
+ * Copyright 2015, Anshuman Khandual, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+#include "dscr.h"
+
+int dscr_inherit(void)
+{
+	unsigned long i, dscr = 0;
+	pid_t pid;
+
+	srand(getpid());
+	set_dscr(dscr);
+
+	for (i = 0; i < COUNT; i++) {
+		unsigned long cur_dscr, cur_dscr_usr;
+
+		dscr++;
+		if (dscr > DSCR_MAX)
+			dscr = 0;
+
+		if (i % 2 == 0)
+			set_dscr_usr(dscr);
+		else
+			set_dscr(dscr);
+
+		pid = fork();
+		if (pid == -1) {
+			perror("fork() failed");
+			exit(1);
+		} else if (pid) {
+			int status;
+
+			if (waitpid(pid, &status, 0) == -1) {
+				perror("waitpid() failed");
+				exit(1);
+			}
+
+			if (!WIFEXITED(status)) {
+				fprintf(stderr, "Child didn't exit cleanly\n");
+				exit(1);
+			}
+
+			if (WEXITSTATUS(status) != 0) {
+				fprintf(stderr, "Child didn't exit cleanly\n");
+				return 1;
+			}
+		} else {
+			cur_dscr = get_dscr();
+			if (cur_dscr != dscr) {
+				fprintf(stderr, "Kernel DSCR should be %ld "
+					"but is %ld\n", dscr, cur_dscr);
+				exit(1);
+			}
+
+			cur_dscr_usr = get_dscr_usr();
+			if (cur_dscr_usr != dscr) {
+				fprintf(stderr, "User DSCR should be %ld "
+					"but is %ld\n", dscr, cur_dscr_usr);
+				exit(1);
+			}
+			exit(0);
+		}
+	}
+	return 0;
+}
+
+int main(int argc, char *argv[])
+{
+	return test_harness(dscr_inherit, "dscr_inherit_test");
+}
diff --git a/tools/testing/selftests/powerpc/dscr/dscr_sysfs_test.c b/tools/testing/selftests/powerpc/dscr/dscr_sysfs_test.c
new file mode 100644
index 000000000..1899bd851
--- /dev/null
+++ b/tools/testing/selftests/powerpc/dscr/dscr_sysfs_test.c
@@ -0,0 +1,101 @@
+/*
+ * POWER Data Stream Control Register (DSCR) sysfs interface test
+ *
+ * This test updates to system wide DSCR default through the sysfs interface
+ * and then verifies that all the CPU specific DSCR defaults are updated as
+ * well verified from their sysfs interfaces.
+ *
+ * Copyright 2015, Anshuman Khandual, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+#include "dscr.h"
+
+static int check_cpu_dscr_default(char *file, unsigned long val)
+{
+	char buf[10];
+	int fd, rc;
+
+	fd = open(file, O_RDWR);
+	if (fd == -1) {
+		perror("open() failed");
+		return 1;
+	}
+
+	rc = read(fd, buf, sizeof(buf));
+	if (rc == -1) {
+		perror("read() failed");
+		return 1;
+	}
+	close(fd);
+
+	buf[rc] = '\0';
+	if (strtol(buf, NULL, 16) != val) {
+		printf("DSCR match failed: %ld (system) %ld (cpu)\n",
+					val, strtol(buf, NULL, 16));
+		return 1;
+	}
+	return 0;
+}
+
+static int check_all_cpu_dscr_defaults(unsigned long val)
+{
+	DIR *sysfs;
+	struct dirent *dp;
+	char file[LEN_MAX];
+
+	sysfs = opendir(CPU_PATH);
+	if (!sysfs) {
+		perror("opendir() failed");
+		return 1;
+	}
+
+	while ((dp = readdir(sysfs))) {
+		int len;
+
+		if (!(dp->d_type & DT_DIR))
+			continue;
+		if (!strcmp(dp->d_name, "cpuidle"))
+			continue;
+		if (!strstr(dp->d_name, "cpu"))
+			continue;
+
+		len = snprintf(file, LEN_MAX, "%s%s/dscr", CPU_PATH, dp->d_name);
+		if (len >= LEN_MAX)
+			continue;
+		if (access(file, F_OK))
+			continue;
+
+		if (check_cpu_dscr_default(file, val))
+			return 1;
+	}
+	closedir(sysfs);
+	return 0;
+}
+
+int dscr_sysfs(void)
+{
+	unsigned long orig_dscr_default;
+	int i, j;
+
+	orig_dscr_default = get_default_dscr();
+	for (i = 0; i < COUNT; i++) {
+		for (j = 0; j < DSCR_MAX; j++) {
+			set_default_dscr(j);
+			if (check_all_cpu_dscr_defaults(j))
+				goto fail;
+		}
+	}
+	set_default_dscr(orig_dscr_default);
+	return 0;
+fail:
+	set_default_dscr(orig_dscr_default);
+	return 1;
+}
+
+int main(int argc, char *argv[])
+{
+	return test_harness(dscr_sysfs, "dscr_sysfs_test");
+}
diff --git a/tools/testing/selftests/powerpc/dscr/dscr_sysfs_thread_test.c b/tools/testing/selftests/powerpc/dscr/dscr_sysfs_thread_test.c
new file mode 100644
index 000000000..ad97b592e
--- /dev/null
+++ b/tools/testing/selftests/powerpc/dscr/dscr_sysfs_thread_test.c
@@ -0,0 +1,80 @@
+/*
+ * POWER Data Stream Control Register (DSCR) sysfs thread test
+ *
+ * This test updates the system wide DSCR default value through
+ * sysfs interface which should then update all the CPU specific
+ * DSCR default values which must also be then visible to threads
+ * executing on individual CPUs on the system.
+ *
+ * Copyright 2015, Anshuman Khandual, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+#define _GNU_SOURCE
+#include "dscr.h"
+
+static int test_thread_dscr(unsigned long val)
+{
+	unsigned long cur_dscr, cur_dscr_usr;
+
+	cur_dscr = get_dscr();
+	cur_dscr_usr = get_dscr_usr();
+
+	if (val != cur_dscr) {
+		printf("[cpu %d] Kernel DSCR should be %ld but is %ld\n",
+					sched_getcpu(), val, cur_dscr);
+		return 1;
+	}
+
+	if (val != cur_dscr_usr) {
+		printf("[cpu %d] User DSCR should be %ld but is %ld\n",
+					sched_getcpu(), val, cur_dscr_usr);
+		return 1;
+	}
+	return 0;
+}
+
+static int check_cpu_dscr_thread(unsigned long val)
+{
+	cpu_set_t mask;
+	int cpu;
+
+	for (cpu = 0; cpu < CPU_SETSIZE; cpu++) {
+		CPU_ZERO(&mask);
+		CPU_SET(cpu, &mask);
+		if (sched_setaffinity(0, sizeof(mask), &mask))
+			continue;
+
+		if (test_thread_dscr(val))
+			return 1;
+	}
+	return 0;
+
+}
+
+int dscr_sysfs_thread(void)
+{
+	unsigned long orig_dscr_default;
+	int i, j;
+
+	orig_dscr_default = get_default_dscr();
+	for (i = 0; i < COUNT; i++) {
+		for (j = 0; j < DSCR_MAX; j++) {
+			set_default_dscr(j);
+			if (check_cpu_dscr_thread(j))
+				goto fail;
+		}
+	}
+	set_default_dscr(orig_dscr_default);
+	return 0;
+fail:
+	set_default_dscr(orig_dscr_default);
+	return 1;
+}
+
+int main(int argc, char *argv[])
+{
+	return test_harness(dscr_sysfs_thread, "dscr_sysfs_thread_test");
+}
diff --git a/tools/testing/selftests/powerpc/dscr/dscr_user_test.c b/tools/testing/selftests/powerpc/dscr/dscr_user_test.c
new file mode 100644
index 000000000..77d16b5e7
--- /dev/null
+++ b/tools/testing/selftests/powerpc/dscr/dscr_user_test.c
@@ -0,0 +1,61 @@
+/*
+ * POWER Data Stream Control Register (DSCR) SPR test
+ *
+ * This test modifies the DSCR value through both the SPR number
+ * based mtspr instruction and then makes sure that the same is
+ * reflected through mfspr instruction using either of the SPR
+ * numbers.
+ *
+ * When using the privilege state SPR, the instructions such as
+ * mfspr or mtspr are priviledged and the kernel emulates them
+ * for us. Instructions using problem state SPR can be exuecuted
+ * directly without any emulation if the HW supports them. Else
+ * they also get emulated by the kernel.
+ *
+ * Copyright 2013, Anton Blanchard, IBM Corporation.
+ * Copyright 2015, Anshuman Khandual, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+#include "dscr.h"
+
+static int check_dscr(char *str)
+{
+	unsigned long cur_dscr, cur_dscr_usr;
+
+	cur_dscr = get_dscr();
+	cur_dscr_usr = get_dscr_usr();
+	if (cur_dscr != cur_dscr_usr) {
+		printf("%s set, kernel get %lx != user get %lx\n",
+					str, cur_dscr, cur_dscr_usr);
+		return 1;
+	}
+	return 0;
+}
+
+int dscr_user(void)
+{
+	int i;
+
+	check_dscr("");
+
+	for (i = 0; i < COUNT; i++) {
+		set_dscr(i);
+		if (check_dscr("kernel"))
+			return 1;
+	}
+
+	for (i = 0; i < COUNT; i++) {
+		set_dscr_usr(i);
+		if (check_dscr("user"))
+			return 1;
+	}
+	return 0;
+}
+
+int main(int argc, char *argv[])
+{
+	return test_harness(dscr_user, "dscr_user_test");
+}
diff --git a/tools/testing/selftests/powerpc/harness.c b/tools/testing/selftests/powerpc/harness.c
new file mode 100644
index 000000000..9d7166dfa
--- /dev/null
+++ b/tools/testing/selftests/powerpc/harness.c
@@ -0,0 +1,131 @@
+/*
+ * Copyright 2013, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <errno.h>
+#include <signal.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#include <elf.h>
+#include <fcntl.h>
+#include <link.h>
+#include <sys/stat.h>
+
+#include "subunit.h"
+#include "utils.h"
+
+#define KILL_TIMEOUT	5
+
+static uint64_t timeout = 120;
+
+int run_test(int (test_function)(void), char *name)
+{
+	bool terminated;
+	int rc, status;
+	pid_t pid;
+
+	/* Make sure output is flushed before forking */
+	fflush(stdout);
+
+	pid = fork();
+	if (pid == 0) {
+		setpgid(0, 0);
+		exit(test_function());
+	} else if (pid == -1) {
+		perror("fork");
+		return 1;
+	}
+
+	setpgid(pid, pid);
+
+	/* Wake us up in timeout seconds */
+	alarm(timeout);
+	terminated = false;
+
+wait:
+	rc = waitpid(pid, &status, 0);
+	if (rc == -1) {
+		if (errno != EINTR) {
+			printf("unknown error from waitpid\n");
+			return 1;
+		}
+
+		if (terminated) {
+			printf("!! force killing %s\n", name);
+			kill(-pid, SIGKILL);
+			return 1;
+		} else {
+			printf("!! killing %s\n", name);
+			kill(-pid, SIGTERM);
+			terminated = true;
+			alarm(KILL_TIMEOUT);
+			goto wait;
+		}
+	}
+
+	/* Kill anything else in the process group that is still running */
+	kill(-pid, SIGTERM);
+
+	if (WIFEXITED(status))
+		status = WEXITSTATUS(status);
+	else {
+		if (WIFSIGNALED(status))
+			printf("!! child died by signal %d\n", WTERMSIG(status));
+		else
+			printf("!! child died by unknown cause\n");
+
+		status = 1; /* Signal or other */
+	}
+
+	return status;
+}
+
+static void sig_handler(int signum)
+{
+	/* Just wake us up from waitpid */
+}
+
+static struct sigaction sig_action = {
+	.sa_handler = sig_handler,
+};
+
+void test_harness_set_timeout(uint64_t time)
+{
+	timeout = time;
+}
+
+int test_harness(int (test_function)(void), char *name)
+{
+	int rc;
+
+	test_start(name);
+	test_set_git_version(GIT_VERSION);
+
+	if (sigaction(SIGINT, &sig_action, NULL)) {
+		perror("sigaction (sigint)");
+		test_error(name);
+		return 1;
+	}
+
+	if (sigaction(SIGALRM, &sig_action, NULL)) {
+		perror("sigaction (sigalrm)");
+		test_error(name);
+		return 1;
+	}
+
+	rc = run_test(test_function, name);
+
+	if (rc == MAGIC_SKIP_RETURN_VALUE) {
+		test_skip(name);
+		/* so that skipped test is not marked as failed */
+		rc = 0;
+	} else
+		test_finish(name, rc);
+
+	return rc;
+}
diff --git a/tools/testing/selftests/powerpc/include/basic_asm.h b/tools/testing/selftests/powerpc/include/basic_asm.h
new file mode 100644
index 000000000..886dc026f
--- /dev/null
+++ b/tools/testing/selftests/powerpc/include/basic_asm.h
@@ -0,0 +1,74 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _SELFTESTS_POWERPC_BASIC_ASM_H
+#define _SELFTESTS_POWERPC_BASIC_ASM_H
+
+#include <ppc-asm.h>
+#include <asm/unistd.h>
+
+#define LOAD_REG_IMMEDIATE(reg, expr) \
+	lis	reg, (expr)@highest;	\
+	ori	reg, reg, (expr)@higher;	\
+	rldicr	reg, reg, 32, 31;	\
+	oris	reg, reg, (expr)@high;	\
+	ori	reg, reg, (expr)@l;
+
+/*
+ * Note: These macros assume that variables being stored on the stack are
+ * doublewords, while this is usually the case it may not always be the
+ * case for each use case.
+ */
+#if defined(_CALL_ELF) && _CALL_ELF == 2
+#define STACK_FRAME_MIN_SIZE 32
+#define STACK_FRAME_TOC_POS  24
+#define __STACK_FRAME_PARAM(_param)  (32 + ((_param)*8))
+#define __STACK_FRAME_LOCAL(_num_params, _var_num)  \
+	((STACK_FRAME_PARAM(_num_params)) + ((_var_num)*8))
+#else
+#define STACK_FRAME_MIN_SIZE 112
+#define STACK_FRAME_TOC_POS  40
+#define __STACK_FRAME_PARAM(i)  (48 + ((i)*8))
+
+/*
+ * Caveat: if a function passed more than 8 doublewords, the caller will have
+ * made more space... which would render the 112 incorrect.
+ */
+#define __STACK_FRAME_LOCAL(_num_params, _var_num)  \
+	(112 + ((_var_num)*8))
+#endif
+
+/* Parameter x saved to the stack */
+#define STACK_FRAME_PARAM(var)    __STACK_FRAME_PARAM(var)
+
+/* Local variable x saved to the stack after x parameters */
+#define STACK_FRAME_LOCAL(num_params, var)    \
+	__STACK_FRAME_LOCAL(num_params, var)
+#define STACK_FRAME_LR_POS   16
+#define STACK_FRAME_CR_POS   8
+
+/*
+ * It is very important to note here that _extra is the extra amount of
+ * stack space needed. This space can be accessed using STACK_FRAME_PARAM()
+ * or STACK_FRAME_LOCAL() macros.
+ *
+ * r1 and r2 are not defined in ppc-asm.h (instead they are defined as sp
+ * and toc). Kernel programmers tend to prefer rX even for r1 and r2, hence
+ * %1 and %r2. r0 is defined in ppc-asm.h and therefore %r0 gets
+ * preprocessed incorrectly, hence r0.
+ */
+#define PUSH_BASIC_STACK(_extra) \
+	mflr	r0; \
+	std	r0, STACK_FRAME_LR_POS(%r1); \
+	stdu	%r1, -(_extra + STACK_FRAME_MIN_SIZE)(%r1); \
+	mfcr	r0; \
+	stw	r0, STACK_FRAME_CR_POS(%r1); \
+	std	%r2, STACK_FRAME_TOC_POS(%r1);
+
+#define POP_BASIC_STACK(_extra) \
+	ld	%r2, STACK_FRAME_TOC_POS(%r1); \
+	lwz	r0, STACK_FRAME_CR_POS(%r1); \
+	mtcr	r0; \
+	addi	%r1, %r1, (_extra + STACK_FRAME_MIN_SIZE); \
+	ld	r0, STACK_FRAME_LR_POS(%r1); \
+	mtlr	r0;
+
+#endif /* _SELFTESTS_POWERPC_BASIC_ASM_H */
diff --git a/tools/testing/selftests/powerpc/include/fpu_asm.h b/tools/testing/selftests/powerpc/include/fpu_asm.h
new file mode 100644
index 000000000..6a387d255
--- /dev/null
+++ b/tools/testing/selftests/powerpc/include/fpu_asm.h
@@ -0,0 +1,80 @@
+/*
+ * Copyright 2016, Cyril Bur, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _SELFTESTS_POWERPC_FPU_ASM_H
+#define _SELFTESTS_POWERPC_FPU_ASM_H
+#include "basic_asm.h"
+
+#define PUSH_FPU(stack_size) \
+	stfd	f31,(stack_size + STACK_FRAME_MIN_SIZE)(%r1); \
+	stfd	f30,(stack_size + STACK_FRAME_MIN_SIZE - 8)(%r1); \
+	stfd	f29,(stack_size + STACK_FRAME_MIN_SIZE - 16)(%r1); \
+	stfd	f28,(stack_size + STACK_FRAME_MIN_SIZE - 24)(%r1); \
+	stfd	f27,(stack_size + STACK_FRAME_MIN_SIZE - 32)(%r1); \
+	stfd	f26,(stack_size + STACK_FRAME_MIN_SIZE - 40)(%r1); \
+	stfd	f25,(stack_size + STACK_FRAME_MIN_SIZE - 48)(%r1); \
+	stfd	f24,(stack_size + STACK_FRAME_MIN_SIZE - 56)(%r1); \
+	stfd	f23,(stack_size + STACK_FRAME_MIN_SIZE - 64)(%r1); \
+	stfd	f22,(stack_size + STACK_FRAME_MIN_SIZE - 72)(%r1); \
+	stfd	f21,(stack_size + STACK_FRAME_MIN_SIZE - 80)(%r1); \
+	stfd	f20,(stack_size + STACK_FRAME_MIN_SIZE - 88)(%r1); \
+	stfd	f19,(stack_size + STACK_FRAME_MIN_SIZE - 96)(%r1); \
+	stfd	f18,(stack_size + STACK_FRAME_MIN_SIZE - 104)(%r1); \
+	stfd	f17,(stack_size + STACK_FRAME_MIN_SIZE - 112)(%r1); \
+	stfd	f16,(stack_size + STACK_FRAME_MIN_SIZE - 120)(%r1); \
+	stfd	f15,(stack_size + STACK_FRAME_MIN_SIZE - 128)(%r1); \
+	stfd	f14,(stack_size + STACK_FRAME_MIN_SIZE - 136)(%r1);
+
+#define POP_FPU(stack_size) \
+	lfd	f31,(stack_size + STACK_FRAME_MIN_SIZE)(%r1); \
+	lfd	f30,(stack_size + STACK_FRAME_MIN_SIZE - 8)(%r1); \
+	lfd	f29,(stack_size + STACK_FRAME_MIN_SIZE - 16)(%r1); \
+	lfd	f28,(stack_size + STACK_FRAME_MIN_SIZE - 24)(%r1); \
+	lfd	f27,(stack_size + STACK_FRAME_MIN_SIZE - 32)(%r1); \
+	lfd	f26,(stack_size + STACK_FRAME_MIN_SIZE - 40)(%r1); \
+	lfd	f25,(stack_size + STACK_FRAME_MIN_SIZE - 48)(%r1); \
+	lfd	f24,(stack_size + STACK_FRAME_MIN_SIZE - 56)(%r1); \
+	lfd	f23,(stack_size + STACK_FRAME_MIN_SIZE - 64)(%r1); \
+	lfd	f22,(stack_size + STACK_FRAME_MIN_SIZE - 72)(%r1); \
+	lfd	f21,(stack_size + STACK_FRAME_MIN_SIZE - 80)(%r1); \
+	lfd	f20,(stack_size + STACK_FRAME_MIN_SIZE - 88)(%r1); \
+	lfd	f19,(stack_size + STACK_FRAME_MIN_SIZE - 96)(%r1); \
+	lfd	f18,(stack_size + STACK_FRAME_MIN_SIZE - 104)(%r1); \
+	lfd	f17,(stack_size + STACK_FRAME_MIN_SIZE - 112)(%r1); \
+	lfd	f16,(stack_size + STACK_FRAME_MIN_SIZE - 120)(%r1); \
+	lfd	f15,(stack_size + STACK_FRAME_MIN_SIZE - 128)(%r1); \
+	lfd	f14,(stack_size + STACK_FRAME_MIN_SIZE - 136)(%r1);
+
+/*
+ * Careful calling this, it will 'clobber' fpu (by design)
+ * Don't call this from C
+ */
+FUNC_START(load_fpu)
+	lfd	f14,0(r3)
+	lfd	f15,8(r3)
+	lfd	f16,16(r3)
+	lfd	f17,24(r3)
+	lfd	f18,32(r3)
+	lfd	f19,40(r3)
+	lfd	f20,48(r3)
+	lfd	f21,56(r3)
+	lfd	f22,64(r3)
+	lfd	f23,72(r3)
+	lfd	f24,80(r3)
+	lfd	f25,88(r3)
+	lfd	f26,96(r3)
+	lfd	f27,104(r3)
+	lfd	f28,112(r3)
+	lfd	f29,120(r3)
+	lfd	f30,128(r3)
+	lfd	f31,136(r3)
+	blr
+FUNC_END(load_fpu)
+
+#endif /* _SELFTESTS_POWERPC_FPU_ASM_H */
diff --git a/tools/testing/selftests/powerpc/include/gpr_asm.h b/tools/testing/selftests/powerpc/include/gpr_asm.h
new file mode 100644
index 000000000..f6f38852d
--- /dev/null
+++ b/tools/testing/selftests/powerpc/include/gpr_asm.h
@@ -0,0 +1,96 @@
+/*
+ * Copyright 2016, Cyril Bur, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _SELFTESTS_POWERPC_GPR_ASM_H
+#define _SELFTESTS_POWERPC_GPR_ASM_H
+
+#include "basic_asm.h"
+
+#define __PUSH_NVREGS(top_pos); \
+	std r31,(top_pos)(%r1); \
+	std r30,(top_pos - 8)(%r1); \
+	std r29,(top_pos - 16)(%r1); \
+	std r28,(top_pos - 24)(%r1); \
+	std r27,(top_pos - 32)(%r1); \
+	std r26,(top_pos - 40)(%r1); \
+	std r25,(top_pos - 48)(%r1); \
+	std r24,(top_pos - 56)(%r1); \
+	std r23,(top_pos - 64)(%r1); \
+	std r22,(top_pos - 72)(%r1); \
+	std r21,(top_pos - 80)(%r1); \
+	std r20,(top_pos - 88)(%r1); \
+	std r19,(top_pos - 96)(%r1); \
+	std r18,(top_pos - 104)(%r1); \
+	std r17,(top_pos - 112)(%r1); \
+	std r16,(top_pos - 120)(%r1); \
+	std r15,(top_pos - 128)(%r1); \
+	std r14,(top_pos - 136)(%r1)
+
+#define __POP_NVREGS(top_pos); \
+	ld r31,(top_pos)(%r1); \
+	ld r30,(top_pos - 8)(%r1); \
+	ld r29,(top_pos - 16)(%r1); \
+	ld r28,(top_pos - 24)(%r1); \
+	ld r27,(top_pos - 32)(%r1); \
+	ld r26,(top_pos - 40)(%r1); \
+	ld r25,(top_pos - 48)(%r1); \
+	ld r24,(top_pos - 56)(%r1); \
+	ld r23,(top_pos - 64)(%r1); \
+	ld r22,(top_pos - 72)(%r1); \
+	ld r21,(top_pos - 80)(%r1); \
+	ld r20,(top_pos - 88)(%r1); \
+	ld r19,(top_pos - 96)(%r1); \
+	ld r18,(top_pos - 104)(%r1); \
+	ld r17,(top_pos - 112)(%r1); \
+	ld r16,(top_pos - 120)(%r1); \
+	ld r15,(top_pos - 128)(%r1); \
+	ld r14,(top_pos - 136)(%r1)
+
+#define PUSH_NVREGS(stack_size) \
+	__PUSH_NVREGS(stack_size + STACK_FRAME_MIN_SIZE)
+
+/* 18 NV FPU REGS */
+#define PUSH_NVREGS_BELOW_FPU(stack_size) \
+	__PUSH_NVREGS(stack_size + STACK_FRAME_MIN_SIZE - (18 * 8))
+
+#define POP_NVREGS(stack_size) \
+	__POP_NVREGS(stack_size + STACK_FRAME_MIN_SIZE)
+
+/* 18 NV FPU REGS */
+#define POP_NVREGS_BELOW_FPU(stack_size) \
+	__POP_NVREGS(stack_size + STACK_FRAME_MIN_SIZE - (18 * 8))
+
+/*
+ * Careful calling this, it will 'clobber' NVGPRs (by design)
+ * Don't call this from C
+ */
+FUNC_START(load_gpr)
+	ld	r14,0(r3)
+	ld	r15,8(r3)
+	ld	r16,16(r3)
+	ld	r17,24(r3)
+	ld	r18,32(r3)
+	ld	r19,40(r3)
+	ld	r20,48(r3)
+	ld	r21,56(r3)
+	ld	r22,64(r3)
+	ld	r23,72(r3)
+	ld	r24,80(r3)
+	ld	r25,88(r3)
+	ld	r26,96(r3)
+	ld	r27,104(r3)
+	ld	r28,112(r3)
+	ld	r29,120(r3)
+	ld	r30,128(r3)
+	ld	r31,136(r3)
+	blr
+FUNC_END(load_gpr)
+
+
+#endif /* _SELFTESTS_POWERPC_GPR_ASM_H */
diff --git a/tools/testing/selftests/powerpc/include/instructions.h b/tools/testing/selftests/powerpc/include/instructions.h
new file mode 100644
index 000000000..f36061eb6
--- /dev/null
+++ b/tools/testing/selftests/powerpc/include/instructions.h
@@ -0,0 +1,69 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _SELFTESTS_POWERPC_INSTRUCTIONS_H
+#define _SELFTESTS_POWERPC_INSTRUCTIONS_H
+
+#include <stdio.h>
+#include <stdlib.h>
+
+/* This defines the "copy" instruction from Power ISA 3.0 Book II, section 4.4. */
+#define __COPY(RA, RB, L) \
+	(0x7c00060c | (RA) << (31-15) | (RB) << (31-20) | (L) << (31-10))
+#define COPY(RA, RB, L) \
+	.long __COPY((RA), (RB), (L))
+
+static inline void copy(void *i)
+{
+	asm volatile(str(COPY(0, %0, 0))";"
+			:
+			: "b" (i)
+			: "memory"
+		    );
+}
+
+static inline void copy_first(void *i)
+{
+	asm volatile(str(COPY(0, %0, 1))";"
+			:
+			: "b" (i)
+			: "memory"
+		    );
+}
+
+/* This defines the "paste" instruction from Power ISA 3.0 Book II, section 4.4. */
+#define __PASTE(RA, RB, L, RC) \
+	(0x7c00070c | (RA) << (31-15) | (RB) << (31-20) | (L) << (31-10) | (RC) << (31-31))
+#define PASTE(RA, RB, L, RC) \
+	.long __PASTE((RA), (RB), (L), (RC))
+
+static inline int paste(void *i)
+{
+	int cr;
+
+	asm volatile(str(PASTE(0, %1, 0, 0))";"
+			"mfcr %0;"
+			: "=r" (cr)
+			: "b" (i)
+			: "memory"
+		    );
+	return cr;
+}
+
+static inline int paste_last(void *i)
+{
+	int cr;
+
+	asm volatile(str(PASTE(0, %1, 1, 1))";"
+			"mfcr %0;"
+			: "=r" (cr)
+			: "b" (i)
+			: "memory"
+		    );
+	return cr;
+}
+
+#define PPC_INST_COPY                  __COPY(0, 0, 0)
+#define PPC_INST_COPY_FIRST            __COPY(0, 0, 1)
+#define PPC_INST_PASTE                 __PASTE(0, 0, 0, 0)
+#define PPC_INST_PASTE_LAST            __PASTE(0, 0, 1, 1)
+
+#endif /* _SELFTESTS_POWERPC_INSTRUCTIONS_H */
diff --git a/tools/testing/selftests/powerpc/include/reg.h b/tools/testing/selftests/powerpc/include/reg.h
new file mode 100644
index 000000000..7f348c059
--- /dev/null
+++ b/tools/testing/selftests/powerpc/include/reg.h
@@ -0,0 +1,146 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#ifndef _SELFTESTS_POWERPC_REG_H
+#define _SELFTESTS_POWERPC_REG_H
+
+#define __stringify_1(x)        #x
+#define __stringify(x)          __stringify_1(x)
+
+#define mfspr(rn)	({unsigned long rval; \
+			 asm volatile("mfspr %0," _str(rn) \
+				    : "=r" (rval)); rval; })
+#define mtspr(rn, v)	asm volatile("mtspr " _str(rn) ",%0" : \
+				    : "r" ((unsigned long)(v)) \
+				    : "memory")
+
+#define mb()		asm volatile("sync" : : : "memory");
+
+#define SPRN_MMCR2     769
+#define SPRN_MMCRA     770
+#define SPRN_MMCR0     779
+#define   MMCR0_PMAO   0x00000080
+#define   MMCR0_PMAE   0x04000000
+#define   MMCR0_FC     0x80000000
+#define SPRN_EBBHR     804
+#define SPRN_EBBRR     805
+#define SPRN_BESCR     806     /* Branch event status & control register */
+#define SPRN_BESCRS    800     /* Branch event status & control set (1 bits set to 1) */
+#define SPRN_BESCRSU   801     /* Branch event status & control set upper */
+#define SPRN_BESCRR    802     /* Branch event status & control REset (1 bits set to 0) */
+#define SPRN_BESCRRU   803     /* Branch event status & control REset upper */
+
+#define BESCR_PMEO     0x1     /* PMU Event-based exception Occurred */
+#define BESCR_PME      (0x1ul << 32) /* PMU Event-based exception Enable */
+
+#define SPRN_PMC1      771
+#define SPRN_PMC2      772
+#define SPRN_PMC3      773
+#define SPRN_PMC4      774
+#define SPRN_PMC5      775
+#define SPRN_PMC6      776
+
+#define SPRN_SIAR      780
+#define SPRN_SDAR      781
+#define SPRN_SIER      768
+
+#define SPRN_TEXASR     0x82    /* Transaction Exception and Status Register */
+#define SPRN_TFIAR      0x81    /* Transaction Failure Inst Addr    */
+#define SPRN_TFHAR      0x80    /* Transaction Failure Handler Addr */
+#define SPRN_TAR        0x32f	/* Target Address Register */
+
+#define SPRN_DSCR_PRIV 0x11	/* Privilege State DSCR */
+#define SPRN_DSCR      0x03	/* Data Stream Control Register */
+#define SPRN_PPR       896	/* Program Priority Register */
+#define SPRN_AMR       13	/* Authority Mask Register - problem state */
+
+/* TEXASR register bits */
+#define TEXASR_FC	0xFE00000000000000
+#define TEXASR_FP	0x0100000000000000
+#define TEXASR_DA	0x0080000000000000
+#define TEXASR_NO	0x0040000000000000
+#define TEXASR_FO	0x0020000000000000
+#define TEXASR_SIC	0x0010000000000000
+#define TEXASR_NTC	0x0008000000000000
+#define TEXASR_TC	0x0004000000000000
+#define TEXASR_TIC	0x0002000000000000
+#define TEXASR_IC	0x0001000000000000
+#define TEXASR_IFC	0x0000800000000000
+#define TEXASR_ABT	0x0000000100000000
+#define TEXASR_SPD	0x0000000080000000
+#define TEXASR_HV	0x0000000020000000
+#define TEXASR_PR	0x0000000010000000
+#define TEXASR_FS	0x0000000008000000
+#define TEXASR_TE	0x0000000004000000
+#define TEXASR_ROT	0x0000000002000000
+
+/* Vector Instructions */
+#define VSX_XX1(xs, ra, rb)	(((xs) & 0x1f) << 21 | ((ra) << 16) |  \
+				 ((rb) << 11) | (((xs) >> 5)))
+#define STXVD2X(xs, ra, rb)	.long (0x7c000798 | VSX_XX1((xs), (ra), (rb)))
+#define LXVD2X(xs, ra, rb)	.long (0x7c000698 | VSX_XX1((xs), (ra), (rb)))
+
+#define ASM_LOAD_GPR_IMMED(_asm_symbol_name_immed) \
+		"li 14, %[" #_asm_symbol_name_immed "];" \
+		"li 15, %[" #_asm_symbol_name_immed "];" \
+		"li 16, %[" #_asm_symbol_name_immed "];" \
+		"li 17, %[" #_asm_symbol_name_immed "];" \
+		"li 18, %[" #_asm_symbol_name_immed "];" \
+		"li 19, %[" #_asm_symbol_name_immed "];" \
+		"li 20, %[" #_asm_symbol_name_immed "];" \
+		"li 21, %[" #_asm_symbol_name_immed "];" \
+		"li 22, %[" #_asm_symbol_name_immed "];" \
+		"li 23, %[" #_asm_symbol_name_immed "];" \
+		"li 24, %[" #_asm_symbol_name_immed "];" \
+		"li 25, %[" #_asm_symbol_name_immed "];" \
+		"li 26, %[" #_asm_symbol_name_immed "];" \
+		"li 27, %[" #_asm_symbol_name_immed "];" \
+		"li 28, %[" #_asm_symbol_name_immed "];" \
+		"li 29, %[" #_asm_symbol_name_immed "];" \
+		"li 30, %[" #_asm_symbol_name_immed "];" \
+		"li 31, %[" #_asm_symbol_name_immed "];"
+
+#define ASM_LOAD_FPR_SINGLE_PRECISION(_asm_symbol_name_addr) \
+		"lfs 0, 0(%[" #_asm_symbol_name_addr "]);" \
+		"lfs 1, 0(%[" #_asm_symbol_name_addr "]);" \
+		"lfs 2, 0(%[" #_asm_symbol_name_addr "]);" \
+		"lfs 3, 0(%[" #_asm_symbol_name_addr "]);" \
+		"lfs 4, 0(%[" #_asm_symbol_name_addr "]);" \
+		"lfs 5, 0(%[" #_asm_symbol_name_addr "]);" \
+		"lfs 6, 0(%[" #_asm_symbol_name_addr "]);" \
+		"lfs 7, 0(%[" #_asm_symbol_name_addr "]);" \
+		"lfs 8, 0(%[" #_asm_symbol_name_addr "]);" \
+		"lfs 9, 0(%[" #_asm_symbol_name_addr "]);" \
+		"lfs 10, 0(%[" #_asm_symbol_name_addr "]);" \
+		"lfs 11, 0(%[" #_asm_symbol_name_addr "]);" \
+		"lfs 12, 0(%[" #_asm_symbol_name_addr "]);" \
+		"lfs 13, 0(%[" #_asm_symbol_name_addr "]);" \
+		"lfs 14, 0(%[" #_asm_symbol_name_addr "]);" \
+		"lfs 15, 0(%[" #_asm_symbol_name_addr "]);" \
+		"lfs 16, 0(%[" #_asm_symbol_name_addr "]);" \
+		"lfs 17, 0(%[" #_asm_symbol_name_addr "]);" \
+		"lfs 18, 0(%[" #_asm_symbol_name_addr "]);" \
+		"lfs 19, 0(%[" #_asm_symbol_name_addr "]);" \
+		"lfs 20, 0(%[" #_asm_symbol_name_addr "]);" \
+		"lfs 21, 0(%[" #_asm_symbol_name_addr "]);" \
+		"lfs 22, 0(%[" #_asm_symbol_name_addr "]);" \
+		"lfs 23, 0(%[" #_asm_symbol_name_addr "]);" \
+		"lfs 24, 0(%[" #_asm_symbol_name_addr "]);" \
+		"lfs 25, 0(%[" #_asm_symbol_name_addr "]);" \
+		"lfs 26, 0(%[" #_asm_symbol_name_addr "]);" \
+		"lfs 27, 0(%[" #_asm_symbol_name_addr "]);" \
+		"lfs 28, 0(%[" #_asm_symbol_name_addr "]);" \
+		"lfs 29, 0(%[" #_asm_symbol_name_addr "]);" \
+		"lfs 30, 0(%[" #_asm_symbol_name_addr "]);" \
+		"lfs 31, 0(%[" #_asm_symbol_name_addr "]);"
+
+#ifndef __ASSEMBLER__
+void store_gpr(unsigned long *addr);
+void load_gpr(unsigned long *addr);
+void load_fpr_single_precision(float *addr);
+void store_fpr_single_precision(float *addr);
+#endif /* end of __ASSEMBLER__ */
+
+#endif /* _SELFTESTS_POWERPC_REG_H */
diff --git a/tools/testing/selftests/powerpc/include/subunit.h b/tools/testing/selftests/powerpc/include/subunit.h
new file mode 100644
index 000000000..9c6c4e901
--- /dev/null
+++ b/tools/testing/selftests/powerpc/include/subunit.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright 2013, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#ifndef _SELFTESTS_POWERPC_SUBUNIT_H
+#define _SELFTESTS_POWERPC_SUBUNIT_H
+
+static inline void test_start(char *name)
+{
+	printf("test: %s\n", name);
+}
+
+static inline void test_failure_detail(char *name, char *detail)
+{
+	printf("failure: %s [%s]\n", name, detail);
+}
+
+static inline void test_failure(char *name)
+{
+	printf("failure: %s\n", name);
+}
+
+static inline void test_error(char *name)
+{
+	printf("error: %s\n", name);
+}
+
+static inline void test_skip(char *name)
+{
+	printf("skip: %s\n", name);
+}
+
+static inline void test_success(char *name)
+{
+	printf("success: %s\n", name);
+}
+
+static inline void test_finish(char *name, int status)
+{
+	if (status)
+		test_failure(name);
+	else
+		test_success(name);
+}
+
+static inline void test_set_git_version(char *value)
+{
+	printf("tags: git_version:%s\n", value);
+}
+
+#endif /* _SELFTESTS_POWERPC_SUBUNIT_H */
diff --git a/tools/testing/selftests/powerpc/include/utils.h b/tools/testing/selftests/powerpc/include/utils.h
new file mode 100644
index 000000000..c58c37082
--- /dev/null
+++ b/tools/testing/selftests/powerpc/include/utils.h
@@ -0,0 +1,83 @@
+/*
+ * Copyright 2013, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#ifndef _SELFTESTS_POWERPC_UTILS_H
+#define _SELFTESTS_POWERPC_UTILS_H
+
+#define __cacheline_aligned __attribute__((aligned(128)))
+
+#include <stdint.h>
+#include <stdbool.h>
+#include <linux/auxvec.h>
+#include "reg.h"
+
+/* Avoid headaches with PRI?64 - just use %ll? always */
+typedef unsigned long long u64;
+typedef   signed long long s64;
+
+/* Just for familiarity */
+typedef uint32_t u32;
+typedef uint16_t u16;
+typedef uint8_t u8;
+
+void test_harness_set_timeout(uint64_t time);
+int test_harness(int (test_function)(void), char *name);
+
+int read_auxv(char *buf, ssize_t buf_size);
+void *find_auxv_entry(int type, char *auxv);
+void *get_auxv_entry(int type);
+
+int pick_online_cpu(void);
+
+static inline bool have_hwcap(unsigned long ftr)
+{
+	return ((unsigned long)get_auxv_entry(AT_HWCAP) & ftr) == ftr;
+}
+
+#ifdef AT_HWCAP2
+static inline bool have_hwcap2(unsigned long ftr2)
+{
+	return ((unsigned long)get_auxv_entry(AT_HWCAP2) & ftr2) == ftr2;
+}
+#else
+static inline bool have_hwcap2(unsigned long ftr2)
+{
+	return false;
+}
+#endif
+
+bool is_ppc64le(void);
+
+/* Yes, this is evil */
+#define FAIL_IF(x)						\
+do {								\
+	if ((x)) {						\
+		fprintf(stderr,					\
+		"[FAIL] Test FAILED on line %d\n", __LINE__);	\
+		return 1;					\
+	}							\
+} while (0)
+
+/* The test harness uses this, yes it's gross */
+#define MAGIC_SKIP_RETURN_VALUE	99
+
+#define SKIP_IF(x)						\
+do {								\
+	if ((x)) {						\
+		fprintf(stderr,					\
+		"[SKIP] Test skipped on line %d\n", __LINE__);	\
+		return MAGIC_SKIP_RETURN_VALUE;			\
+	}							\
+} while (0)
+
+#define _str(s) #s
+#define str(s) _str(s)
+
+/* POWER9 feature */
+#ifndef PPC_FEATURE2_ARCH_3_00
+#define PPC_FEATURE2_ARCH_3_00 0x00800000
+#endif
+
+#endif /* _SELFTESTS_POWERPC_UTILS_H */
diff --git a/tools/testing/selftests/powerpc/include/vmx_asm.h b/tools/testing/selftests/powerpc/include/vmx_asm.h
new file mode 100644
index 000000000..2eaaeca9c
--- /dev/null
+++ b/tools/testing/selftests/powerpc/include/vmx_asm.h
@@ -0,0 +1,96 @@
+/*
+ * Copyright 2015, Cyril Bur, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include "basic_asm.h"
+
+/* POS MUST BE 16 ALIGNED! */
+#define PUSH_VMX(pos,reg) \
+	li	reg,pos; \
+	stvx	v20,reg,%r1; \
+	addi	reg,reg,16; \
+	stvx	v21,reg,%r1; \
+	addi	reg,reg,16; \
+	stvx	v22,reg,%r1; \
+	addi	reg,reg,16; \
+	stvx	v23,reg,%r1; \
+	addi	reg,reg,16; \
+	stvx	v24,reg,%r1; \
+	addi	reg,reg,16; \
+	stvx	v25,reg,%r1; \
+	addi	reg,reg,16; \
+	stvx	v26,reg,%r1; \
+	addi	reg,reg,16; \
+	stvx	v27,reg,%r1; \
+	addi	reg,reg,16; \
+	stvx	v28,reg,%r1; \
+	addi	reg,reg,16; \
+	stvx	v29,reg,%r1; \
+	addi	reg,reg,16; \
+	stvx	v30,reg,%r1; \
+	addi	reg,reg,16; \
+	stvx	v31,reg,%r1;
+
+/* POS MUST BE 16 ALIGNED! */
+#define POP_VMX(pos,reg) \
+	li	reg,pos; \
+	lvx	v20,reg,%r1; \
+	addi	reg,reg,16; \
+	lvx	v21,reg,%r1; \
+	addi	reg,reg,16; \
+	lvx	v22,reg,%r1; \
+	addi	reg,reg,16; \
+	lvx	v23,reg,%r1; \
+	addi	reg,reg,16; \
+	lvx	v24,reg,%r1; \
+	addi	reg,reg,16; \
+	lvx	v25,reg,%r1; \
+	addi	reg,reg,16; \
+	lvx	v26,reg,%r1; \
+	addi	reg,reg,16; \
+	lvx	v27,reg,%r1; \
+	addi	reg,reg,16; \
+	lvx	v28,reg,%r1; \
+	addi	reg,reg,16; \
+	lvx	v29,reg,%r1; \
+	addi	reg,reg,16; \
+	lvx	v30,reg,%r1; \
+	addi	reg,reg,16; \
+	lvx	v31,reg,%r1;
+
+/*
+ * Careful this will 'clobber' vmx (by design)
+ * Don't call this from C
+ */
+FUNC_START(load_vmx)
+	li	r5,0
+	lvx	v20,r5,r3
+	addi	r5,r5,16
+	lvx	v21,r5,r3
+	addi	r5,r5,16
+	lvx	v22,r5,r3
+	addi	r5,r5,16
+	lvx	v23,r5,r3
+	addi	r5,r5,16
+	lvx	v24,r5,r3
+	addi	r5,r5,16
+	lvx	v25,r5,r3
+	addi	r5,r5,16
+	lvx	v26,r5,r3
+	addi	r5,r5,16
+	lvx	v27,r5,r3
+	addi	r5,r5,16
+	lvx	v28,r5,r3
+	addi	r5,r5,16
+	lvx	v29,r5,r3
+	addi	r5,r5,16
+	lvx	v30,r5,r3
+	addi	r5,r5,16
+	lvx	v31,r5,r3
+	blr
+FUNC_END(load_vmx)
diff --git a/tools/testing/selftests/powerpc/include/vsx_asm.h b/tools/testing/selftests/powerpc/include/vsx_asm.h
new file mode 100644
index 000000000..54064ced9
--- /dev/null
+++ b/tools/testing/selftests/powerpc/include/vsx_asm.h
@@ -0,0 +1,71 @@
+/*
+ * Copyright 2015, Cyril Bur, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include "basic_asm.h"
+
+/*
+ * Careful this will 'clobber' vsx (by design), VSX are always
+ * volatile though so unlike vmx this isn't so much of an issue
+ * Still should avoid calling from C
+ */
+FUNC_START(load_vsx)
+	li	r5,0
+	lxvd2x	vs20,r5,r3
+	addi	r5,r5,16
+	lxvd2x	vs21,r5,r3
+	addi	r5,r5,16
+	lxvd2x	vs22,r5,r3
+	addi	r5,r5,16
+	lxvd2x	vs23,r5,r3
+	addi	r5,r5,16
+	lxvd2x	vs24,r5,r3
+	addi	r5,r5,16
+	lxvd2x	vs25,r5,r3
+	addi	r5,r5,16
+	lxvd2x	vs26,r5,r3
+	addi	r5,r5,16
+	lxvd2x	vs27,r5,r3
+	addi	r5,r5,16
+	lxvd2x	vs28,r5,r3
+	addi	r5,r5,16
+	lxvd2x	vs29,r5,r3
+	addi	r5,r5,16
+	lxvd2x	vs30,r5,r3
+	addi	r5,r5,16
+	lxvd2x	vs31,r5,r3
+	blr
+FUNC_END(load_vsx)
+
+FUNC_START(store_vsx)
+	li	r5,0
+	stxvd2x	vs20,r5,r3
+	addi	r5,r5,16
+	stxvd2x	vs21,r5,r3
+	addi	r5,r5,16
+	stxvd2x	vs22,r5,r3
+	addi	r5,r5,16
+	stxvd2x	vs23,r5,r3
+	addi	r5,r5,16
+	stxvd2x	vs24,r5,r3
+	addi	r5,r5,16
+	stxvd2x	vs25,r5,r3
+	addi	r5,r5,16
+	stxvd2x	vs26,r5,r3
+	addi	r5,r5,16
+	stxvd2x	vs27,r5,r3
+	addi	r5,r5,16
+	stxvd2x	vs28,r5,r3
+	addi	r5,r5,16
+	stxvd2x	vs29,r5,r3
+	addi	r5,r5,16
+	stxvd2x	vs30,r5,r3
+	addi	r5,r5,16
+	stxvd2x	vs31,r5,r3
+	blr
+FUNC_END(store_vsx)
diff --git a/tools/testing/selftests/powerpc/lib/reg.S b/tools/testing/selftests/powerpc/lib/reg.S
new file mode 100644
index 000000000..0dc44f0da
--- /dev/null
+++ b/tools/testing/selftests/powerpc/lib/reg.S
@@ -0,0 +1,397 @@
+/*
+ * test helper assembly functions
+ *
+ * Copyright (C) 2016 Simon Guo, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <ppc-asm.h>
+#include "reg.h"
+
+
+/* Non volatile GPR - unsigned long buf[18] */
+FUNC_START(load_gpr)
+	ld	14, 0*8(3)
+	ld	15, 1*8(3)
+	ld	16, 2*8(3)
+	ld	17, 3*8(3)
+	ld	18, 4*8(3)
+	ld	19, 5*8(3)
+	ld	20, 6*8(3)
+	ld	21, 7*8(3)
+	ld	22, 8*8(3)
+	ld	23, 9*8(3)
+	ld	24, 10*8(3)
+	ld	25, 11*8(3)
+	ld	26, 12*8(3)
+	ld	27, 13*8(3)
+	ld	28, 14*8(3)
+	ld	29, 15*8(3)
+	ld	30, 16*8(3)
+	ld	31, 17*8(3)
+	blr
+FUNC_END(load_gpr)
+
+FUNC_START(store_gpr)
+	std	14, 0*8(3)
+	std	15, 1*8(3)
+	std	16, 2*8(3)
+	std	17, 3*8(3)
+	std	18, 4*8(3)
+	std	19, 5*8(3)
+	std	20, 6*8(3)
+	std	21, 7*8(3)
+	std	22, 8*8(3)
+	std	23, 9*8(3)
+	std	24, 10*8(3)
+	std	25, 11*8(3)
+	std	26, 12*8(3)
+	std	27, 13*8(3)
+	std	28, 14*8(3)
+	std	29, 15*8(3)
+	std	30, 16*8(3)
+	std	31, 17*8(3)
+	blr
+FUNC_END(store_gpr)
+
+/* Single Precision Float - float buf[32] */
+FUNC_START(load_fpr_single_precision)
+	lfs 0, 0*4(3)
+	lfs 1, 1*4(3)
+	lfs 2, 2*4(3)
+	lfs 3, 3*4(3)
+	lfs 4, 4*4(3)
+	lfs 5, 5*4(3)
+	lfs 6, 6*4(3)
+	lfs 7, 7*4(3)
+	lfs 8, 8*4(3)
+	lfs 9, 9*4(3)
+	lfs 10, 10*4(3)
+	lfs 11, 11*4(3)
+	lfs 12, 12*4(3)
+	lfs 13, 13*4(3)
+	lfs 14, 14*4(3)
+	lfs 15, 15*4(3)
+	lfs 16, 16*4(3)
+	lfs 17, 17*4(3)
+	lfs 18, 18*4(3)
+	lfs 19, 19*4(3)
+	lfs 20, 20*4(3)
+	lfs 21, 21*4(3)
+	lfs 22, 22*4(3)
+	lfs 23, 23*4(3)
+	lfs 24, 24*4(3)
+	lfs 25, 25*4(3)
+	lfs 26, 26*4(3)
+	lfs 27, 27*4(3)
+	lfs 28, 28*4(3)
+	lfs 29, 29*4(3)
+	lfs 30, 30*4(3)
+	lfs 31, 31*4(3)
+	blr
+FUNC_END(load_fpr_single_precision)
+
+/* Single Precision Float - float buf[32] */
+FUNC_START(store_fpr_single_precision)
+	stfs 0, 0*4(3)
+	stfs 1, 1*4(3)
+	stfs 2, 2*4(3)
+	stfs 3, 3*4(3)
+	stfs 4, 4*4(3)
+	stfs 5, 5*4(3)
+	stfs 6, 6*4(3)
+	stfs 7, 7*4(3)
+	stfs 8, 8*4(3)
+	stfs 9, 9*4(3)
+	stfs 10, 10*4(3)
+	stfs 11, 11*4(3)
+	stfs 12, 12*4(3)
+	stfs 13, 13*4(3)
+	stfs 14, 14*4(3)
+	stfs 15, 15*4(3)
+	stfs 16, 16*4(3)
+	stfs 17, 17*4(3)
+	stfs 18, 18*4(3)
+	stfs 19, 19*4(3)
+	stfs 20, 20*4(3)
+	stfs 21, 21*4(3)
+	stfs 22, 22*4(3)
+	stfs 23, 23*4(3)
+	stfs 24, 24*4(3)
+	stfs 25, 25*4(3)
+	stfs 26, 26*4(3)
+	stfs 27, 27*4(3)
+	stfs 28, 28*4(3)
+	stfs 29, 29*4(3)
+	stfs 30, 30*4(3)
+	stfs 31, 31*4(3)
+	blr
+FUNC_END(store_fpr_single_precision)
+
+/* VMX/VSX registers - unsigned long buf[128] */
+FUNC_START(loadvsx)
+	lis	4, 0
+	LXVD2X	(0,(4),(3))
+	addi	4, 4, 16
+	LXVD2X	(1,(4),(3))
+	addi	4, 4, 16
+	LXVD2X	(2,(4),(3))
+	addi	4, 4, 16
+	LXVD2X	(3,(4),(3))
+	addi	4, 4, 16
+	LXVD2X	(4,(4),(3))
+	addi	4, 4, 16
+	LXVD2X	(5,(4),(3))
+	addi	4, 4, 16
+	LXVD2X	(6,(4),(3))
+	addi	4, 4, 16
+	LXVD2X	(7,(4),(3))
+	addi	4, 4, 16
+	LXVD2X	(8,(4),(3))
+	addi	4, 4, 16
+	LXVD2X	(9,(4),(3))
+	addi	4, 4, 16
+	LXVD2X	(10,(4),(3))
+	addi	4, 4, 16
+	LXVD2X	(11,(4),(3))
+	addi	4, 4, 16
+	LXVD2X	(12,(4),(3))
+	addi	4, 4, 16
+	LXVD2X	(13,(4),(3))
+	addi	4, 4, 16
+	LXVD2X	(14,(4),(3))
+	addi	4, 4, 16
+	LXVD2X	(15,(4),(3))
+	addi	4, 4, 16
+	LXVD2X	(16,(4),(3))
+	addi	4, 4, 16
+	LXVD2X	(17,(4),(3))
+	addi	4, 4, 16
+	LXVD2X	(18,(4),(3))
+	addi	4, 4, 16
+	LXVD2X	(19,(4),(3))
+	addi	4, 4, 16
+	LXVD2X	(20,(4),(3))
+	addi	4, 4, 16
+	LXVD2X	(21,(4),(3))
+	addi	4, 4, 16
+	LXVD2X	(22,(4),(3))
+	addi	4, 4, 16
+	LXVD2X	(23,(4),(3))
+	addi	4, 4, 16
+	LXVD2X	(24,(4),(3))
+	addi	4, 4, 16
+	LXVD2X	(25,(4),(3))
+	addi	4, 4, 16
+	LXVD2X	(26,(4),(3))
+	addi	4, 4, 16
+	LXVD2X	(27,(4),(3))
+	addi	4, 4, 16
+	LXVD2X	(28,(4),(3))
+	addi	4, 4, 16
+	LXVD2X	(29,(4),(3))
+	addi	4, 4, 16
+	LXVD2X	(30,(4),(3))
+	addi	4, 4, 16
+	LXVD2X	(31,(4),(3))
+	addi	4, 4, 16
+	LXVD2X	(32,(4),(3))
+	addi	4, 4, 16
+	LXVD2X	(33,(4),(3))
+	addi	4, 4, 16
+	LXVD2X	(34,(4),(3))
+	addi	4, 4, 16
+	LXVD2X	(35,(4),(3))
+	addi	4, 4, 16
+	LXVD2X	(36,(4),(3))
+	addi	4, 4, 16
+	LXVD2X	(37,(4),(3))
+	addi	4, 4, 16
+	LXVD2X	(38,(4),(3))
+	addi	4, 4, 16
+	LXVD2X	(39,(4),(3))
+	addi	4, 4, 16
+	LXVD2X	(40,(4),(3))
+	addi	4, 4, 16
+	LXVD2X	(41,(4),(3))
+	addi	4, 4, 16
+	LXVD2X	(42,(4),(3))
+	addi	4, 4, 16
+	LXVD2X	(43,(4),(3))
+	addi	4, 4, 16
+	LXVD2X	(44,(4),(3))
+	addi	4, 4, 16
+	LXVD2X	(45,(4),(3))
+	addi	4, 4, 16
+	LXVD2X	(46,(4),(3))
+	addi	4, 4, 16
+	LXVD2X	(47,(4),(3))
+	addi	4, 4, 16
+	LXVD2X	(48,(4),(3))
+	addi	4, 4, 16
+	LXVD2X	(49,(4),(3))
+	addi	4, 4, 16
+	LXVD2X	(50,(4),(3))
+	addi	4, 4, 16
+	LXVD2X	(51,(4),(3))
+	addi	4, 4, 16
+	LXVD2X	(52,(4),(3))
+	addi	4, 4, 16
+	LXVD2X	(53,(4),(3))
+	addi	4, 4, 16
+	LXVD2X	(54,(4),(3))
+	addi	4, 4, 16
+	LXVD2X	(55,(4),(3))
+	addi	4, 4, 16
+	LXVD2X	(56,(4),(3))
+	addi	4, 4, 16
+	LXVD2X	(57,(4),(3))
+	addi	4, 4, 16
+	LXVD2X	(58,(4),(3))
+	addi	4, 4, 16
+	LXVD2X	(59,(4),(3))
+	addi	4, 4, 16
+	LXVD2X	(60,(4),(3))
+	addi	4, 4, 16
+	LXVD2X	(61,(4),(3))
+	addi	4, 4, 16
+	LXVD2X	(62,(4),(3))
+	addi	4, 4, 16
+	LXVD2X	(63,(4),(3))
+	blr
+FUNC_END(loadvsx)
+
+FUNC_START(storevsx)
+	lis	4, 0
+	STXVD2X	(0,(4),(3))
+	addi	4, 4, 16
+	STXVD2X	(1,(4),(3))
+	addi	4, 4, 16
+	STXVD2X	(2,(4),(3))
+	addi	4, 4, 16
+	STXVD2X	(3,(4),(3))
+	addi	4, 4, 16
+	STXVD2X	(4,(4),(3))
+	addi	4, 4, 16
+	STXVD2X	(5,(4),(3))
+	addi	4, 4, 16
+	STXVD2X	(6,(4),(3))
+	addi	4, 4, 16
+	STXVD2X	(7,(4),(3))
+	addi	4, 4, 16
+	STXVD2X	(8,(4),(3))
+	addi	4, 4, 16
+	STXVD2X	(9,(4),(3))
+	addi	4, 4, 16
+	STXVD2X	(10,(4),(3))
+	addi	4, 4, 16
+	STXVD2X	(11,(4),(3))
+	addi	4, 4, 16
+	STXVD2X	(12,(4),(3))
+	addi	4, 4, 16
+	STXVD2X	(13,(4),(3))
+	addi	4, 4, 16
+	STXVD2X	(14,(4),(3))
+	addi	4, 4, 16
+	STXVD2X	(15,(4),(3))
+	addi	4, 4, 16
+	STXVD2X	(16,(4),(3))
+	addi	4, 4, 16
+	STXVD2X	(17,(4),(3))
+	addi	4, 4, 16
+	STXVD2X	(18,(4),(3))
+	addi	4, 4, 16
+	STXVD2X	(19,(4),(3))
+	addi	4, 4, 16
+	STXVD2X	(20,(4),(3))
+	addi	4, 4, 16
+	STXVD2X	(21,(4),(3))
+	addi	4, 4, 16
+	STXVD2X	(22,(4),(3))
+	addi	4, 4, 16
+	STXVD2X	(23,(4),(3))
+	addi	4, 4, 16
+	STXVD2X	(24,(4),(3))
+	addi	4, 4, 16
+	STXVD2X	(25,(4),(3))
+	addi	4, 4, 16
+	STXVD2X	(26,(4),(3))
+	addi	4, 4, 16
+	STXVD2X	(27,(4),(3))
+	addi	4, 4, 16
+	STXVD2X	(28,(4),(3))
+	addi	4, 4, 16
+	STXVD2X	(29,(4),(3))
+	addi	4, 4, 16
+	STXVD2X	(30,(4),(3))
+	addi	4, 4, 16
+	STXVD2X	(31,(4),(3))
+	addi	4, 4, 16
+	STXVD2X	(32,(4),(3))
+	addi	4, 4, 16
+	STXVD2X	(33,(4),(3))
+	addi	4, 4, 16
+	STXVD2X	(34,(4),(3))
+	addi	4, 4, 16
+	STXVD2X	(35,(4),(3))
+	addi	4, 4, 16
+	STXVD2X	(36,(4),(3))
+	addi	4, 4, 16
+	STXVD2X	(37,(4),(3))
+	addi	4, 4, 16
+	STXVD2X	(38,(4),(3))
+	addi	4, 4, 16
+	STXVD2X	(39,(4),(3))
+	addi	4, 4, 16
+	STXVD2X	(40,(4),(3))
+	addi	4, 4, 16
+	STXVD2X	(41,(4),(3))
+	addi	4, 4, 16
+	STXVD2X	(42,(4),(3))
+	addi	4, 4, 16
+	STXVD2X	(43,(4),(3))
+	addi	4, 4, 16
+	STXVD2X	(44,(4),(3))
+	addi	4, 4, 16
+	STXVD2X	(45,(4),(3))
+	addi	4, 4, 16
+	STXVD2X	(46,(4),(3))
+	addi	4, 4, 16
+	STXVD2X	(47,(4),(3))
+	addi	4, 4, 16
+	STXVD2X	(48,(4),(3))
+	addi	4, 4, 16
+	STXVD2X	(49,(4),(3))
+	addi	4, 4, 16
+	STXVD2X	(50,(4),(3))
+	addi	4, 4, 16
+	STXVD2X	(51,(4),(3))
+	addi	4, 4, 16
+	STXVD2X	(52,(4),(3))
+	addi	4, 4, 16
+	STXVD2X	(53,(4),(3))
+	addi	4, 4, 16
+	STXVD2X	(54,(4),(3))
+	addi	4, 4, 16
+	STXVD2X	(55,(4),(3))
+	addi	4, 4, 16
+	STXVD2X	(56,(4),(3))
+	addi	4, 4, 16
+	STXVD2X	(57,(4),(3))
+	addi	4, 4, 16
+	STXVD2X	(58,(4),(3))
+	addi	4, 4, 16
+	STXVD2X	(59,(4),(3))
+	addi	4, 4, 16
+	STXVD2X	(60,(4),(3))
+	addi	4, 4, 16
+	STXVD2X	(61,(4),(3))
+	addi	4, 4, 16
+	STXVD2X	(62,(4),(3))
+	addi	4, 4, 16
+	STXVD2X	(63,(4),(3))
+	blr
+FUNC_END(storevsx)
diff --git a/tools/testing/selftests/powerpc/math/.gitignore b/tools/testing/selftests/powerpc/math/.gitignore
new file mode 100644
index 000000000..50ded63e2
--- /dev/null
+++ b/tools/testing/selftests/powerpc/math/.gitignore
@@ -0,0 +1,7 @@
+fpu_syscall
+vmx_syscall
+fpu_preempt
+vmx_preempt
+fpu_signal
+vmx_signal
+vsx_preempt
diff --git a/tools/testing/selftests/powerpc/math/Makefile b/tools/testing/selftests/powerpc/math/Makefile
new file mode 100644
index 000000000..11a10d7a2
--- /dev/null
+++ b/tools/testing/selftests/powerpc/math/Makefile
@@ -0,0 +1,19 @@
+# SPDX-License-Identifier: GPL-2.0
+TEST_GEN_PROGS := fpu_syscall fpu_preempt fpu_signal vmx_syscall vmx_preempt vmx_signal vsx_preempt
+
+top_srcdir = ../../../../..
+include ../../lib.mk
+
+$(TEST_GEN_PROGS): ../harness.c
+$(TEST_GEN_PROGS): CFLAGS += -O2 -g -pthread -m64 -maltivec
+
+$(OUTPUT)/fpu_syscall: fpu_asm.S
+$(OUTPUT)/fpu_preempt: fpu_asm.S
+$(OUTPUT)/fpu_signal:  fpu_asm.S
+
+$(OUTPUT)/vmx_syscall: vmx_asm.S
+$(OUTPUT)/vmx_preempt: vmx_asm.S
+$(OUTPUT)/vmx_signal: vmx_asm.S
+
+$(OUTPUT)/vsx_preempt: CFLAGS += -mvsx
+$(OUTPUT)/vsx_preempt: vsx_asm.S
diff --git a/tools/testing/selftests/powerpc/math/fpu_asm.S b/tools/testing/selftests/powerpc/math/fpu_asm.S
new file mode 100644
index 000000000..8a04bb117
--- /dev/null
+++ b/tools/testing/selftests/powerpc/math/fpu_asm.S
@@ -0,0 +1,135 @@
+/*
+ * Copyright 2015, Cyril Bur, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include "basic_asm.h"
+#include "fpu_asm.h"
+
+FUNC_START(check_fpu)
+	mr r4,r3
+	li	r3,1 # assume a bad result
+	lfd	f0,0(r4)
+	fcmpu	cr1,f0,f14
+	bne	cr1,1f
+	lfd	f0,8(r4)
+	fcmpu	cr1,f0,f15
+	bne	cr1,1f
+	lfd	f0,16(r4)
+	fcmpu	cr1,f0,f16
+	bne	cr1,1f
+	lfd	f0,24(r4)
+	fcmpu	cr1,f0,f17
+	bne	cr1,1f
+	lfd	f0,32(r4)
+	fcmpu	cr1,f0,f18
+	bne	cr1,1f
+	lfd	f0,40(r4)
+	fcmpu	cr1,f0,f19
+	bne	cr1,1f
+	lfd	f0,48(r4)
+	fcmpu	cr1,f0,f20
+	bne	cr1,1f
+	lfd	f0,56(r4)
+	fcmpu	cr1,f0,f21
+	bne	cr1,1f
+	lfd	f0,64(r4)
+	fcmpu	cr1,f0,f22
+	bne	cr1,1f
+	lfd	f0,72(r4)
+	fcmpu	cr1,f0,f23
+	bne	cr1,1f
+	lfd	f0,80(r4)
+	fcmpu	cr1,f0,f24
+	bne	cr1,1f
+	lfd	f0,88(r4)
+	fcmpu	cr1,f0,f25
+	bne	cr1,1f
+	lfd	f0,96(r4)
+	fcmpu	cr1,f0,f26
+	bne	cr1,1f
+	lfd	f0,104(r4)
+	fcmpu	cr1,f0,f27
+	bne	cr1,1f
+	lfd	f0,112(r4)
+	fcmpu	cr1,f0,f28
+	bne	cr1,1f
+	lfd	f0,120(r4)
+	fcmpu	cr1,f0,f29
+	bne	cr1,1f
+	lfd	f0,128(r4)
+	fcmpu	cr1,f0,f30
+	bne	cr1,1f
+	lfd	f0,136(r4)
+	fcmpu	cr1,f0,f31
+	bne	cr1,1f
+	li	r3,0 # Success!!!
+1:	blr
+
+FUNC_START(test_fpu)
+	# r3 holds pointer to where to put the result of fork
+	# r4 holds pointer to the pid
+	# f14-f31 are non volatiles
+	PUSH_BASIC_STACK(256)
+	PUSH_FPU(256)
+	std	r3,STACK_FRAME_PARAM(0)(sp) # Address of darray
+	std r4,STACK_FRAME_PARAM(1)(sp) # Address of pid
+
+	bl load_fpu
+	nop
+	li	r0,__NR_fork
+	sc
+
+	# pass the result of the fork to the caller
+	ld	r9,STACK_FRAME_PARAM(1)(sp)
+	std	r3,0(r9)
+
+	ld r3,STACK_FRAME_PARAM(0)(sp)
+	bl check_fpu
+	nop
+
+	POP_FPU(256)
+	POP_BASIC_STACK(256)
+	blr
+FUNC_END(test_fpu)
+
+# int preempt_fpu(double *darray, int *threads_running, int *running)
+# On starting will (atomically) decrement not_ready as a signal that the FPU
+# has been loaded with darray. Will proceed to check the validity of the FPU
+# registers while running is not zero.
+FUNC_START(preempt_fpu)
+	PUSH_BASIC_STACK(256)
+	PUSH_FPU(256)
+	std r3,STACK_FRAME_PARAM(0)(sp) # double *darray
+	std r4,STACK_FRAME_PARAM(1)(sp) # int *threads_starting
+	std r5,STACK_FRAME_PARAM(2)(sp) # int *running
+
+	bl load_fpu
+	nop
+
+	sync
+	# Atomic DEC
+	ld r3,STACK_FRAME_PARAM(1)(sp)
+1:	lwarx r4,0,r3
+	addi r4,r4,-1
+	stwcx. r4,0,r3
+	bne- 1b
+
+2:	ld r3,STACK_FRAME_PARAM(0)(sp)
+	bl check_fpu
+	nop
+	cmpdi r3,0
+	bne 3f
+	ld r4,STACK_FRAME_PARAM(2)(sp)
+	ld r5,0(r4)
+	cmpwi r5,0
+	bne 2b
+
+3:	POP_FPU(256)
+	POP_BASIC_STACK(256)
+	blr
+FUNC_END(preempt_fpu)
diff --git a/tools/testing/selftests/powerpc/math/fpu_preempt.c b/tools/testing/selftests/powerpc/math/fpu_preempt.c
new file mode 100644
index 000000000..0f85b79d8
--- /dev/null
+++ b/tools/testing/selftests/powerpc/math/fpu_preempt.c
@@ -0,0 +1,113 @@
+/*
+ * Copyright 2015, Cyril Bur, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * This test attempts to see if the FPU registers change across preemption.
+ * Two things should be noted here a) The check_fpu function in asm only checks
+ * the non volatile registers as it is reused from the syscall test b) There is
+ * no way to be sure preemption happened so this test just uses many threads
+ * and a long wait. As such, a successful test doesn't mean much but a failure
+ * is bad.
+ */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <stdlib.h>
+#include <pthread.h>
+
+#include "utils.h"
+
+/* Time to wait for workers to get preempted (seconds) */
+#define PREEMPT_TIME 20
+/*
+ * Factor by which to multiply number of online CPUs for total number of
+ * worker threads
+ */
+#define THREAD_FACTOR 8
+
+
+__thread double darray[] = {0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0,
+		     1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8, 1.9, 2.0,
+		     2.1};
+
+int threads_starting;
+int running;
+
+extern void preempt_fpu(double *darray, int *threads_starting, int *running);
+
+void *preempt_fpu_c(void *p)
+{
+	int i;
+	srand(pthread_self());
+	for (i = 0; i < 21; i++)
+		darray[i] = rand();
+
+	/* Test failed if it ever returns */
+	preempt_fpu(darray, &threads_starting, &running);
+
+	return p;
+}
+
+int test_preempt_fpu(void)
+{
+	int i, rc, threads;
+	pthread_t *tids;
+
+	threads = sysconf(_SC_NPROCESSORS_ONLN) * THREAD_FACTOR;
+	tids = malloc((threads) * sizeof(pthread_t));
+	FAIL_IF(!tids);
+
+	running = true;
+	threads_starting = threads;
+	for (i = 0; i < threads; i++) {
+		rc = pthread_create(&tids[i], NULL, preempt_fpu_c, NULL);
+		FAIL_IF(rc);
+	}
+
+	setbuf(stdout, NULL);
+	/* Not really necessary but nice to wait for every thread to start */
+	printf("\tWaiting for all workers to start...");
+	while(threads_starting)
+		asm volatile("": : :"memory");
+	printf("done\n");
+
+	printf("\tWaiting for %d seconds to let some workers get preempted...", PREEMPT_TIME);
+	sleep(PREEMPT_TIME);
+	printf("done\n");
+
+	printf("\tStopping workers...");
+	/*
+	 * Working are checking this value every loop. In preempt_fpu 'cmpwi r5,0; bne 2b'.
+	 * r5 will have loaded the value of running.
+	 */
+	running = 0;
+	for (i = 0; i < threads; i++) {
+		void *rc_p;
+		pthread_join(tids[i], &rc_p);
+
+		/*
+		 * Harness will say the fail was here, look at why preempt_fpu
+		 * returned
+		 */
+		if ((long) rc_p)
+			printf("oops\n");
+		FAIL_IF((long) rc_p);
+	}
+	printf("done\n");
+
+	free(tids);
+	return 0;
+}
+
+int main(int argc, char *argv[])
+{
+	return test_harness(test_preempt_fpu, "fpu_preempt");
+}
diff --git a/tools/testing/selftests/powerpc/math/fpu_signal.c b/tools/testing/selftests/powerpc/math/fpu_signal.c
new file mode 100644
index 000000000..888aa51b4
--- /dev/null
+++ b/tools/testing/selftests/powerpc/math/fpu_signal.c
@@ -0,0 +1,135 @@
+/*
+ * Copyright 2015, Cyril Bur, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * This test attempts to see if the FPU registers are correctly reported in a
+ * signal context. Each worker just spins checking its FPU registers, at some
+ * point a signal will interrupt it and C code will check the signal context
+ * ensuring it is also the same.
+ */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <stdlib.h>
+#include <pthread.h>
+
+#include "utils.h"
+
+/* Number of times each thread should receive the signal */
+#define ITERATIONS 10
+/*
+ * Factor by which to multiply number of online CPUs for total number of
+ * worker threads
+ */
+#define THREAD_FACTOR 8
+
+__thread double darray[] = {0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0,
+		     1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8, 1.9, 2.0,
+		     2.1};
+
+bool bad_context;
+int threads_starting;
+int running;
+
+extern long preempt_fpu(double *darray, int *threads_starting, int *running);
+
+void signal_fpu_sig(int sig, siginfo_t *info, void *context)
+{
+	int i;
+	ucontext_t *uc = context;
+	mcontext_t *mc = &uc->uc_mcontext;
+
+	/* Only the non volatiles were loaded up */
+	for (i = 14; i < 32; i++) {
+		if (mc->fp_regs[i] != darray[i - 14]) {
+			bad_context = true;
+			break;
+		}
+	}
+}
+
+void *signal_fpu_c(void *p)
+{
+	int i;
+	long rc;
+	struct sigaction act;
+	act.sa_sigaction = signal_fpu_sig;
+	act.sa_flags = SA_SIGINFO;
+	rc = sigaction(SIGUSR1, &act, NULL);
+	if (rc)
+		return p;
+
+	srand(pthread_self());
+	for (i = 0; i < 21; i++)
+		darray[i] = rand();
+
+	rc = preempt_fpu(darray, &threads_starting, &running);
+
+	return (void *) rc;
+}
+
+int test_signal_fpu(void)
+{
+	int i, j, rc, threads;
+	void *rc_p;
+	pthread_t *tids;
+
+	threads = sysconf(_SC_NPROCESSORS_ONLN) * THREAD_FACTOR;
+	tids = malloc(threads * sizeof(pthread_t));
+	FAIL_IF(!tids);
+
+	running = true;
+	threads_starting = threads;
+	for (i = 0; i < threads; i++) {
+		rc = pthread_create(&tids[i], NULL, signal_fpu_c, NULL);
+		FAIL_IF(rc);
+	}
+
+	setbuf(stdout, NULL);
+	printf("\tWaiting for all workers to start...");
+	while (threads_starting)
+		asm volatile("": : :"memory");
+	printf("done\n");
+
+	printf("\tSending signals to all threads %d times...", ITERATIONS);
+	for (i = 0; i < ITERATIONS; i++) {
+		for (j = 0; j < threads; j++) {
+			pthread_kill(tids[j], SIGUSR1);
+		}
+		sleep(1);
+	}
+	printf("done\n");
+
+	printf("\tStopping workers...");
+	running = 0;
+	for (i = 0; i < threads; i++) {
+		pthread_join(tids[i], &rc_p);
+
+		/*
+		 * Harness will say the fail was here, look at why signal_fpu
+		 * returned
+		 */
+		if ((long) rc_p || bad_context)
+			printf("oops\n");
+		if (bad_context)
+			fprintf(stderr, "\t!! bad_context is true\n");
+		FAIL_IF((long) rc_p || bad_context);
+	}
+	printf("done\n");
+
+	free(tids);
+	return 0;
+}
+
+int main(int argc, char *argv[])
+{
+	return test_harness(test_signal_fpu, "fpu_signal");
+}
diff --git a/tools/testing/selftests/powerpc/math/fpu_syscall.c b/tools/testing/selftests/powerpc/math/fpu_syscall.c
new file mode 100644
index 000000000..949e67212
--- /dev/null
+++ b/tools/testing/selftests/powerpc/math/fpu_syscall.c
@@ -0,0 +1,90 @@
+/*
+ * Copyright 2015, Cyril Bur, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * This test attempts to see if the FPU registers change across a syscall (fork).
+ */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <stdlib.h>
+
+#include "utils.h"
+
+extern int test_fpu(double *darray, pid_t *pid);
+
+double darray[] = {0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0,
+		     1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8, 1.9, 2.0,
+		     2.1};
+
+int syscall_fpu(void)
+{
+	pid_t fork_pid;
+	int i;
+	int ret;
+	int child_ret;
+	for (i = 0; i < 1000; i++) {
+		/* test_fpu will fork() */
+		ret = test_fpu(darray, &fork_pid);
+		if (fork_pid == -1)
+			return -1;
+		if (fork_pid == 0)
+			exit(ret);
+		waitpid(fork_pid, &child_ret, 0);
+		if (ret || child_ret)
+			return 1;
+	}
+
+	return 0;
+}
+
+int test_syscall_fpu(void)
+{
+	/*
+	 * Setup an environment with much context switching
+	 */
+	pid_t pid2;
+	pid_t pid = fork();
+	int ret;
+	int child_ret;
+	FAIL_IF(pid == -1);
+
+	pid2 = fork();
+	/* Can't FAIL_IF(pid2 == -1); because already forked once */
+	if (pid2 == -1) {
+		/*
+		 * Couldn't fork, ensure test is a fail
+		 */
+		child_ret = ret = 1;
+	} else {
+		ret = syscall_fpu();
+		if (pid2)
+			waitpid(pid2, &child_ret, 0);
+		else
+			exit(ret);
+	}
+
+	ret |= child_ret;
+
+	if (pid)
+		waitpid(pid, &child_ret, 0);
+	else
+		exit(ret);
+
+	FAIL_IF(ret || child_ret);
+	return 0;
+}
+
+int main(int argc, char *argv[])
+{
+	return test_harness(test_syscall_fpu, "syscall_fpu");
+
+}
diff --git a/tools/testing/selftests/powerpc/math/vmx_asm.S b/tools/testing/selftests/powerpc/math/vmx_asm.S
new file mode 100644
index 000000000..cb1e5ae1b
--- /dev/null
+++ b/tools/testing/selftests/powerpc/math/vmx_asm.S
@@ -0,0 +1,152 @@
+/*
+ * Copyright 2015, Cyril Bur, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include "basic_asm.h"
+#include "vmx_asm.h"
+
+# Should be safe from C, only touches r4, r5 and v0,v1,v2
+FUNC_START(check_vmx)
+	PUSH_BASIC_STACK(32)
+	mr r4,r3
+	li	r3,1 # assume a bad result
+	li	r5,0
+	lvx	v0,r5,r4
+	vcmpequd.	v1,v0,v20
+	vmr	v2,v1
+
+	addi	r5,r5,16
+	lvx	v0,r5,r4
+	vcmpequd.	v1,v0,v21
+	vand	v2,v2,v1
+
+	addi	r5,r5,16
+	lvx	v0,r5,r4
+	vcmpequd.	v1,v0,v22
+	vand	v2,v2,v1
+
+	addi	r5,r5,16
+	lvx	v0,r5,r4
+	vcmpequd.	v1,v0,v23
+	vand	v2,v2,v1
+
+	addi	r5,r5,16
+	lvx	v0,r5,r4
+	vcmpequd.	v1,v0,v24
+	vand	v2,v2,v1
+
+	addi	r5,r5,16
+	lvx	v0,r5,r4
+	vcmpequd.	v1,v0,v25
+	vand	v2,v2,v1
+
+	addi	r5,r5,16
+	lvx	v0,r5,r4
+	vcmpequd.	v1,v0,v26
+	vand	v2,v2,v1
+
+	addi	r5,r5,16
+	lvx	v0,r5,r4
+	vcmpequd.	v1,v0,v27
+	vand	v2,v2,v1
+
+	addi	r5,r5,16
+	lvx	v0,r5,r4
+	vcmpequd.	v1,v0,v28
+	vand	v2,v2,v1
+
+	addi	r5,r5,16
+	lvx	v0,r5,r4
+	vcmpequd.	v1,v0,v29
+	vand	v2,v2,v1
+
+	addi	r5,r5,16
+	lvx	v0,r5,r4
+	vcmpequd.	v1,v0,v30
+	vand	v2,v2,v1
+
+	addi	r5,r5,16
+	lvx	v0,r5,r4
+	vcmpequd.	v1,v0,v31
+	vand	v2,v2,v1
+
+	li	r5,STACK_FRAME_LOCAL(0,0)
+	stvx	v2,r5,sp
+	ldx	r0,r5,sp
+	cmpdi	r0,0xffffffffffffffff
+	bne	1f
+	li	r3,0
+1:	POP_BASIC_STACK(32)
+	blr
+FUNC_END(check_vmx)
+
+# Safe from C
+FUNC_START(test_vmx)
+	# r3 holds pointer to where to put the result of fork
+	# r4 holds pointer to the pid
+	# v20-v31 are non-volatile
+	PUSH_BASIC_STACK(512)
+	std	r3,STACK_FRAME_PARAM(0)(sp) # Address of varray
+	std r4,STACK_FRAME_PARAM(1)(sp) # address of pid
+	PUSH_VMX(STACK_FRAME_LOCAL(2,0),r4)
+
+	bl load_vmx
+	nop
+
+	li	r0,__NR_fork
+	sc
+	# Pass the result of fork back to the caller
+	ld	r9,STACK_FRAME_PARAM(1)(sp)
+	std	r3,0(r9)
+
+	ld r3,STACK_FRAME_PARAM(0)(sp)
+	bl check_vmx
+	nop
+
+	POP_VMX(STACK_FRAME_LOCAL(2,0),r4)
+	POP_BASIC_STACK(512)
+	blr
+FUNC_END(test_vmx)
+
+# int preempt_vmx(vector int *varray, int *threads_starting, int *running)
+# On starting will (atomically) decrement threads_starting as a signal that
+# the VMX have been loaded with varray. Will proceed to check the validity of
+# the VMX registers while running is not zero.
+FUNC_START(preempt_vmx)
+	PUSH_BASIC_STACK(512)
+	std r3,STACK_FRAME_PARAM(0)(sp) # vector int *varray
+	std r4,STACK_FRAME_PARAM(1)(sp) # int *threads_starting
+	std r5,STACK_FRAME_PARAM(2)(sp) # int *running
+	# VMX need to write to 16 byte aligned addresses, skip STACK_FRAME_LOCAL(3,0)
+	PUSH_VMX(STACK_FRAME_LOCAL(4,0),r4)
+
+	bl load_vmx
+	nop
+
+	sync
+	# Atomic DEC
+	ld r3,STACK_FRAME_PARAM(1)(sp)
+1:	lwarx r4,0,r3
+	addi r4,r4,-1
+	stwcx. r4,0,r3
+	bne- 1b
+
+2:	ld r3,STACK_FRAME_PARAM(0)(sp)
+	bl check_vmx
+	nop
+	cmpdi r3,0
+	bne 3f
+	ld r4,STACK_FRAME_PARAM(2)(sp)
+	ld r5,0(r4)
+	cmpwi r5,0
+	bne 2b
+
+3:	POP_VMX(STACK_FRAME_LOCAL(4,0),r4)
+	POP_BASIC_STACK(512)
+	blr
+FUNC_END(preempt_vmx)
diff --git a/tools/testing/selftests/powerpc/math/vmx_preempt.c b/tools/testing/selftests/powerpc/math/vmx_preempt.c
new file mode 100644
index 000000000..9ef376c55
--- /dev/null
+++ b/tools/testing/selftests/powerpc/math/vmx_preempt.c
@@ -0,0 +1,112 @@
+/*
+ * Copyright 2015, Cyril Bur, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * This test attempts to see if the VMX registers change across preemption.
+ * Two things should be noted here a) The check_vmx function in asm only checks
+ * the non volatile registers as it is reused from the syscall test b) There is
+ * no way to be sure preemption happened so this test just uses many threads
+ * and a long wait. As such, a successful test doesn't mean much but a failure
+ * is bad.
+ */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <stdlib.h>
+#include <pthread.h>
+
+#include "utils.h"
+
+/* Time to wait for workers to get preempted (seconds) */
+#define PREEMPT_TIME 20
+/*
+ * Factor by which to multiply number of online CPUs for total number of
+ * worker threads
+ */
+#define THREAD_FACTOR 8
+
+__thread vector int varray[] = {{1, 2, 3, 4}, {5, 6, 7, 8}, {9, 10,11,12},
+	{13,14,15,16},{17,18,19,20},{21,22,23,24},
+	{25,26,27,28},{29,30,31,32},{33,34,35,36},
+	{37,38,39,40},{41,42,43,44},{45,46,47,48}};
+
+int threads_starting;
+int running;
+
+extern void preempt_vmx(vector int *varray, int *threads_starting, int *running);
+
+void *preempt_vmx_c(void *p)
+{
+	int i, j;
+	srand(pthread_self());
+	for (i = 0; i < 12; i++)
+		for (j = 0; j < 4; j++)
+			varray[i][j] = rand();
+
+	/* Test fails if it ever returns */
+	preempt_vmx(varray, &threads_starting, &running);
+	return p;
+}
+
+int test_preempt_vmx(void)
+{
+	int i, rc, threads;
+	pthread_t *tids;
+
+	threads = sysconf(_SC_NPROCESSORS_ONLN) * THREAD_FACTOR;
+	tids = malloc(threads * sizeof(pthread_t));
+	FAIL_IF(!tids);
+
+	running = true;
+	threads_starting = threads;
+	for (i = 0; i < threads; i++) {
+		rc = pthread_create(&tids[i], NULL, preempt_vmx_c, NULL);
+		FAIL_IF(rc);
+	}
+
+	setbuf(stdout, NULL);
+	/* Not really nessesary but nice to wait for every thread to start */
+	printf("\tWaiting for all workers to start...");
+	while(threads_starting)
+		asm volatile("": : :"memory");
+	printf("done\n");
+
+	printf("\tWaiting for %d seconds to let some workers get preempted...", PREEMPT_TIME);
+	sleep(PREEMPT_TIME);
+	printf("done\n");
+
+	printf("\tStopping workers...");
+	/*
+	 * Working are checking this value every loop. In preempt_vmx 'cmpwi r5,0; bne 2b'.
+	 * r5 will have loaded the value of running.
+	 */
+	running = 0;
+	for (i = 0; i < threads; i++) {
+		void *rc_p;
+		pthread_join(tids[i], &rc_p);
+
+		/*
+		 * Harness will say the fail was here, look at why preempt_vmx
+		 * returned
+		 */
+		if ((long) rc_p)
+			printf("oops\n");
+		FAIL_IF((long) rc_p);
+	}
+	printf("done\n");
+
+	return 0;
+}
+
+int main(int argc, char *argv[])
+{
+	return test_harness(test_preempt_vmx, "vmx_preempt");
+}
diff --git a/tools/testing/selftests/powerpc/math/vmx_signal.c b/tools/testing/selftests/powerpc/math/vmx_signal.c
new file mode 100644
index 000000000..671d7533a
--- /dev/null
+++ b/tools/testing/selftests/powerpc/math/vmx_signal.c
@@ -0,0 +1,156 @@
+/*
+ * Copyright 2015, Cyril Bur, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * This test attempts to see if the VMX registers are correctly reported in a
+ * signal context. Each worker just spins checking its VMX registers, at some
+ * point a signal will interrupt it and C code will check the signal context
+ * ensuring it is also the same.
+ */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <stdlib.h>
+#include <string.h>
+#include <pthread.h>
+#include <altivec.h>
+
+#include "utils.h"
+
+/* Number of times each thread should receive the signal */
+#define ITERATIONS 10
+/*
+ * Factor by which to multiply number of online CPUs for total number of
+ * worker threads
+ */
+#define THREAD_FACTOR 8
+
+__thread vector int varray[] = {{1, 2, 3, 4}, {5, 6, 7, 8}, {9, 10,11,12},
+	{13,14,15,16},{17,18,19,20},{21,22,23,24},
+	{25,26,27,28},{29,30,31,32},{33,34,35,36},
+	{37,38,39,40},{41,42,43,44},{45,46,47,48}};
+
+bool bad_context;
+int running;
+int threads_starting;
+
+extern int preempt_vmx(vector int *varray, int *threads_starting, int *sentinal);
+
+void signal_vmx_sig(int sig, siginfo_t *info, void *context)
+{
+	int i;
+	ucontext_t *uc = context;
+	mcontext_t *mc = &uc->uc_mcontext;
+
+	/* Only the non volatiles were loaded up */
+	for (i = 20; i < 32; i++) {
+		if (memcmp(mc->v_regs->vrregs[i], &varray[i - 20], 16)) {
+			int j;
+			/*
+			 * Shouldn't printf() in a signal handler, however, this is a
+			 * test and we've detected failure. Understanding what failed
+			 * is paramount. All that happens after this is tests exit with
+			 * failure.
+			 */
+			printf("VMX mismatch at reg %d!\n", i);
+			printf("Reg | Actual                  | Expected\n");
+			for (j = 20; j < 32; j++) {
+				printf("%d  | 0x%04x%04x%04x%04x      | 0x%04x%04x%04x%04x\n", j, mc->v_regs->vrregs[j][0],
+					   mc->v_regs->vrregs[j][1], mc->v_regs->vrregs[j][2], mc->v_regs->vrregs[j][3],
+					   varray[j - 20][0], varray[j - 20][1], varray[j - 20][2], varray[j - 20][3]);
+			}
+			bad_context = true;
+			break;
+		}
+	}
+}
+
+void *signal_vmx_c(void *p)
+{
+	int i, j;
+	long rc;
+	struct sigaction act;
+	act.sa_sigaction = signal_vmx_sig;
+	act.sa_flags = SA_SIGINFO;
+	rc = sigaction(SIGUSR1, &act, NULL);
+	if (rc)
+		return p;
+
+	srand(pthread_self());
+	for (i = 0; i < 12; i++)
+		for (j = 0; j < 4; j++)
+			varray[i][j] = rand();
+
+	rc = preempt_vmx(varray, &threads_starting, &running);
+
+	return (void *) rc;
+}
+
+int test_signal_vmx(void)
+{
+	int i, j, rc, threads;
+	void *rc_p;
+	pthread_t *tids;
+
+	threads = sysconf(_SC_NPROCESSORS_ONLN) * THREAD_FACTOR;
+	tids = malloc(threads * sizeof(pthread_t));
+	FAIL_IF(!tids);
+
+	running = true;
+	threads_starting = threads;
+	for (i = 0; i < threads; i++) {
+		rc = pthread_create(&tids[i], NULL, signal_vmx_c, NULL);
+		FAIL_IF(rc);
+	}
+
+	setbuf(stdout, NULL);
+	printf("\tWaiting for %d workers to start... %d", threads, threads_starting);
+	while (threads_starting) {
+		asm volatile("": : :"memory");
+		usleep(1000);
+		printf(", %d", threads_starting);
+	}
+	printf(" ...done\n");
+
+	printf("\tSending signals to all threads %d times...", ITERATIONS);
+	for (i = 0; i < ITERATIONS; i++) {
+		for (j = 0; j < threads; j++) {
+			pthread_kill(tids[j], SIGUSR1);
+		}
+		sleep(1);
+	}
+	printf("done\n");
+
+	printf("\tKilling workers...");
+	running = 0;
+	for (i = 0; i < threads; i++) {
+		pthread_join(tids[i], &rc_p);
+
+		/*
+		 * Harness will say the fail was here, look at why signal_vmx
+		 * returned
+		 */
+		if ((long) rc_p || bad_context)
+			printf("oops\n");
+		if (bad_context)
+			fprintf(stderr, "\t!! bad_context is true\n");
+		FAIL_IF((long) rc_p || bad_context);
+	}
+	printf("done\n");
+
+	free(tids);
+	return 0;
+}
+
+int main(int argc, char *argv[])
+{
+	return test_harness(test_signal_vmx, "vmx_signal");
+}
diff --git a/tools/testing/selftests/powerpc/math/vmx_syscall.c b/tools/testing/selftests/powerpc/math/vmx_syscall.c
new file mode 100644
index 000000000..a017918ee
--- /dev/null
+++ b/tools/testing/selftests/powerpc/math/vmx_syscall.c
@@ -0,0 +1,91 @@
+/*
+ * Copyright 2015, Cyril Bur, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * This test attempts to see if the VMX registers change across a syscall (fork).
+ */
+
+#include <altivec.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <sys/time.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include "utils.h"
+
+vector int varray[] = {{1, 2, 3, 4}, {5, 6, 7, 8}, {9, 10,11,12},
+	{13,14,15,16},{17,18,19,20},{21,22,23,24},
+	{25,26,27,28},{29,30,31,32},{33,34,35,36},
+	{37,38,39,40},{41,42,43,44},{45,46,47,48}};
+
+extern int test_vmx(vector int *varray, pid_t *pid);
+
+int vmx_syscall(void)
+{
+	pid_t fork_pid;
+	int i;
+	int ret;
+	int child_ret;
+	for (i = 0; i < 1000; i++) {
+		/* test_vmx will fork() */
+		ret = test_vmx(varray, &fork_pid);
+		if (fork_pid == -1)
+			return -1;
+		if (fork_pid == 0)
+			exit(ret);
+		waitpid(fork_pid, &child_ret, 0);
+		if (ret || child_ret)
+			return 1;
+	}
+
+	return 0;
+}
+
+int test_vmx_syscall(void)
+{
+	/*
+	 * Setup an environment with much context switching
+	 */
+	pid_t pid2;
+	pid_t pid = fork();
+	int ret;
+	int child_ret;
+	FAIL_IF(pid == -1);
+
+	pid2 = fork();
+	ret = vmx_syscall();
+	/* Can't FAIL_IF(pid2 == -1); because we've already forked */
+	if (pid2 == -1) {
+		/*
+		 * Couldn't fork, ensure child_ret is set and is a fail
+		 */
+		ret = child_ret = 1;
+	} else {
+		if (pid2)
+			waitpid(pid2, &child_ret, 0);
+		else
+			exit(ret);
+	}
+
+	ret |= child_ret;
+
+	if (pid)
+		waitpid(pid, &child_ret, 0);
+	else
+		exit(ret);
+
+	FAIL_IF(ret || child_ret);
+	return 0;
+}
+
+int main(int argc, char *argv[])
+{
+	return test_harness(test_vmx_syscall, "vmx_syscall");
+
+}
diff --git a/tools/testing/selftests/powerpc/math/vsx_asm.S b/tools/testing/selftests/powerpc/math/vsx_asm.S
new file mode 100644
index 000000000..8f431f6ab
--- /dev/null
+++ b/tools/testing/selftests/powerpc/math/vsx_asm.S
@@ -0,0 +1,61 @@
+/*
+ * Copyright 2015, Cyril Bur, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include "basic_asm.h"
+#include "vsx_asm.h"
+
+#long check_vsx(vector int *r3);
+#This function wraps storeing VSX regs to the end of an array and a
+#call to a comparison function in C which boils down to a memcmp()
+FUNC_START(check_vsx)
+	PUSH_BASIC_STACK(32)
+	std	r3,STACK_FRAME_PARAM(0)(sp)
+	addi r3, r3, 16 * 12 #Second half of array
+	bl store_vsx
+	ld r3,STACK_FRAME_PARAM(0)(sp)
+	bl vsx_memcmp
+	POP_BASIC_STACK(32)
+	blr
+FUNC_END(check_vsx)
+
+# int preempt_vmx(vector int *varray, int *threads_starting,
+#                 int *running);
+# On starting will (atomically) decrement threads_starting as a signal
+# that the VMX have been loaded with varray. Will proceed to check the
+# validity of the VMX registers while running is not zero.
+FUNC_START(preempt_vsx)
+	PUSH_BASIC_STACK(512)
+	std r3,STACK_FRAME_PARAM(0)(sp) # vector int *varray
+	std r4,STACK_FRAME_PARAM(1)(sp) # int *threads_starting
+	std r5,STACK_FRAME_PARAM(2)(sp) # int *running
+
+	bl load_vsx
+	nop
+
+	sync
+	# Atomic DEC
+	ld r3,STACK_FRAME_PARAM(1)(sp)
+1:	lwarx r4,0,r3
+	addi r4,r4,-1
+	stwcx. r4,0,r3
+	bne- 1b
+
+2:	ld r3,STACK_FRAME_PARAM(0)(sp)
+	bl check_vsx
+	nop
+	cmpdi r3,0
+	bne 3f
+	ld r4,STACK_FRAME_PARAM(2)(sp)
+	ld r5,0(r4)
+	cmpwi r5,0
+	bne 2b
+
+3:	POP_BASIC_STACK(512)
+	blr
+FUNC_END(preempt_vsx)
diff --git a/tools/testing/selftests/powerpc/math/vsx_preempt.c b/tools/testing/selftests/powerpc/math/vsx_preempt.c
new file mode 100644
index 000000000..6387f03a0
--- /dev/null
+++ b/tools/testing/selftests/powerpc/math/vsx_preempt.c
@@ -0,0 +1,147 @@
+/*
+ * Copyright 2015, Cyril Bur, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * This test attempts to see if the VSX registers change across preemption.
+ * There is no way to be sure preemption happened so this test just
+ * uses many threads and a long wait. As such, a successful test
+ * doesn't mean much but a failure is bad.
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <stdlib.h>
+#include <pthread.h>
+
+#include "utils.h"
+
+/* Time to wait for workers to get preempted (seconds) */
+#define PREEMPT_TIME 20
+/*
+ * Factor by which to multiply number of online CPUs for total number of
+ * worker threads
+ */
+#define THREAD_FACTOR 8
+
+/*
+ * Ensure there is twice the number of non-volatile VMX regs!
+ * check_vmx() is going to use the other half as space to put the live
+ * registers before calling vsx_memcmp()
+ */
+__thread vector int varray[24] = {
+	{1, 2, 3, 4 }, {5, 6, 7, 8 }, {9, 10,11,12},
+	{13,14,15,16}, {17,18,19,20}, {21,22,23,24},
+	{25,26,27,28}, {29,30,31,32}, {33,34,35,36},
+	{37,38,39,40}, {41,42,43,44}, {45,46,47,48}
+};
+
+int threads_starting;
+int running;
+
+extern long preempt_vsx(vector int *varray, int *threads_starting, int *running);
+
+long vsx_memcmp(vector int *a) {
+	vector int zero = {0, 0, 0, 0};
+	int i;
+
+	FAIL_IF(a != varray);
+
+	for(i = 0; i < 12; i++) {
+		if (memcmp(&a[i + 12], &zero, sizeof(vector int)) == 0) {
+			fprintf(stderr, "Detected zero from the VSX reg %d\n", i + 12);
+			return 2;
+		}
+	}
+
+	if (memcmp(a, &a[12], 12 * sizeof(vector int))) {
+		long *p = (long *)a;
+		fprintf(stderr, "VSX mismatch\n");
+		for (i = 0; i < 24; i=i+2)
+			fprintf(stderr, "%d: 0x%08lx%08lx | 0x%08lx%08lx\n",
+					i/2 + i%2 + 20, p[i], p[i + 1], p[i + 24], p[i + 25]);
+		return 1;
+	}
+	return 0;
+}
+
+void *preempt_vsx_c(void *p)
+{
+	int i, j;
+	long rc;
+	srand(pthread_self());
+	for (i = 0; i < 12; i++)
+		for (j = 0; j < 4; j++) {
+			varray[i][j] = rand();
+			/* Don't want zero because it hides kernel problems */
+			if (varray[i][j] == 0)
+				j--;
+		}
+	rc = preempt_vsx(varray, &threads_starting, &running);
+	if (rc == 2)
+		fprintf(stderr, "Caught zeros in VSX compares\n");
+	return (void *)rc;
+}
+
+int test_preempt_vsx(void)
+{
+	int i, rc, threads;
+	pthread_t *tids;
+
+	threads = sysconf(_SC_NPROCESSORS_ONLN) * THREAD_FACTOR;
+	tids = malloc(threads * sizeof(pthread_t));
+	FAIL_IF(!tids);
+
+	running = true;
+	threads_starting = threads;
+	for (i = 0; i < threads; i++) {
+		rc = pthread_create(&tids[i], NULL, preempt_vsx_c, NULL);
+		FAIL_IF(rc);
+	}
+
+	setbuf(stdout, NULL);
+	/* Not really nessesary but nice to wait for every thread to start */
+	printf("\tWaiting for %d workers to start...", threads_starting);
+	while(threads_starting)
+		asm volatile("": : :"memory");
+	printf("done\n");
+
+	printf("\tWaiting for %d seconds to let some workers get preempted...", PREEMPT_TIME);
+	sleep(PREEMPT_TIME);
+	printf("done\n");
+
+	printf("\tStopping workers...");
+	/*
+	 * Working are checking this value every loop. In preempt_vsx 'cmpwi r5,0; bne 2b'.
+	 * r5 will have loaded the value of running.
+	 */
+	running = 0;
+	for (i = 0; i < threads; i++) {
+		void *rc_p;
+		pthread_join(tids[i], &rc_p);
+
+		/*
+		 * Harness will say the fail was here, look at why preempt_vsx
+		 * returned
+		 */
+		if ((long) rc_p)
+			printf("oops\n");
+		FAIL_IF((long) rc_p);
+	}
+	printf("done\n");
+
+	return 0;
+}
+
+int main(int argc, char *argv[])
+{
+	return test_harness(test_preempt_vsx, "vsx_preempt");
+}
diff --git a/tools/testing/selftests/powerpc/mm/.gitignore b/tools/testing/selftests/powerpc/mm/.gitignore
new file mode 100644
index 000000000..7d7c42ed6
--- /dev/null
+++ b/tools/testing/selftests/powerpc/mm/.gitignore
@@ -0,0 +1,5 @@
+hugetlb_vs_thp_test
+subpage_prot
+tempfile
+prot_sao
+segv_errors
+\ No newline at end of file
diff --git a/tools/testing/selftests/powerpc/mm/Makefile b/tools/testing/selftests/powerpc/mm/Makefile
new file mode 100644
index 000000000..869628234
--- /dev/null
+++ b/tools/testing/selftests/powerpc/mm/Makefile
@@ -0,0 +1,19 @@
+# SPDX-License-Identifier: GPL-2.0
+noarg:
+	$(MAKE) -C ../
+
+TEST_GEN_PROGS := hugetlb_vs_thp_test subpage_prot prot_sao segv_errors
+TEST_GEN_PROGS_EXTENDED := tlbie_test
+TEST_GEN_FILES := tempfile
+
+top_srcdir = ../../../../..
+include ../../lib.mk
+
+$(TEST_GEN_PROGS): ../harness.c
+
+$(OUTPUT)/prot_sao: ../utils.c
+
+$(OUTPUT)/tempfile:
+	dd if=/dev/zero of=$@ bs=64k count=1
+
+$(OUTPUT)/tlbie_test: LDLIBS += -lpthread
diff --git a/tools/testing/selftests/powerpc/mm/hugetlb_vs_thp_test.c b/tools/testing/selftests/powerpc/mm/hugetlb_vs_thp_test.c
new file mode 100644
index 000000000..9932359ce
--- /dev/null
+++ b/tools/testing/selftests/powerpc/mm/hugetlb_vs_thp_test.c
@@ -0,0 +1,77 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdio.h>
+#include <sys/mman.h>
+#include <unistd.h>
+
+#include "utils.h"
+
+/* This must match the huge page & THP size */
+#define SIZE	(16 * 1024 * 1024)
+
+static int test_body(void)
+{
+	void *addr;
+	char *p;
+
+	addr = (void *)0xa0000000;
+
+	p = mmap(addr, SIZE, PROT_READ | PROT_WRITE,
+		 MAP_HUGETLB | MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+	if (p != MAP_FAILED) {
+		/*
+		 * Typically the mmap will fail because no huge pages are
+		 * allocated on the system. But if there are huge pages
+		 * allocated the mmap will succeed. That's fine too, we just
+		 * munmap here before continuing.  munmap() length of
+		 * MAP_HUGETLB memory must be hugepage aligned.
+		 */
+		if (munmap(addr, SIZE)) {
+			perror("munmap");
+			return 1;
+		}
+	}
+
+	p = mmap(addr, SIZE, PROT_READ | PROT_WRITE,
+		 MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+	if (p == MAP_FAILED) {
+		printf("Mapping failed @ %p\n", addr);
+		perror("mmap");
+		return 1;
+	}
+
+	/*
+	 * Either a user or kernel access is sufficient to trigger the bug.
+	 * A kernel access is easier to spot & debug, as it will trigger the
+	 * softlockup or RCU stall detectors, and when the system is kicked
+	 * into xmon we get a backtrace in the kernel.
+	 *
+	 * A good option is:
+	 *  getcwd(p, SIZE);
+	 *
+	 * For the purposes of this testcase it's preferable to spin in
+	 * userspace, so the harness can kill us if we get stuck. That way we
+	 * see a test failure rather than a dead system.
+	 */
+	*p = 0xf;
+
+	munmap(addr, SIZE);
+
+	return 0;
+}
+
+static int test_main(void)
+{
+	int i;
+
+	/* 10,000 because it's a "bunch", and completes reasonably quickly */
+	for (i = 0; i < 10000; i++)
+		if (test_body())
+			return 1;
+
+	return 0;
+}
+
+int main(void)
+{
+	return test_harness(test_main, "hugetlb_vs_thp");
+}
diff --git a/tools/testing/selftests/powerpc/mm/prot_sao.c b/tools/testing/selftests/powerpc/mm/prot_sao.c
new file mode 100644
index 000000000..611530d43
--- /dev/null
+++ b/tools/testing/selftests/powerpc/mm/prot_sao.c
@@ -0,0 +1,42 @@
+/*
+ * Copyright 2016, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+
+#include <asm/cputable.h>
+
+#include "utils.h"
+
+#define SIZE (64 * 1024)
+
+int test_prot_sao(void)
+{
+	char *p;
+
+	/* 2.06 or later should support SAO */
+	SKIP_IF(!have_hwcap(PPC_FEATURE_ARCH_2_06));
+
+	/*
+	 * Ensure we can ask for PROT_SAO.
+	 * We can't really verify that it does the right thing, but at least we
+	 * confirm the kernel will accept it.
+	 */
+	p = mmap(NULL, SIZE, PROT_READ | PROT_WRITE | PROT_SAO,
+		 MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+	FAIL_IF(p == MAP_FAILED);
+
+	/* Write to the mapping, to at least cause a fault */
+	memset(p, 0xaa, SIZE);
+
+	return 0;
+}
+
+int main(void)
+{
+	return test_harness(test_prot_sao, "prot-sao");
+}
diff --git a/tools/testing/selftests/powerpc/mm/segv_errors.c b/tools/testing/selftests/powerpc/mm/segv_errors.c
new file mode 100644
index 000000000..06ae76ee3
--- /dev/null
+++ b/tools/testing/selftests/powerpc/mm/segv_errors.c
@@ -0,0 +1,78 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright 2017 John Sperbeck
+ *
+ * Test that an access to a mapped but inaccessible area causes a SEGV and
+ * reports si_code == SEGV_ACCERR.
+ */
+
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <signal.h>
+#include <sys/mman.h>
+#include <assert.h>
+#include <ucontext.h>
+
+#include "utils.h"
+
+static bool faulted;
+static int si_code;
+
+static void segv_handler(int n, siginfo_t *info, void *ctxt_v)
+{
+	ucontext_t *ctxt = (ucontext_t *)ctxt_v;
+	struct pt_regs *regs = ctxt->uc_mcontext.regs;
+
+	faulted = true;
+	si_code = info->si_code;
+	regs->nip += 4;
+}
+
+int test_segv_errors(void)
+{
+	struct sigaction act = {
+		.sa_sigaction = segv_handler,
+		.sa_flags = SA_SIGINFO,
+	};
+	char c, *p = NULL;
+
+	p = mmap(NULL, getpagesize(), 0, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
+	FAIL_IF(p == MAP_FAILED);
+
+	FAIL_IF(sigaction(SIGSEGV, &act, NULL) != 0);
+
+	faulted = false;
+	si_code = 0;
+
+	/*
+	 * We just need a compiler barrier, but mb() works and has the nice
+	 * property of being easy to spot in the disassembly.
+	 */
+	mb();
+	c = *p;
+	mb();
+
+	FAIL_IF(!faulted);
+	FAIL_IF(si_code != SEGV_ACCERR);
+
+	faulted = false;
+	si_code = 0;
+
+	mb();
+	*p = c;
+	mb();
+
+	FAIL_IF(!faulted);
+	FAIL_IF(si_code != SEGV_ACCERR);
+
+	return 0;
+}
+
+int main(void)
+{
+	return test_harness(test_segv_errors, "segv_errors");
+}
diff --git a/tools/testing/selftests/powerpc/mm/subpage_prot.c b/tools/testing/selftests/powerpc/mm/subpage_prot.c
new file mode 100644
index 000000000..3ae77ba93
--- /dev/null
+++ b/tools/testing/selftests/powerpc/mm/subpage_prot.c
@@ -0,0 +1,236 @@
+/*
+ * Copyright IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2.1 of the GNU Lesser General Public License
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ */
+
+#include <assert.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/ptrace.h>
+#include <sys/syscall.h>
+#include <ucontext.h>
+#include <unistd.h>
+
+#include "utils.h"
+
+char *file_name;
+
+int in_test;
+volatile int faulted;
+volatile void *dar;
+int errors;
+
+static void segv(int signum, siginfo_t *info, void *ctxt_v)
+{
+	ucontext_t *ctxt = (ucontext_t *)ctxt_v;
+	struct pt_regs *regs = ctxt->uc_mcontext.regs;
+
+	if (!in_test) {
+		fprintf(stderr, "Segfault outside of test !\n");
+		exit(1);
+	}
+
+	faulted = 1;
+	dar = (void *)regs->dar;
+	regs->nip += 4;
+}
+
+static inline void do_read(const volatile void *addr)
+{
+	int ret;
+
+	asm volatile("lwz %0,0(%1); twi 0,%0,0; isync;\n"
+		     : "=r" (ret) : "r" (addr) : "memory");
+}
+
+static inline void do_write(const volatile void *addr)
+{
+	int val = 0x1234567;
+
+	asm volatile("stw %0,0(%1); sync; \n"
+		     : : "r" (val), "r" (addr) : "memory");
+}
+
+static inline void check_faulted(void *addr, long page, long subpage, int write)
+{
+	int want_fault = (subpage == ((page + 3) % 16));
+
+	if (write)
+		want_fault |= (subpage == ((page + 1) % 16));
+
+	if (faulted != want_fault) {
+		printf("Failed at %p (p=%ld,sp=%ld,w=%d), want=%s, got=%s !\n",
+		       addr, page, subpage, write,
+		       want_fault ? "fault" : "pass",
+		       faulted ? "fault" : "pass");
+		++errors;
+	}
+
+	if (faulted) {
+		if (dar != addr) {
+			printf("Fault expected at %p and happened at %p !\n",
+			       addr, dar);
+		}
+		faulted = 0;
+		asm volatile("sync" : : : "memory");
+	}
+}
+
+static int run_test(void *addr, unsigned long size)
+{
+	unsigned int *map;
+	long i, j, pages, err;
+
+	pages = size / 0x10000;
+	map = malloc(pages * 4);
+	assert(map);
+
+	/*
+	 * for each page, mark subpage i % 16 read only and subpage
+	 * (i + 3) % 16 inaccessible
+	 */
+	for (i = 0; i < pages; i++) {
+		map[i] = (0x40000000 >> (((i + 1) * 2) % 32)) |
+			(0xc0000000 >> (((i + 3) * 2) % 32));
+	}
+
+	err = syscall(__NR_subpage_prot, addr, size, map);
+	if (err) {
+		perror("subpage_perm");
+		return 1;
+	}
+	free(map);
+
+	in_test = 1;
+	errors = 0;
+	for (i = 0; i < pages; i++) {
+		for (j = 0; j < 16; j++, addr += 0x1000) {
+			do_read(addr);
+			check_faulted(addr, i, j, 0);
+			do_write(addr);
+			check_faulted(addr, i, j, 1);
+		}
+	}
+
+	in_test = 0;
+	if (errors) {
+		printf("%d errors detected\n", errors);
+		return 1;
+	}
+
+	return 0;
+}
+
+static int syscall_available(void)
+{
+	int rc;
+
+	errno = 0;
+	rc = syscall(__NR_subpage_prot, 0, 0, 0);
+
+	return rc == 0 || (errno != ENOENT && errno != ENOSYS);
+}
+
+int test_anon(void)
+{
+	unsigned long align;
+	struct sigaction act = {
+		.sa_sigaction = segv,
+		.sa_flags = SA_SIGINFO
+	};
+	void *mallocblock;
+	unsigned long mallocsize;
+
+	SKIP_IF(!syscall_available());
+
+	if (getpagesize() != 0x10000) {
+		fprintf(stderr, "Kernel page size must be 64K!\n");
+		return 1;
+	}
+
+	sigaction(SIGSEGV, &act, NULL);
+
+	mallocsize = 4 * 16 * 1024 * 1024;
+
+	FAIL_IF(posix_memalign(&mallocblock, 64 * 1024, mallocsize));
+
+	align = (unsigned long)mallocblock;
+	if (align & 0xffff)
+		align = (align | 0xffff) + 1;
+
+	mallocblock = (void *)align;
+
+	printf("allocated malloc block of 0x%lx bytes at %p\n",
+	       mallocsize, mallocblock);
+
+	printf("testing malloc block...\n");
+
+	return run_test(mallocblock, mallocsize);
+}
+
+int test_file(void)
+{
+	struct sigaction act = {
+		.sa_sigaction = segv,
+		.sa_flags = SA_SIGINFO
+	};
+	void *fileblock;
+	off_t filesize;
+	int fd;
+
+	SKIP_IF(!syscall_available());
+
+	fd = open(file_name, O_RDWR);
+	if (fd == -1) {
+		perror("failed to open file");
+		return 1;
+	}
+	sigaction(SIGSEGV, &act, NULL);
+
+	filesize = lseek(fd, 0, SEEK_END);
+	if (filesize & 0xffff)
+		filesize &= ~0xfffful;
+
+	fileblock = mmap(NULL, filesize, PROT_READ | PROT_WRITE,
+			 MAP_SHARED, fd, 0);
+	if (fileblock == MAP_FAILED) {
+		perror("failed to map file");
+		return 1;
+	}
+	printf("allocated %s for 0x%lx bytes at %p\n",
+	       file_name, filesize, fileblock);
+
+	printf("testing file map...\n");
+
+	return run_test(fileblock, filesize);
+}
+
+int main(int argc, char *argv[])
+{
+	int rc;
+
+	rc = test_harness(test_anon, "subpage_prot_anon");
+	if (rc)
+		return rc;
+
+	if (argc > 1)
+		file_name = argv[1];
+	else
+		file_name = "tempfile";
+
+	return test_harness(test_file, "subpage_prot_file");
+}
diff --git a/tools/testing/selftests/powerpc/mm/tlbie_test.c b/tools/testing/selftests/powerpc/mm/tlbie_test.c
new file mode 100644
index 000000000..f85a0938a
--- /dev/null
+++ b/tools/testing/selftests/powerpc/mm/tlbie_test.c
@@ -0,0 +1,734 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright 2019, Nick Piggin, Gautham R. Shenoy, Aneesh Kumar K.V, IBM Corp.
+ */
+
+/*
+ *
+ * Test tlbie/mtpidr race. We have 4 threads doing flush/load/compare/store
+ * sequence in a loop. The same threads also rung a context switch task
+ * that does sched_yield() in loop.
+ *
+ * The snapshot thread mark the mmap area PROT_READ in between, make a copy
+ * and copy it back to the original area. This helps us to detect if any
+ * store continued to happen after we marked the memory PROT_READ.
+ */
+
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <sys/ipc.h>
+#include <sys/shm.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <linux/futex.h>
+#include <unistd.h>
+#include <asm/unistd.h>
+#include <string.h>
+#include <stdlib.h>
+#include <fcntl.h>
+#include <sched.h>
+#include <time.h>
+#include <stdarg.h>
+#include <sched.h>
+#include <pthread.h>
+#include <signal.h>
+#include <sys/prctl.h>
+
+static inline void dcbf(volatile unsigned int *addr)
+{
+	__asm__ __volatile__ ("dcbf %y0; sync" : : "Z"(*(unsigned char *)addr) : "memory");
+}
+
+static void err_msg(char *msg)
+{
+
+	time_t now;
+	time(&now);
+	printf("=================================\n");
+	printf("    Error: %s\n", msg);
+	printf("    %s", ctime(&now));
+	printf("=================================\n");
+	exit(1);
+}
+
+static char *map1;
+static char *map2;
+static pid_t rim_process_pid;
+
+/*
+ * A "rim-sequence" is defined to be the sequence of the following
+ * operations performed on a memory word:
+ *	1) FLUSH the contents of that word.
+ *	2) LOAD the contents of that word.
+ *	3) COMPARE the contents of that word with the content that was
+ *	           previously stored at that word
+ *	4) STORE new content into that word.
+ *
+ * The threads in this test that perform the rim-sequence are termed
+ * as rim_threads.
+ */
+
+/*
+ * A "corruption" is defined to be the failed COMPARE operation in a
+ * rim-sequence.
+ *
+ * A rim_thread that detects a corruption informs about it to all the
+ * other rim_threads, and the mem_snapshot thread.
+ */
+static volatile unsigned int corruption_found;
+
+/*
+ * This defines the maximum number of rim_threads in this test.
+ *
+ * The THREAD_ID_BITS denote the number of bits required
+ * to represent the thread_ids [0..MAX_THREADS - 1].
+ * We are being a bit paranoid here and set it to 8 bits,
+ * though 6 bits suffice.
+ *
+ */
+#define MAX_THREADS 		64
+#define THREAD_ID_BITS		8
+#define THREAD_ID_MASK		((1 << THREAD_ID_BITS) - 1)
+static unsigned int rim_thread_ids[MAX_THREADS];
+static pthread_t rim_threads[MAX_THREADS];
+
+
+/*
+ * Each rim_thread works on an exclusive "chunk" of size
+ * RIM_CHUNK_SIZE.
+ *
+ * The ith rim_thread works on the ith chunk.
+ *
+ * The ith chunk begins at
+ * map1 + (i * RIM_CHUNK_SIZE)
+ */
+#define RIM_CHUNK_SIZE  	1024
+#define BITS_PER_BYTE 		8
+#define WORD_SIZE     		(sizeof(unsigned int))
+#define WORD_BITS		(WORD_SIZE * BITS_PER_BYTE)
+#define WORDS_PER_CHUNK		(RIM_CHUNK_SIZE/WORD_SIZE)
+
+static inline char *compute_chunk_start_addr(unsigned int thread_id)
+{
+	char *chunk_start;
+
+	chunk_start = (char *)((unsigned long)map1 +
+			       (thread_id * RIM_CHUNK_SIZE));
+
+	return chunk_start;
+}
+
+/*
+ * The "word-offset" of a word-aligned address inside a chunk, is
+ * defined to be the number of words that precede the address in that
+ * chunk.
+ *
+ * WORD_OFFSET_BITS denote the number of bits required to represent
+ * the word-offsets of all the word-aligned addresses of a chunk.
+ */
+#define WORD_OFFSET_BITS	(__builtin_ctz(WORDS_PER_CHUNK))
+#define WORD_OFFSET_MASK	((1 << WORD_OFFSET_BITS) - 1)
+
+static inline unsigned int compute_word_offset(char *start, unsigned int *addr)
+{
+	unsigned int delta_bytes, ret;
+	delta_bytes = (unsigned long)addr - (unsigned long)start;
+
+	ret = delta_bytes/WORD_SIZE;
+
+	return ret;
+}
+
+/*
+ * A "sweep" is defined to be the sequential execution of the
+ * rim-sequence by a rim_thread on its chunk one word at a time,
+ * starting from the first word of its chunk and ending with the last
+ * word of its chunk.
+ *
+ * Each sweep of a rim_thread is uniquely identified by a sweep_id.
+ * SWEEP_ID_BITS denote the number of bits required to represent
+ * the sweep_ids of rim_threads.
+ *
+ * As to why SWEEP_ID_BITS are computed as a function of THREAD_ID_BITS,
+ * WORD_OFFSET_BITS, and WORD_BITS, see the "store-pattern" below.
+ */
+#define SWEEP_ID_BITS		(WORD_BITS - (THREAD_ID_BITS + WORD_OFFSET_BITS))
+#define SWEEP_ID_MASK		((1 << SWEEP_ID_BITS) - 1)
+
+/*
+ * A "store-pattern" is the word-pattern that is stored into a word
+ * location in the 4)STORE step of the rim-sequence.
+ *
+ * In the store-pattern, we shall encode:
+ *
+ *      - The thread-id of the rim_thread performing the store
+ *        (The most significant THREAD_ID_BITS)
+ *
+ *      - The word-offset of the address into which the store is being
+ *        performed (The next WORD_OFFSET_BITS)
+ *
+ *      - The sweep_id of the current sweep in which the store is
+ *        being performed. (The lower SWEEP_ID_BITS)
+ *
+ * Store Pattern: 32 bits
+ * |------------------|--------------------|---------------------------------|
+ * |    Thread id     |  Word offset       |         sweep_id                |
+ * |------------------|--------------------|---------------------------------|
+ *    THREAD_ID_BITS     WORD_OFFSET_BITS          SWEEP_ID_BITS
+ *
+ * In the store pattern, the (Thread-id + Word-offset) uniquely identify the
+ * address to which the store is being performed i.e,
+ *    address == map1 +
+ *              (Thread-id * RIM_CHUNK_SIZE) + (Word-offset * WORD_SIZE)
+ *
+ * And the sweep_id in the store pattern identifies the time when the
+ * store was performed by the rim_thread.
+ *
+ * We shall use this property in the 3)COMPARE step of the
+ * rim-sequence.
+ */
+#define SWEEP_ID_SHIFT	0
+#define WORD_OFFSET_SHIFT	(SWEEP_ID_BITS)
+#define THREAD_ID_SHIFT		(WORD_OFFSET_BITS + SWEEP_ID_BITS)
+
+/*
+ * Compute the store pattern for a given thread with id @tid, at
+ * location @addr in the sweep identified by @sweep_id
+ */
+static inline unsigned int compute_store_pattern(unsigned int tid,
+						 unsigned int *addr,
+						 unsigned int sweep_id)
+{
+	unsigned int ret = 0;
+	char *start = compute_chunk_start_addr(tid);
+	unsigned int word_offset = compute_word_offset(start, addr);
+
+	ret += (tid & THREAD_ID_MASK) << THREAD_ID_SHIFT;
+	ret += (word_offset & WORD_OFFSET_MASK) << WORD_OFFSET_SHIFT;
+	ret += (sweep_id & SWEEP_ID_MASK) << SWEEP_ID_SHIFT;
+	return ret;
+}
+
+/* Extract the thread-id from the given store-pattern */
+static inline unsigned int extract_tid(unsigned int pattern)
+{
+	unsigned int ret;
+
+	ret = (pattern >> THREAD_ID_SHIFT) & THREAD_ID_MASK;
+	return ret;
+}
+
+/* Extract the word-offset from the given store-pattern */
+static inline unsigned int extract_word_offset(unsigned int pattern)
+{
+	unsigned int ret;
+
+	ret = (pattern >> WORD_OFFSET_SHIFT) & WORD_OFFSET_MASK;
+
+	return ret;
+}
+
+/* Extract the sweep-id from the given store-pattern */
+static inline unsigned int extract_sweep_id(unsigned int pattern)
+
+{
+	unsigned int ret;
+
+	ret = (pattern >> SWEEP_ID_SHIFT) & SWEEP_ID_MASK;
+
+	return ret;
+}
+
+/************************************************************
+ *                                                          *
+ *          Logging the output of the verification          *
+ *                                                          *
+ ************************************************************/
+#define LOGDIR_NAME_SIZE 100
+static char logdir[LOGDIR_NAME_SIZE];
+
+static FILE *fp[MAX_THREADS];
+static const char logfilename[] ="Thread-%02d-Chunk";
+
+static inline void start_verification_log(unsigned int tid,
+					  unsigned int *addr,
+					  unsigned int cur_sweep_id,
+					  unsigned int prev_sweep_id)
+{
+	FILE *f;
+	char logfile[30];
+	char path[LOGDIR_NAME_SIZE + 30];
+	char separator[2] = "/";
+	char *chunk_start = compute_chunk_start_addr(tid);
+	unsigned int size = RIM_CHUNK_SIZE;
+
+	sprintf(logfile, logfilename, tid);
+	strcpy(path, logdir);
+	strcat(path, separator);
+	strcat(path, logfile);
+	f = fopen(path, "w");
+
+	if (!f) {
+		err_msg("Unable to create logfile\n");
+	}
+
+	fp[tid] = f;
+
+	fprintf(f, "----------------------------------------------------------\n");
+	fprintf(f, "PID                = %d\n", rim_process_pid);
+	fprintf(f, "Thread id          = %02d\n", tid);
+	fprintf(f, "Chunk Start Addr   = 0x%016lx\n", (unsigned long)chunk_start);
+	fprintf(f, "Chunk Size         = %d\n", size);
+	fprintf(f, "Next Store Addr    = 0x%016lx\n", (unsigned long)addr);
+	fprintf(f, "Current sweep-id   = 0x%08x\n", cur_sweep_id);
+	fprintf(f, "Previous sweep-id  = 0x%08x\n", prev_sweep_id);
+	fprintf(f, "----------------------------------------------------------\n");
+}
+
+static inline void log_anamoly(unsigned int tid, unsigned int *addr,
+			       unsigned int expected, unsigned int observed)
+{
+	FILE *f = fp[tid];
+
+	fprintf(f, "Thread %02d: Addr 0x%lx: Expected 0x%x, Observed 0x%x\n",
+	        tid, (unsigned long)addr, expected, observed);
+	fprintf(f, "Thread %02d: Expected Thread id   = %02d\n", tid, extract_tid(expected));
+	fprintf(f, "Thread %02d: Observed Thread id   = %02d\n", tid, extract_tid(observed));
+	fprintf(f, "Thread %02d: Expected Word offset = %03d\n", tid, extract_word_offset(expected));
+	fprintf(f, "Thread %02d: Observed Word offset = %03d\n", tid, extract_word_offset(observed));
+	fprintf(f, "Thread %02d: Expected sweep-id    = 0x%x\n", tid, extract_sweep_id(expected));
+	fprintf(f, "Thread %02d: Observed sweep-id    = 0x%x\n", tid, extract_sweep_id(observed));
+	fprintf(f, "----------------------------------------------------------\n");
+}
+
+static inline void end_verification_log(unsigned int tid, unsigned nr_anamolies)
+{
+	FILE *f = fp[tid];
+	char logfile[30];
+	char path[LOGDIR_NAME_SIZE + 30];
+	char separator[] = "/";
+
+	fclose(f);
+
+	if (nr_anamolies == 0) {
+		remove(path);
+		return;
+	}
+
+	sprintf(logfile, logfilename, tid);
+	strcpy(path, logdir);
+	strcat(path, separator);
+	strcat(path, logfile);
+
+	printf("Thread %02d chunk has %d corrupted words. For details check %s\n",
+		tid, nr_anamolies, path);
+}
+
+/*
+ * When a COMPARE step of a rim-sequence fails, the rim_thread informs
+ * everyone else via the shared_memory pointed to by
+ * corruption_found variable. On seeing this, every thread verifies the
+ * content of its chunk as follows.
+ *
+ * Suppose a thread identified with @tid was about to store (but not
+ * yet stored) to @next_store_addr in its current sweep identified
+ * @cur_sweep_id. Let @prev_sweep_id indicate the previous sweep_id.
+ *
+ * This implies that for all the addresses @addr < @next_store_addr,
+ * Thread @tid has already performed a store as part of its current
+ * sweep. Hence we expect the content of such @addr to be:
+ *    |-------------------------------------------------|
+ *    | tid   | word_offset(addr) |    cur_sweep_id     |
+ *    |-------------------------------------------------|
+ *
+ * Since Thread @tid is yet to perform stores on address
+ * @next_store_addr and above, we expect the content of such an
+ * address @addr to be:
+ *    |-------------------------------------------------|
+ *    | tid   | word_offset(addr) |    prev_sweep_id    |
+ *    |-------------------------------------------------|
+ *
+ * The verifier function @verify_chunk does this verification and logs
+ * any anamolies that it finds.
+ */
+static void verify_chunk(unsigned int tid, unsigned int *next_store_addr,
+		  unsigned int cur_sweep_id,
+		  unsigned int prev_sweep_id)
+{
+	unsigned int *iter_ptr;
+	unsigned int size = RIM_CHUNK_SIZE;
+	unsigned int expected;
+	unsigned int observed;
+	char *chunk_start = compute_chunk_start_addr(tid);
+
+	int nr_anamolies = 0;
+
+	start_verification_log(tid, next_store_addr,
+			       cur_sweep_id, prev_sweep_id);
+
+	for (iter_ptr = (unsigned int *)chunk_start;
+	     (unsigned long)iter_ptr < (unsigned long)chunk_start + size;
+	     iter_ptr++) {
+		unsigned int expected_sweep_id;
+
+		if (iter_ptr < next_store_addr) {
+			expected_sweep_id = cur_sweep_id;
+		} else {
+			expected_sweep_id = prev_sweep_id;
+		}
+
+		expected = compute_store_pattern(tid, iter_ptr, expected_sweep_id);
+
+		dcbf((volatile unsigned int*)iter_ptr); //Flush before reading
+		observed = *iter_ptr;
+
+	        if (observed != expected) {
+			nr_anamolies++;
+			log_anamoly(tid, iter_ptr, expected, observed);
+		}
+	}
+
+	end_verification_log(tid, nr_anamolies);
+}
+
+static void set_pthread_cpu(pthread_t th, int cpu)
+{
+	cpu_set_t run_cpu_mask;
+	struct sched_param param;
+
+	CPU_ZERO(&run_cpu_mask);
+	CPU_SET(cpu, &run_cpu_mask);
+	pthread_setaffinity_np(th, sizeof(cpu_set_t), &run_cpu_mask);
+
+	param.sched_priority = 1;
+	if (0 && sched_setscheduler(0, SCHED_FIFO, &param) == -1) {
+		/* haven't reproduced with this setting, it kills random preemption which may be a factor */
+		fprintf(stderr, "could not set SCHED_FIFO, run as root?\n");
+	}
+}
+
+static void set_mycpu(int cpu)
+{
+	cpu_set_t run_cpu_mask;
+	struct sched_param param;
+
+	CPU_ZERO(&run_cpu_mask);
+	CPU_SET(cpu, &run_cpu_mask);
+	sched_setaffinity(0, sizeof(cpu_set_t), &run_cpu_mask);
+
+	param.sched_priority = 1;
+	if (0 && sched_setscheduler(0, SCHED_FIFO, &param) == -1) {
+		fprintf(stderr, "could not set SCHED_FIFO, run as root?\n");
+	}
+}
+
+static volatile int segv_wait;
+
+static void segv_handler(int signo, siginfo_t *info, void *extra)
+{
+	while (segv_wait) {
+		sched_yield();
+	}
+
+}
+
+static void set_segv_handler(void)
+{
+	struct sigaction sa;
+
+	sa.sa_flags = SA_SIGINFO;
+	sa.sa_sigaction = segv_handler;
+
+	if (sigaction(SIGSEGV, &sa, NULL) == -1) {
+		perror("sigaction");
+		exit(EXIT_FAILURE);
+	}
+}
+
+int timeout = 0;
+/*
+ * This function is executed by every rim_thread.
+ *
+ * This function performs sweeps over the exclusive chunks of the
+ * rim_threads executing the rim-sequence one word at a time.
+ */
+static void *rim_fn(void *arg)
+{
+	unsigned int tid = *((unsigned int *)arg);
+
+	int size = RIM_CHUNK_SIZE;
+	char *chunk_start = compute_chunk_start_addr(tid);
+
+	unsigned int prev_sweep_id;
+	unsigned int cur_sweep_id = 0;
+
+	/* word access */
+	unsigned int pattern = cur_sweep_id;
+	unsigned int *pattern_ptr = &pattern;
+	unsigned int *w_ptr, read_data;
+
+	set_segv_handler();
+
+	/*
+	 * Let us initialize the chunk:
+	 *
+	 * Each word-aligned address addr in the chunk,
+	 * is initialized to :
+	 *    |-------------------------------------------------|
+	 *    | tid   | word_offset(addr) |         0           |
+	 *    |-------------------------------------------------|
+	 */
+	for (w_ptr = (unsigned int *)chunk_start;
+	     (unsigned long)w_ptr < (unsigned long)(chunk_start) + size;
+	     w_ptr++) {
+
+		*pattern_ptr = compute_store_pattern(tid, w_ptr, cur_sweep_id);
+		*w_ptr = *pattern_ptr;
+	}
+
+	while (!corruption_found && !timeout) {
+		prev_sweep_id = cur_sweep_id;
+		cur_sweep_id = cur_sweep_id + 1;
+
+		for (w_ptr = (unsigned int *)chunk_start;
+		     (unsigned long)w_ptr < (unsigned long)(chunk_start) + size;
+		     w_ptr++)  {
+			unsigned int old_pattern;
+
+			/*
+			 * Compute the pattern that we would have
+			 * stored at this location in the previous
+			 * sweep.
+			 */
+			old_pattern = compute_store_pattern(tid, w_ptr, prev_sweep_id);
+
+			/*
+			 * FLUSH:Ensure that we flush the contents of
+			 *       the cache before loading
+			 */
+			dcbf((volatile unsigned int*)w_ptr); //Flush
+
+			/* LOAD: Read the value */
+			read_data = *w_ptr; //Load
+
+			/*
+			 * COMPARE: Is it the same as what we had stored
+			 *          in the previous sweep ? It better be!
+			 */
+			if (read_data != old_pattern) {
+				/* No it isn't! Tell everyone */
+				corruption_found = 1;
+			}
+
+			/*
+			 * Before performing a store, let us check if
+			 * any rim_thread has found a corruption.
+			 */
+			if (corruption_found || timeout) {
+				/*
+				 * Yes. Someone (including us!) has found
+				 * a corruption :(
+				 *
+				 * Let us verify that our chunk is
+				 * correct.
+				 */
+				/* But first, let us allow the dust to settle down! */
+				verify_chunk(tid, w_ptr, cur_sweep_id, prev_sweep_id);
+
+				return 0;
+			}
+
+			/*
+			 * Compute the new pattern that we are going
+			 * to write to this location
+			 */
+			*pattern_ptr = compute_store_pattern(tid, w_ptr, cur_sweep_id);
+
+			/*
+			 * STORE: Now let us write this pattern into
+			 *        the location
+			 */
+			*w_ptr = *pattern_ptr;
+		}
+	}
+
+	return NULL;
+}
+
+
+static unsigned long start_cpu = 0;
+static unsigned long nrthreads = 4;
+
+static pthread_t mem_snapshot_thread;
+
+static void *mem_snapshot_fn(void *arg)
+{
+	int page_size = getpagesize();
+	size_t size = page_size;
+	void *tmp = malloc(size);
+
+	while (!corruption_found && !timeout) {
+		/* Stop memory migration once corruption is found */
+		segv_wait = 1;
+
+		mprotect(map1, size, PROT_READ);
+
+		/*
+		 * Load from the working alias (map1). Loading from map2
+		 * also fails.
+		 */
+		memcpy(tmp, map1, size);
+
+		/*
+		 * Stores must go via map2 which has write permissions, but
+		 * the corrupted data tends to be seen in the snapshot buffer,
+		 * so corruption does not appear to be introduced at the
+		 * copy-back via map2 alias here.
+		 */
+		memcpy(map2, tmp, size);
+		/*
+		 * Before releasing other threads, must ensure the copy
+		 * back to
+		 */
+		asm volatile("sync" ::: "memory");
+		mprotect(map1, size, PROT_READ|PROT_WRITE);
+		asm volatile("sync" ::: "memory");
+		segv_wait = 0;
+
+		usleep(1); /* This value makes a big difference */
+	}
+
+	return 0;
+}
+
+void alrm_sighandler(int sig)
+{
+	timeout = 1;
+}
+
+int main(int argc, char *argv[])
+{
+	int c;
+	int page_size = getpagesize();
+	time_t now;
+	int i, dir_error;
+	pthread_attr_t attr;
+	key_t shm_key = (key_t) getpid();
+	int shmid, run_time = 20 * 60;
+	struct sigaction sa_alrm;
+
+	snprintf(logdir, LOGDIR_NAME_SIZE,
+		 "/tmp/logdir-%u", (unsigned int)getpid());
+	while ((c = getopt(argc, argv, "r:hn:l:t:")) != -1) {
+		switch(c) {
+		case 'r':
+			start_cpu = strtoul(optarg, NULL, 10);
+			break;
+		case 'h':
+			printf("%s [-r <start_cpu>] [-n <nrthreads>] [-l <logdir>] [-t <timeout>]\n", argv[0]);
+			exit(0);
+			break;
+		case 'n':
+			nrthreads = strtoul(optarg, NULL, 10);
+			break;
+		case 'l':
+			strncpy(logdir, optarg, LOGDIR_NAME_SIZE - 1);
+			break;
+		case 't':
+			run_time = strtoul(optarg, NULL, 10);
+			break;
+		default:
+			printf("invalid option\n");
+			exit(0);
+			break;
+		}
+	}
+
+	if (nrthreads > MAX_THREADS)
+		nrthreads = MAX_THREADS;
+
+	shmid = shmget(shm_key, page_size, IPC_CREAT|0666);
+	if (shmid < 0) {
+		err_msg("Failed shmget\n");
+	}
+
+	map1 = shmat(shmid, NULL, 0);
+	if (map1 == (void *) -1) {
+		err_msg("Failed shmat");
+	}
+
+	map2 = shmat(shmid, NULL, 0);
+	if (map2 == (void *) -1) {
+		err_msg("Failed shmat");
+	}
+
+	dir_error = mkdir(logdir, 0755);
+
+	if (dir_error) {
+		err_msg("Failed mkdir");
+	}
+
+	printf("start_cpu list:%lu\n", start_cpu);
+	printf("number of worker threads:%lu + 1 snapshot thread\n", nrthreads);
+	printf("Allocated address:0x%016lx + secondary map:0x%016lx\n", (unsigned long)map1, (unsigned long)map2);
+	printf("logdir at : %s\n", logdir);
+	printf("Timeout: %d seconds\n", run_time);
+
+	time(&now);
+	printf("=================================\n");
+	printf("     Starting Test\n");
+	printf("     %s", ctime(&now));
+	printf("=================================\n");
+
+	for (i = 0; i < nrthreads; i++) {
+		if (1 && !fork()) {
+			prctl(PR_SET_PDEATHSIG, SIGKILL);
+			set_mycpu(start_cpu + i);
+			for (;;)
+				sched_yield();
+			exit(0);
+		}
+	}
+
+
+	sa_alrm.sa_handler = &alrm_sighandler;
+	sigemptyset(&sa_alrm.sa_mask);
+	sa_alrm.sa_flags = 0;
+
+	if (sigaction(SIGALRM, &sa_alrm, 0) == -1) {
+		err_msg("Failed signal handler registration\n");
+	}
+
+	alarm(run_time);
+
+	pthread_attr_init(&attr);
+	for (i = 0; i < nrthreads; i++) {
+		rim_thread_ids[i] = i;
+		pthread_create(&rim_threads[i], &attr, rim_fn, &rim_thread_ids[i]);
+		set_pthread_cpu(rim_threads[i], start_cpu + i);
+	}
+
+	pthread_create(&mem_snapshot_thread, &attr, mem_snapshot_fn, map1);
+	set_pthread_cpu(mem_snapshot_thread, start_cpu + i);
+
+
+	pthread_join(mem_snapshot_thread, NULL);
+	for (i = 0; i < nrthreads; i++) {
+		pthread_join(rim_threads[i], NULL);
+	}
+
+	if (!timeout) {
+		time(&now);
+		printf("=================================\n");
+		printf("      Data Corruption Detected\n");
+		printf("      %s", ctime(&now));
+		printf("      See logfiles in %s\n", logdir);
+		printf("=================================\n");
+		return 1;
+	}
+	return 0;
+}
diff --git a/tools/testing/selftests/powerpc/pmu/.gitignore b/tools/testing/selftests/powerpc/pmu/.gitignore
new file mode 100644
index 000000000..e748f336e
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/.gitignore
@@ -0,0 +1,3 @@
+count_instructions
+l3_bank_test
+per_event_excludes
diff --git a/tools/testing/selftests/powerpc/pmu/Makefile b/tools/testing/selftests/powerpc/pmu/Makefile
new file mode 100644
index 000000000..19046db99
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/Makefile
@@ -0,0 +1,46 @@
+# SPDX-License-Identifier: GPL-2.0
+noarg:
+	$(MAKE) -C ../
+
+TEST_GEN_PROGS := count_instructions l3_bank_test per_event_excludes
+EXTRA_SOURCES := ../harness.c event.c lib.c ../utils.c
+
+top_srcdir = ../../../../..
+include ../../lib.mk
+
+all: $(TEST_GEN_PROGS) ebb
+
+$(TEST_GEN_PROGS): $(EXTRA_SOURCES)
+
+# loop.S can only be built 64-bit
+$(OUTPUT)/count_instructions: loop.S count_instructions.c $(EXTRA_SOURCES)
+	$(CC) $(CFLAGS) -m64 -o $@ $^
+
+$(OUTPUT)/per_event_excludes: ../utils.c
+
+DEFAULT_RUN_TESTS := $(RUN_TESTS)
+override define RUN_TESTS
+	$(DEFAULT_RUN_TESTS)
+	TARGET=ebb; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET run_tests
+endef
+
+DEFAULT_EMIT_TESTS := $(EMIT_TESTS)
+override define EMIT_TESTS
+	$(DEFAULT_EMIT_TESTS)
+	TARGET=ebb; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -s -C $$TARGET emit_tests
+endef
+
+DEFAULT_INSTALL_RULE := $(INSTALL_RULE)
+override define INSTALL_RULE
+	$(DEFAULT_INSTALL_RULE)
+	TARGET=ebb; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET install
+endef
+
+clean:
+	$(RM) $(TEST_GEN_PROGS) $(OUTPUT)/loop.o
+	TARGET=ebb; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET clean
+
+ebb:
+	TARGET=$@; BUILD_TARGET=$$OUTPUT/$$TARGET; mkdir -p $$BUILD_TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -k -C $$TARGET all
+
+.PHONY: all run_tests clean ebb
diff --git a/tools/testing/selftests/powerpc/pmu/count_instructions.c b/tools/testing/selftests/powerpc/pmu/count_instructions.c
new file mode 100644
index 000000000..4622117b2
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/count_instructions.c
@@ -0,0 +1,147 @@
+/*
+ * Copyright 2013, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#define _GNU_SOURCE
+
+#include <stdio.h>
+#include <stdbool.h>
+#include <string.h>
+#include <sys/prctl.h>
+
+#include "event.h"
+#include "utils.h"
+#include "lib.h"
+
+extern void thirty_two_instruction_loop(u64 loops);
+
+static void setup_event(struct event *e, u64 config, char *name)
+{
+	event_init_opts(e, config, PERF_TYPE_HARDWARE, name);
+
+	e->attr.disabled = 1;
+	e->attr.exclude_kernel = 1;
+	e->attr.exclude_hv = 1;
+	e->attr.exclude_idle = 1;
+}
+
+static int do_count_loop(struct event *events, u64 instructions,
+			 u64 overhead, bool report)
+{
+	s64 difference, expected;
+	double percentage;
+
+	prctl(PR_TASK_PERF_EVENTS_ENABLE);
+
+	/* Run for 1M instructions */
+	thirty_two_instruction_loop(instructions >> 5);
+
+	prctl(PR_TASK_PERF_EVENTS_DISABLE);
+
+	event_read(&events[0]);
+	event_read(&events[1]);
+
+	expected = instructions + overhead;
+	difference = events[0].result.value - expected;
+	percentage = (double)difference / events[0].result.value * 100;
+
+	if (report) {
+		event_report(&events[0]);
+		event_report(&events[1]);
+
+		printf("Looped for %llu instructions, overhead %llu\n", instructions, overhead);
+		printf("Expected %llu\n", expected);
+		printf("Actual   %llu\n", events[0].result.value);
+		printf("Delta    %lld, %f%%\n", difference, percentage);
+	}
+
+	event_reset(&events[0]);
+	event_reset(&events[1]);
+
+	if (difference < 0)
+		difference = -difference;
+
+	/* Tolerate a difference below 0.0001 % */
+	difference *= 10000 * 100;
+	if (difference / events[0].result.value)
+		return -1;
+
+	return 0;
+}
+
+/* Count how many instructions it takes to do a null loop */
+static u64 determine_overhead(struct event *events)
+{
+	u64 current, overhead;
+	int i;
+
+	do_count_loop(events, 0, 0, false);
+	overhead = events[0].result.value;
+
+	for (i = 0; i < 100; i++) {
+		do_count_loop(events, 0, 0, false);
+		current = events[0].result.value;
+		if (current < overhead) {
+			printf("Replacing overhead %llu with %llu\n", overhead, current);
+			overhead = current;
+		}
+	}
+
+	return overhead;
+}
+
+static int test_body(void)
+{
+	struct event events[2];
+	u64 overhead;
+
+	setup_event(&events[0], PERF_COUNT_HW_INSTRUCTIONS, "instructions");
+	setup_event(&events[1], PERF_COUNT_HW_CPU_CYCLES, "cycles");
+
+	if (event_open(&events[0])) {
+		perror("perf_event_open");
+		return -1;
+	}
+
+	if (event_open_with_group(&events[1], events[0].fd)) {
+		perror("perf_event_open");
+		return -1;
+	}
+
+	overhead = determine_overhead(events);
+	printf("Overhead of null loop: %llu instructions\n", overhead);
+
+	/* Run for 1Mi instructions */
+	FAIL_IF(do_count_loop(events, 1000000, overhead, true));
+
+	/* Run for 10Mi instructions */
+	FAIL_IF(do_count_loop(events, 10000000, overhead, true));
+
+	/* Run for 100Mi instructions */
+	FAIL_IF(do_count_loop(events, 100000000, overhead, true));
+
+	/* Run for 1Bi instructions */
+	FAIL_IF(do_count_loop(events, 1000000000, overhead, true));
+
+	/* Run for 16Bi instructions */
+	FAIL_IF(do_count_loop(events, 16000000000, overhead, true));
+
+	/* Run for 64Bi instructions */
+	FAIL_IF(do_count_loop(events, 64000000000, overhead, true));
+
+	event_close(&events[0]);
+	event_close(&events[1]);
+
+	return 0;
+}
+
+static int count_instructions(void)
+{
+	return eat_cpu(test_body);
+}
+
+int main(void)
+{
+	return test_harness(count_instructions, "count_instructions");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/.gitignore b/tools/testing/selftests/powerpc/pmu/ebb/.gitignore
new file mode 100644
index 000000000..42bddbed8
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/.gitignore
@@ -0,0 +1,22 @@
+reg_access_test
+event_attributes_test
+cycles_test
+cycles_with_freeze_test
+pmc56_overflow_test
+ebb_vs_cpu_event_test
+cpu_event_vs_ebb_test
+cpu_event_pinned_vs_ebb_test
+task_event_vs_ebb_test
+task_event_pinned_vs_ebb_test
+multi_ebb_procs_test
+multi_counter_test
+pmae_handling_test
+close_clears_pmcc_test
+instruction_count_test
+fork_cleanup_test
+ebb_on_child_test
+ebb_on_willing_child_test
+back_to_back_ebbs_test
+lost_exception_test
+no_handler_test
+cycles_with_mmcr2_test
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/Makefile b/tools/testing/selftests/powerpc/pmu/ebb/Makefile
new file mode 100644
index 000000000..bd5dfa509
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/Makefile
@@ -0,0 +1,28 @@
+# SPDX-License-Identifier: GPL-2.0
+noarg:
+	$(MAKE) -C ../../
+
+# The EBB handler is 64-bit code and everything links against it
+CFLAGS += -m64
+
+TEST_GEN_PROGS := reg_access_test event_attributes_test cycles_test	\
+	 cycles_with_freeze_test pmc56_overflow_test		\
+	 ebb_vs_cpu_event_test cpu_event_vs_ebb_test		\
+	 cpu_event_pinned_vs_ebb_test task_event_vs_ebb_test	\
+	 task_event_pinned_vs_ebb_test multi_ebb_procs_test	\
+	 multi_counter_test pmae_handling_test			\
+	 close_clears_pmcc_test instruction_count_test		\
+	 fork_cleanup_test ebb_on_child_test			\
+	 ebb_on_willing_child_test back_to_back_ebbs_test	\
+	 lost_exception_test no_handler_test			\
+	 cycles_with_mmcr2_test
+
+top_srcdir = ../../../../../..
+include ../../../lib.mk
+
+$(TEST_GEN_PROGS): ../../harness.c ../../utils.c ../event.c ../lib.c \
+	       ebb.c ebb_handler.S trace.c busy_loop.S
+
+$(OUTPUT)/instruction_count_test: ../loop.S
+
+$(OUTPUT)/lost_exception_test: ../lib.c
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/back_to_back_ebbs_test.c b/tools/testing/selftests/powerpc/pmu/ebb/back_to_back_ebbs_test.c
new file mode 100644
index 000000000..031baa436
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/back_to_back_ebbs_test.c
@@ -0,0 +1,106 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "ebb.h"
+
+
+#define NUMBER_OF_EBBS	50
+
+/*
+ * Test that if we overflow the counter while in the EBB handler, we take
+ * another EBB on exiting from the handler.
+ *
+ * We do this by counting with a stupidly low sample period, causing us to
+ * overflow the PMU while we're still in the EBB handler, leading to another
+ * EBB.
+ *
+ * We get out of what would otherwise be an infinite loop by leaving the
+ * counter frozen once we've taken enough EBBs.
+ */
+
+static void ebb_callee(void)
+{
+	uint64_t siar, val;
+
+	val = mfspr(SPRN_BESCR);
+	if (!(val & BESCR_PMEO)) {
+		ebb_state.stats.spurious++;
+		goto out;
+	}
+
+	ebb_state.stats.ebb_count++;
+	trace_log_counter(ebb_state.trace, ebb_state.stats.ebb_count);
+
+	/* Resets the PMC */
+	count_pmc(1, sample_period);
+
+out:
+	if (ebb_state.stats.ebb_count == NUMBER_OF_EBBS)
+		/* Reset but leave counters frozen */
+		reset_ebb_with_clear_mask(MMCR0_PMAO);
+	else
+		/* Unfreezes */
+		reset_ebb();
+
+	/* Do some stuff to chew some cycles and pop the counter */
+	siar = mfspr(SPRN_SIAR);
+	trace_log_reg(ebb_state.trace, SPRN_SIAR, siar);
+
+	val = mfspr(SPRN_PMC1);
+	trace_log_reg(ebb_state.trace, SPRN_PMC1, val);
+
+	val = mfspr(SPRN_MMCR0);
+	trace_log_reg(ebb_state.trace, SPRN_MMCR0, val);
+}
+
+int back_to_back_ebbs(void)
+{
+	struct event event;
+
+	SKIP_IF(!ebb_is_supported());
+
+	event_init_named(&event, 0x1001e, "cycles");
+	event_leader_ebb_init(&event);
+
+	event.attr.exclude_kernel = 1;
+	event.attr.exclude_hv = 1;
+	event.attr.exclude_idle = 1;
+
+	FAIL_IF(event_open(&event));
+
+	setup_ebb_handler(ebb_callee);
+
+	FAIL_IF(ebb_event_enable(&event));
+
+	sample_period = 5;
+
+	ebb_freeze_pmcs();
+	mtspr(SPRN_PMC1, pmc_sample_period(sample_period));
+	ebb_global_enable();
+	ebb_unfreeze_pmcs();
+
+	while (ebb_state.stats.ebb_count < NUMBER_OF_EBBS)
+		FAIL_IF(core_busy_loop());
+
+	ebb_global_disable();
+	ebb_freeze_pmcs();
+
+	dump_ebb_state();
+
+	event_close(&event);
+
+	FAIL_IF(ebb_state.stats.ebb_count != NUMBER_OF_EBBS);
+
+	return 0;
+}
+
+int main(void)
+{
+	return test_harness(back_to_back_ebbs, "back_to_back_ebbs");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/busy_loop.S b/tools/testing/selftests/powerpc/pmu/ebb/busy_loop.S
new file mode 100644
index 000000000..c7e4093f1
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/busy_loop.S
@@ -0,0 +1,271 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <ppc-asm.h>
+
+	.text
+
+FUNC_START(core_busy_loop)
+	stdu	%r1, -168(%r1)
+	std	r14, 160(%r1)
+	std	r15, 152(%r1)
+	std	r16, 144(%r1)
+	std	r17, 136(%r1)
+	std	r18, 128(%r1)
+	std	r19, 120(%r1)
+	std	r20, 112(%r1)
+	std	r21, 104(%r1)
+	std	r22, 96(%r1)
+	std	r23, 88(%r1)
+	std	r24, 80(%r1)
+	std	r25, 72(%r1)
+	std	r26, 64(%r1)
+	std	r27, 56(%r1)
+	std	r28, 48(%r1)
+	std	r29, 40(%r1)
+	std	r30, 32(%r1)
+	std	r31, 24(%r1)
+
+	li	 r3, 0x3030
+	std	 r3, -96(%r1)
+	li	 r4, 0x4040
+	std	 r4, -104(%r1)
+	li	 r5, 0x5050
+	std	 r5, -112(%r1)
+	li	 r6, 0x6060
+	std	 r6, -120(%r1)
+	li	 r7, 0x7070
+	std	 r7, -128(%r1)
+	li	 r8, 0x0808
+	std	 r8, -136(%r1)
+	li	 r9, 0x0909
+	std	 r9, -144(%r1)
+	li	r10, 0x1010
+	std	r10, -152(%r1)
+	li	r11, 0x1111
+	std	r11, -160(%r1)
+	li	r14, 0x1414
+	std	r14, -168(%r1)
+	li	r15, 0x1515
+	std	r15, -176(%r1)
+	li	r16, 0x1616
+	std	r16, -184(%r1)
+	li	r17, 0x1717
+	std	r17, -192(%r1)
+	li	r18, 0x1818
+	std	r18, -200(%r1)
+	li	r19, 0x1919
+	std	r19, -208(%r1)
+	li	r20, 0x2020
+	std	r20, -216(%r1)
+	li	r21, 0x2121
+	std	r21, -224(%r1)
+	li	r22, 0x2222
+	std	r22, -232(%r1)
+	li	r23, 0x2323
+	std	r23, -240(%r1)
+	li	r24, 0x2424
+	std	r24, -248(%r1)
+	li	r25, 0x2525
+	std	r25, -256(%r1)
+	li	r26, 0x2626
+	std	r26, -264(%r1)
+	li	r27, 0x2727
+	std	r27, -272(%r1)
+	li	r28, 0x2828
+	std	r28, -280(%r1)
+	li	r29, 0x2929
+	std	r29, -288(%r1)
+	li	r30, 0x3030
+	li	r31, 0x3131
+
+	li	r3, 0
+0:	addi	r3, r3, 1
+	cmpwi	r3, 100
+	blt	0b
+
+	/* Return 1 (fail) unless we get through all the checks */
+	li	r3, 1
+
+	/* Check none of our registers have been corrupted */
+	cmpwi	r4,  0x4040
+	bne	1f
+	cmpwi	r5,  0x5050
+	bne	1f
+	cmpwi	r6,  0x6060
+	bne	1f
+	cmpwi	r7,  0x7070
+	bne	1f
+	cmpwi	r8,  0x0808
+	bne	1f
+	cmpwi	r9,  0x0909
+	bne	1f
+	cmpwi	r10, 0x1010
+	bne	1f
+	cmpwi	r11, 0x1111
+	bne	1f
+	cmpwi	r14, 0x1414
+	bne	1f
+	cmpwi	r15, 0x1515
+	bne	1f
+	cmpwi	r16, 0x1616
+	bne	1f
+	cmpwi	r17, 0x1717
+	bne	1f
+	cmpwi	r18, 0x1818
+	bne	1f
+	cmpwi	r19, 0x1919
+	bne	1f
+	cmpwi	r20, 0x2020
+	bne	1f
+	cmpwi	r21, 0x2121
+	bne	1f
+	cmpwi	r22, 0x2222
+	bne	1f
+	cmpwi	r23, 0x2323
+	bne	1f
+	cmpwi	r24, 0x2424
+	bne	1f
+	cmpwi	r25, 0x2525
+	bne	1f
+	cmpwi	r26, 0x2626
+	bne	1f
+	cmpwi	r27, 0x2727
+	bne	1f
+	cmpwi	r28, 0x2828
+	bne	1f
+	cmpwi	r29, 0x2929
+	bne	1f
+	cmpwi	r30, 0x3030
+	bne	1f
+	cmpwi	r31, 0x3131
+	bne	1f
+
+	/* Load junk into all our registers before we reload them from the stack. */
+	li	r3,  0xde
+	li	r4,  0xad
+	li	r5,  0xbe
+	li	r6,  0xef
+	li	r7,  0xde
+	li	r8,  0xad
+	li	r9,  0xbe
+	li	r10, 0xef
+	li	r11, 0xde
+	li	r14, 0xad
+	li	r15, 0xbe
+	li	r16, 0xef
+	li	r17, 0xde
+	li	r18, 0xad
+	li	r19, 0xbe
+	li	r20, 0xef
+	li	r21, 0xde
+	li	r22, 0xad
+	li	r23, 0xbe
+	li	r24, 0xef
+	li	r25, 0xde
+	li	r26, 0xad
+	li	r27, 0xbe
+	li	r28, 0xef
+	li	r29, 0xdd
+
+	ld	r3,	-96(%r1)
+	cmpwi	r3,  0x3030
+	bne	1f
+	ld	r4,	-104(%r1)
+	cmpwi	r4,  0x4040
+	bne	1f
+	ld	r5,	-112(%r1)
+	cmpwi	r5,  0x5050
+	bne	1f
+	ld	r6,	-120(%r1)
+	cmpwi	r6,  0x6060
+	bne	1f
+	ld	r7,	-128(%r1)
+	cmpwi	r7,  0x7070
+	bne	1f
+	ld	r8,	-136(%r1)
+	cmpwi	r8,  0x0808
+	bne	1f
+	ld	r9,	-144(%r1)
+	cmpwi	r9,  0x0909
+	bne	1f
+	ld	r10, -152(%r1)
+	cmpwi	r10, 0x1010
+	bne	1f
+	ld	r11, -160(%r1)
+	cmpwi	r11, 0x1111
+	bne	1f
+	ld	r14, -168(%r1)
+	cmpwi	r14, 0x1414
+	bne	1f
+	ld	r15, -176(%r1)
+	cmpwi	r15, 0x1515
+	bne	1f
+	ld	r16, -184(%r1)
+	cmpwi	r16, 0x1616
+	bne	1f
+	ld	r17, -192(%r1)
+	cmpwi	r17, 0x1717
+	bne	1f
+	ld	r18, -200(%r1)
+	cmpwi	r18, 0x1818
+	bne	1f
+	ld	r19, -208(%r1)
+	cmpwi	r19, 0x1919
+	bne	1f
+	ld	r20, -216(%r1)
+	cmpwi	r20, 0x2020
+	bne	1f
+	ld	r21, -224(%r1)
+	cmpwi	r21, 0x2121
+	bne	1f
+	ld	r22, -232(%r1)
+	cmpwi	r22, 0x2222
+	bne	1f
+	ld	r23, -240(%r1)
+	cmpwi	r23, 0x2323
+	bne	1f
+	ld	r24, -248(%r1)
+	cmpwi	r24, 0x2424
+	bne	1f
+	ld	r25, -256(%r1)
+	cmpwi	r25, 0x2525
+	bne	1f
+	ld	r26, -264(%r1)
+	cmpwi	r26, 0x2626
+	bne	1f
+	ld	r27, -272(%r1)
+	cmpwi	r27, 0x2727
+	bne	1f
+	ld	r28, -280(%r1)
+	cmpwi	r28, 0x2828
+	bne	1f
+	ld	r29, -288(%r1)
+	cmpwi	r29, 0x2929
+	bne	1f
+
+	/* Load 0 (success) to return */
+	li	r3, 0
+
+1:	ld	r14, 160(%r1)
+	ld	r15, 152(%r1)
+	ld	r16, 144(%r1)
+	ld	r17, 136(%r1)
+	ld	r18, 128(%r1)
+	ld	r19, 120(%r1)
+	ld	r20, 112(%r1)
+	ld	r21, 104(%r1)
+	ld	r22, 96(%r1)
+	ld	r23, 88(%r1)
+	ld	r24, 80(%r1)
+	ld	r25, 72(%r1)
+	ld	r26, 64(%r1)
+	ld	r27, 56(%r1)
+	ld	r28, 48(%r1)
+	ld	r29, 40(%r1)
+	ld	r30, 32(%r1)
+	ld	r31, 24(%r1)
+	addi	%r1, %r1, 168
+	blr
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/close_clears_pmcc_test.c b/tools/testing/selftests/powerpc/pmu/ebb/close_clears_pmcc_test.c
new file mode 100644
index 000000000..ac18cf617
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/close_clears_pmcc_test.c
@@ -0,0 +1,61 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <setjmp.h>
+#include <signal.h>
+
+#include "ebb.h"
+
+
+/*
+ * Test that closing the EBB event clears MMCR0_PMCC, preventing further access
+ * by userspace to the PMU hardware.
+ */
+
+int close_clears_pmcc(void)
+{
+	struct event event;
+
+	SKIP_IF(!ebb_is_supported());
+
+	event_init_named(&event, 0x1001e, "cycles");
+	event_leader_ebb_init(&event);
+
+	FAIL_IF(event_open(&event));
+
+	ebb_enable_pmc_counting(1);
+	setup_ebb_handler(standard_ebb_callee);
+	ebb_global_enable();
+	FAIL_IF(ebb_event_enable(&event));
+
+	mtspr(SPRN_PMC1, pmc_sample_period(sample_period));
+
+	while (ebb_state.stats.ebb_count < 1)
+		FAIL_IF(core_busy_loop());
+
+	ebb_global_disable();
+	event_close(&event);
+
+	FAIL_IF(ebb_state.stats.ebb_count == 0);
+
+	/* The real test is here, do we take a SIGILL when writing PMU regs now
+	 * that we have closed the event. We expect that we will. */
+
+	FAIL_IF(catch_sigill(write_pmc1));
+
+	/* We should still be able to read EBB regs though */
+	mfspr(SPRN_EBBHR);
+	mfspr(SPRN_EBBRR);
+	mfspr(SPRN_BESCR);
+
+	return 0;
+}
+
+int main(void)
+{
+	return test_harness(close_clears_pmcc, "close_clears_pmcc");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/cpu_event_pinned_vs_ebb_test.c b/tools/testing/selftests/powerpc/pmu/ebb/cpu_event_pinned_vs_ebb_test.c
new file mode 100644
index 000000000..f0632e7fd
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/cpu_event_pinned_vs_ebb_test.c
@@ -0,0 +1,95 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "ebb.h"
+
+
+/*
+ * Tests a pinned cpu event vs an EBB - in that order. The pinned cpu event
+ * should remain and the EBB event should fail to enable.
+ */
+
+static int setup_cpu_event(struct event *event, int cpu)
+{
+	event_init_named(event, 0x400FA, "PM_RUN_INST_CMPL");
+
+	event->attr.pinned = 1;
+
+	event->attr.exclude_kernel = 1;
+	event->attr.exclude_hv = 1;
+	event->attr.exclude_idle = 1;
+
+	SKIP_IF(require_paranoia_below(1));
+	FAIL_IF(event_open_with_cpu(event, cpu));
+	FAIL_IF(event_enable(event));
+
+	return 0;
+}
+
+int cpu_event_pinned_vs_ebb(void)
+{
+	union pipe read_pipe, write_pipe;
+	struct event event;
+	int cpu, rc;
+	pid_t pid;
+
+	SKIP_IF(!ebb_is_supported());
+
+	cpu = pick_online_cpu();
+	FAIL_IF(cpu < 0);
+	FAIL_IF(bind_to_cpu(cpu));
+
+	FAIL_IF(pipe(read_pipe.fds) == -1);
+	FAIL_IF(pipe(write_pipe.fds) == -1);
+
+	pid = fork();
+	if (pid == 0) {
+		/* NB order of pipes looks reversed */
+		exit(ebb_child(write_pipe, read_pipe));
+	}
+
+	/* We setup the cpu event first */
+	rc = setup_cpu_event(&event, cpu);
+	if (rc) {
+		kill_child_and_wait(pid);
+		return rc;
+	}
+
+	/* Signal the child to install its EBB event and wait */
+	if (sync_with_child(read_pipe, write_pipe))
+		/* If it fails, wait for it to exit */
+		goto wait;
+
+	/* Signal the child to run */
+	FAIL_IF(sync_with_child(read_pipe, write_pipe));
+
+wait:
+	/* We expect it to fail to read the event */
+	FAIL_IF(wait_for_child(pid) != 2);
+
+	FAIL_IF(event_disable(&event));
+	FAIL_IF(event_read(&event));
+
+	event_report(&event);
+
+	/* The cpu event should have run */
+	FAIL_IF(event.result.value == 0);
+	FAIL_IF(event.result.enabled != event.result.running);
+
+	return 0;
+}
+
+int main(void)
+{
+	return test_harness(cpu_event_pinned_vs_ebb, "cpu_event_pinned_vs_ebb");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/cpu_event_vs_ebb_test.c b/tools/testing/selftests/powerpc/pmu/ebb/cpu_event_vs_ebb_test.c
new file mode 100644
index 000000000..33e56a234
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/cpu_event_vs_ebb_test.c
@@ -0,0 +1,91 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "ebb.h"
+
+
+/*
+ * Tests a cpu event vs an EBB - in that order. The EBB should force the cpu
+ * event off the PMU.
+ */
+
+static int setup_cpu_event(struct event *event, int cpu)
+{
+	event_init_named(event, 0x400FA, "PM_RUN_INST_CMPL");
+
+	event->attr.exclude_kernel = 1;
+	event->attr.exclude_hv = 1;
+	event->attr.exclude_idle = 1;
+
+	SKIP_IF(require_paranoia_below(1));
+	FAIL_IF(event_open_with_cpu(event, cpu));
+	FAIL_IF(event_enable(event));
+
+	return 0;
+}
+
+int cpu_event_vs_ebb(void)
+{
+	union pipe read_pipe, write_pipe;
+	struct event event;
+	int cpu, rc;
+	pid_t pid;
+
+	SKIP_IF(!ebb_is_supported());
+
+	cpu = pick_online_cpu();
+	FAIL_IF(cpu < 0);
+	FAIL_IF(bind_to_cpu(cpu));
+
+	FAIL_IF(pipe(read_pipe.fds) == -1);
+	FAIL_IF(pipe(write_pipe.fds) == -1);
+
+	pid = fork();
+	if (pid == 0) {
+		/* NB order of pipes looks reversed */
+		exit(ebb_child(write_pipe, read_pipe));
+	}
+
+	/* We setup the cpu event first */
+	rc = setup_cpu_event(&event, cpu);
+	if (rc) {
+		kill_child_and_wait(pid);
+		return rc;
+	}
+
+	/* Signal the child to install its EBB event and wait */
+	if (sync_with_child(read_pipe, write_pipe))
+		/* If it fails, wait for it to exit */
+		goto wait;
+
+	/* Signal the child to run */
+	FAIL_IF(sync_with_child(read_pipe, write_pipe));
+
+wait:
+	/* We expect the child to succeed */
+	FAIL_IF(wait_for_child(pid));
+
+	FAIL_IF(event_disable(&event));
+	FAIL_IF(event_read(&event));
+
+	event_report(&event);
+
+	/* The cpu event may have run */
+
+	return 0;
+}
+
+int main(void)
+{
+	return test_harness(cpu_event_vs_ebb, "cpu_event_vs_ebb");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/cycles_test.c b/tools/testing/selftests/powerpc/pmu/ebb/cycles_test.c
new file mode 100644
index 000000000..361e0be9d
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/cycles_test.c
@@ -0,0 +1,58 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "ebb.h"
+
+
+/*
+ * Basic test that counts user cycles and takes EBBs.
+ */
+int cycles(void)
+{
+	struct event event;
+
+	SKIP_IF(!ebb_is_supported());
+
+	event_init_named(&event, 0x1001e, "cycles");
+	event_leader_ebb_init(&event);
+
+	event.attr.exclude_kernel = 1;
+	event.attr.exclude_hv = 1;
+	event.attr.exclude_idle = 1;
+
+	FAIL_IF(event_open(&event));
+
+	ebb_enable_pmc_counting(1);
+	setup_ebb_handler(standard_ebb_callee);
+	ebb_global_enable();
+	FAIL_IF(ebb_event_enable(&event));
+
+	mtspr(SPRN_PMC1, pmc_sample_period(sample_period));
+
+	while (ebb_state.stats.ebb_count < 10) {
+		FAIL_IF(core_busy_loop());
+		FAIL_IF(ebb_check_mmcr0());
+	}
+
+	ebb_global_disable();
+	ebb_freeze_pmcs();
+
+	dump_ebb_state();
+
+	event_close(&event);
+
+	FAIL_IF(ebb_state.stats.ebb_count == 0);
+	FAIL_IF(!ebb_check_count(1, sample_period, 100));
+
+	return 0;
+}
+
+int main(void)
+{
+	return test_harness(cycles, "cycles");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/cycles_with_freeze_test.c b/tools/testing/selftests/powerpc/pmu/ebb/cycles_with_freeze_test.c
new file mode 100644
index 000000000..fe7d0dc2a
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/cycles_with_freeze_test.c
@@ -0,0 +1,117 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+
+#include "ebb.h"
+
+
+/*
+ * Test of counting cycles while using MMCR0_FC (freeze counters) to only count
+ * parts of the code. This is complicated by the fact that FC is set by the
+ * hardware when the event overflows. We may take the EBB after we have set FC,
+ * so we have to be careful about whether we clear FC at the end of the EBB
+ * handler or not.
+ */
+
+static bool counters_frozen = false;
+static int ebbs_while_frozen = 0;
+
+static void ebb_callee(void)
+{
+	uint64_t mask, val;
+
+	mask = MMCR0_PMAO | MMCR0_FC;
+
+	val = mfspr(SPRN_BESCR);
+	if (!(val & BESCR_PMEO)) {
+		ebb_state.stats.spurious++;
+		goto out;
+	}
+
+	ebb_state.stats.ebb_count++;
+	trace_log_counter(ebb_state.trace, ebb_state.stats.ebb_count);
+
+	val = mfspr(SPRN_MMCR0);
+	trace_log_reg(ebb_state.trace, SPRN_MMCR0, val);
+
+	if (counters_frozen) {
+		trace_log_string(ebb_state.trace, "frozen");
+		ebbs_while_frozen++;
+		mask &= ~MMCR0_FC;
+	}
+
+	count_pmc(1, sample_period);
+out:
+	reset_ebb_with_clear_mask(mask);
+}
+
+int cycles_with_freeze(void)
+{
+	struct event event;
+	uint64_t val;
+	bool fc_cleared;
+
+	SKIP_IF(!ebb_is_supported());
+
+	event_init_named(&event, 0x1001e, "cycles");
+	event_leader_ebb_init(&event);
+
+	event.attr.exclude_kernel = 1;
+	event.attr.exclude_hv = 1;
+	event.attr.exclude_idle = 1;
+
+	FAIL_IF(event_open(&event));
+
+	setup_ebb_handler(ebb_callee);
+	ebb_global_enable();
+	FAIL_IF(ebb_event_enable(&event));
+
+	mtspr(SPRN_PMC1, pmc_sample_period(sample_period));
+
+	fc_cleared = false;
+
+	/* Make sure we loop until we take at least one EBB */
+	while ((ebb_state.stats.ebb_count < 20 && !fc_cleared) ||
+		ebb_state.stats.ebb_count < 1)
+	{
+		counters_frozen = false;
+		mb();
+		mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) & ~MMCR0_FC);
+
+		FAIL_IF(core_busy_loop());
+
+		counters_frozen = true;
+		mb();
+		mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) |  MMCR0_FC);
+
+		val = mfspr(SPRN_MMCR0);
+		if (! (val & MMCR0_FC)) {
+			printf("Outside of loop, FC NOT set MMCR0 0x%lx\n", val);
+			fc_cleared = true;
+		}
+	}
+
+	ebb_global_disable();
+	ebb_freeze_pmcs();
+
+	dump_ebb_state();
+
+	printf("EBBs while frozen %d\n", ebbs_while_frozen);
+
+	event_close(&event);
+
+	FAIL_IF(ebb_state.stats.ebb_count == 0);
+	FAIL_IF(fc_cleared);
+
+	return 0;
+}
+
+int main(void)
+{
+	return test_harness(cycles_with_freeze, "cycles_with_freeze");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/cycles_with_mmcr2_test.c b/tools/testing/selftests/powerpc/pmu/ebb/cycles_with_mmcr2_test.c
new file mode 100644
index 000000000..b9b30f974
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/cycles_with_mmcr2_test.c
@@ -0,0 +1,91 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+
+#include "ebb.h"
+
+
+/*
+ * Test of counting cycles while manipulating the user accessible bits in MMCR2.
+ */
+
+/* We use two values because the first freezes PMC1 and so we would get no EBBs */
+#define MMCR2_EXPECTED_1 0x4020100804020000UL /* (FC1P|FC2P|FC3P|FC4P|FC5P|FC6P) */
+#define MMCR2_EXPECTED_2 0x0020100804020000UL /* (     FC2P|FC3P|FC4P|FC5P|FC6P) */
+
+
+int cycles_with_mmcr2(void)
+{
+	struct event event;
+	uint64_t val, expected[2], actual;
+	int i;
+	bool bad_mmcr2;
+
+	SKIP_IF(!ebb_is_supported());
+
+	event_init_named(&event, 0x1001e, "cycles");
+	event_leader_ebb_init(&event);
+
+	event.attr.exclude_kernel = 1;
+	event.attr.exclude_hv = 1;
+	event.attr.exclude_idle = 1;
+
+	FAIL_IF(event_open(&event));
+
+	ebb_enable_pmc_counting(1);
+	setup_ebb_handler(standard_ebb_callee);
+	ebb_global_enable();
+
+	FAIL_IF(ebb_event_enable(&event));
+
+	mtspr(SPRN_PMC1, pmc_sample_period(sample_period));
+
+	/* XXX Set of MMCR2 must be after enable */
+	expected[0] = MMCR2_EXPECTED_1;
+	expected[1] = MMCR2_EXPECTED_2;
+	i = 0;
+	bad_mmcr2 = false;
+
+	/* Make sure we loop until we take at least one EBB */
+	while ((ebb_state.stats.ebb_count < 20 && !bad_mmcr2) ||
+		ebb_state.stats.ebb_count < 1)
+	{
+		mtspr(SPRN_MMCR2, expected[i % 2]);
+
+		FAIL_IF(core_busy_loop());
+
+		val = mfspr(SPRN_MMCR2);
+		if (val != expected[i % 2]) {
+			bad_mmcr2 = true;
+			actual = val;
+		}
+
+		i++;
+	}
+
+	ebb_global_disable();
+	ebb_freeze_pmcs();
+
+	dump_ebb_state();
+
+	event_close(&event);
+
+	FAIL_IF(ebb_state.stats.ebb_count == 0);
+
+	if (bad_mmcr2)
+		printf("Bad MMCR2 value seen is 0x%lx\n", actual);
+
+	FAIL_IF(bad_mmcr2);
+
+	return 0;
+}
+
+int main(void)
+{
+	return test_harness(cycles_with_mmcr2, "cycles_with_mmcr2");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/ebb.c b/tools/testing/selftests/powerpc/pmu/ebb/ebb.c
new file mode 100644
index 000000000..2694ae161
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/ebb.c
@@ -0,0 +1,485 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#define _GNU_SOURCE	/* For CPU_ZERO etc. */
+
+#include <sched.h>
+#include <sys/wait.h>
+#include <setjmp.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "trace.h"
+#include "ebb.h"
+
+
+void (*ebb_user_func)(void);
+
+void ebb_hook(void)
+{
+	if (ebb_user_func)
+		ebb_user_func();
+}
+
+struct ebb_state ebb_state;
+
+u64 sample_period = 0x40000000ull;
+
+void reset_ebb_with_clear_mask(unsigned long mmcr0_clear_mask)
+{
+	u64 val;
+
+	/* 2) clear MMCR0[PMAO] - docs say BESCR[PMEO] should do this */
+	/* 3) set MMCR0[PMAE]	- docs say BESCR[PME] should do this */
+	val = mfspr(SPRN_MMCR0);
+	mtspr(SPRN_MMCR0, (val & ~mmcr0_clear_mask) | MMCR0_PMAE);
+
+	/* 4) clear BESCR[PMEO] */
+	mtspr(SPRN_BESCRR, BESCR_PMEO);
+
+	/* 5) set BESCR[PME] */
+	mtspr(SPRN_BESCRS, BESCR_PME);
+
+	/* 6) rfebb 1 - done in our caller */
+}
+
+void reset_ebb(void)
+{
+	reset_ebb_with_clear_mask(MMCR0_PMAO | MMCR0_FC);
+}
+
+/* Called outside of the EBB handler to check MMCR0 is sane */
+int ebb_check_mmcr0(void)
+{
+	u64 val;
+
+	val = mfspr(SPRN_MMCR0);
+	if ((val & (MMCR0_FC | MMCR0_PMAO)) == MMCR0_FC) {
+		/* It's OK if we see FC & PMAO, but not FC by itself */
+		printf("Outside of loop, only FC set 0x%llx\n", val);
+		return 1;
+	}
+
+	return 0;
+}
+
+bool ebb_check_count(int pmc, u64 sample_period, int fudge)
+{
+	u64 count, upper, lower;
+
+	count = ebb_state.stats.pmc_count[PMC_INDEX(pmc)];
+
+	lower = ebb_state.stats.ebb_count * (sample_period - fudge);
+
+	if (count < lower) {
+		printf("PMC%d count (0x%llx) below lower limit 0x%llx (-0x%llx)\n",
+			pmc, count, lower, lower - count);
+		return false;
+	}
+
+	upper = ebb_state.stats.ebb_count * (sample_period + fudge);
+
+	if (count > upper) {
+		printf("PMC%d count (0x%llx) above upper limit 0x%llx (+0x%llx)\n",
+			pmc, count, upper, count - upper);
+		return false;
+	}
+
+	printf("PMC%d count (0x%llx) is between 0x%llx and 0x%llx delta +0x%llx/-0x%llx\n",
+		pmc, count, lower, upper, count - lower, upper - count);
+
+	return true;
+}
+
+void standard_ebb_callee(void)
+{
+	int found, i;
+	u64 val;
+
+	val = mfspr(SPRN_BESCR);
+	if (!(val & BESCR_PMEO)) {
+		ebb_state.stats.spurious++;
+		goto out;
+	}
+
+	ebb_state.stats.ebb_count++;
+	trace_log_counter(ebb_state.trace, ebb_state.stats.ebb_count);
+
+	val = mfspr(SPRN_MMCR0);
+	trace_log_reg(ebb_state.trace, SPRN_MMCR0, val);
+
+	found = 0;
+	for (i = 1; i <= 6; i++) {
+		if (ebb_state.pmc_enable[PMC_INDEX(i)])
+			found += count_pmc(i, sample_period);
+	}
+
+	if (!found)
+		ebb_state.stats.no_overflow++;
+
+out:
+	reset_ebb();
+}
+
+extern void ebb_handler(void);
+
+void setup_ebb_handler(void (*callee)(void))
+{
+	u64 entry;
+
+#if defined(_CALL_ELF) && _CALL_ELF == 2
+	entry = (u64)ebb_handler;
+#else
+	struct opd
+	{
+	    u64 entry;
+	    u64 toc;
+	} *opd;
+
+	opd = (struct opd *)ebb_handler;
+	entry = opd->entry;
+#endif
+	printf("EBB Handler is at %#llx\n", entry);
+
+	ebb_user_func = callee;
+
+	/* Ensure ebb_user_func is set before we set the handler */
+	mb();
+	mtspr(SPRN_EBBHR, entry);
+
+	/* Make sure the handler is set before we return */
+	mb();
+}
+
+void clear_ebb_stats(void)
+{
+	memset(&ebb_state.stats, 0, sizeof(ebb_state.stats));
+}
+
+void dump_summary_ebb_state(void)
+{
+	printf("ebb_state:\n"			\
+	       "  ebb_count    = %d\n"		\
+	       "  spurious     = %d\n"		\
+	       "  negative     = %d\n"		\
+	       "  no_overflow  = %d\n"		\
+	       "  pmc[1] count = 0x%llx\n"	\
+	       "  pmc[2] count = 0x%llx\n"	\
+	       "  pmc[3] count = 0x%llx\n"	\
+	       "  pmc[4] count = 0x%llx\n"	\
+	       "  pmc[5] count = 0x%llx\n"	\
+	       "  pmc[6] count = 0x%llx\n",
+		ebb_state.stats.ebb_count, ebb_state.stats.spurious,
+		ebb_state.stats.negative, ebb_state.stats.no_overflow,
+		ebb_state.stats.pmc_count[0], ebb_state.stats.pmc_count[1],
+		ebb_state.stats.pmc_count[2], ebb_state.stats.pmc_count[3],
+		ebb_state.stats.pmc_count[4], ebb_state.stats.pmc_count[5]);
+}
+
+static char *decode_mmcr0(u32 value)
+{
+	static char buf[16];
+
+	buf[0] = '\0';
+
+	if (value & (1 << 31))
+		strcat(buf, "FC ");
+	if (value & (1 << 26))
+		strcat(buf, "PMAE ");
+	if (value & (1 << 7))
+		strcat(buf, "PMAO ");
+
+	return buf;
+}
+
+static char *decode_bescr(u64 value)
+{
+	static char buf[16];
+
+	buf[0] = '\0';
+
+	if (value & (1ull << 63))
+		strcat(buf, "GE ");
+	if (value & (1ull << 32))
+		strcat(buf, "PMAE ");
+	if (value & 1)
+		strcat(buf, "PMAO ");
+
+	return buf;
+}
+
+void dump_ebb_hw_state(void)
+{
+	u64 bescr;
+	u32 mmcr0;
+
+	mmcr0 = mfspr(SPRN_MMCR0);
+	bescr = mfspr(SPRN_BESCR);
+
+	printf("HW state:\n"		\
+	       "MMCR0 0x%016x %s\n"	\
+	       "MMCR2 0x%016lx\n"	\
+	       "EBBHR 0x%016lx\n"	\
+	       "BESCR 0x%016llx %s\n"	\
+	       "PMC1  0x%016lx\n"	\
+	       "PMC2  0x%016lx\n"	\
+	       "PMC3  0x%016lx\n"	\
+	       "PMC4  0x%016lx\n"	\
+	       "PMC5  0x%016lx\n"	\
+	       "PMC6  0x%016lx\n"	\
+	       "SIAR  0x%016lx\n",
+	       mmcr0, decode_mmcr0(mmcr0), mfspr(SPRN_MMCR2),
+	       mfspr(SPRN_EBBHR), bescr, decode_bescr(bescr),
+	       mfspr(SPRN_PMC1), mfspr(SPRN_PMC2), mfspr(SPRN_PMC3),
+	       mfspr(SPRN_PMC4), mfspr(SPRN_PMC5), mfspr(SPRN_PMC6),
+	       mfspr(SPRN_SIAR));
+}
+
+void dump_ebb_state(void)
+{
+	dump_summary_ebb_state();
+
+	dump_ebb_hw_state();
+
+	trace_buffer_print(ebb_state.trace);
+}
+
+int count_pmc(int pmc, uint32_t sample_period)
+{
+	uint32_t start_value;
+	u64 val;
+
+	/* 0) Read PMC */
+	start_value = pmc_sample_period(sample_period);
+
+	val = read_pmc(pmc);
+	if (val < start_value)
+		ebb_state.stats.negative++;
+	else
+		ebb_state.stats.pmc_count[PMC_INDEX(pmc)] += val - start_value;
+
+	trace_log_reg(ebb_state.trace, SPRN_PMC1 + pmc - 1, val);
+
+	/* 1) Reset PMC */
+	write_pmc(pmc, start_value);
+
+	/* Report if we overflowed */
+	return val >= COUNTER_OVERFLOW;
+}
+
+int ebb_event_enable(struct event *e)
+{
+	int rc;
+
+	/* Ensure any SPR writes are ordered vs us */
+	mb();
+
+	rc = ioctl(e->fd, PERF_EVENT_IOC_ENABLE);
+	if (rc)
+		return rc;
+
+	rc = event_read(e);
+
+	/* Ditto */
+	mb();
+
+	return rc;
+}
+
+void ebb_freeze_pmcs(void)
+{
+	mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) | MMCR0_FC);
+	mb();
+}
+
+void ebb_unfreeze_pmcs(void)
+{
+	/* Unfreeze counters */
+	mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) & ~MMCR0_FC);
+	mb();
+}
+
+void ebb_global_enable(void)
+{
+	/* Enable EBBs globally and PMU EBBs */
+	mtspr(SPRN_BESCR, 0x8000000100000000ull);
+	mb();
+}
+
+void ebb_global_disable(void)
+{
+	/* Disable EBBs & freeze counters, events are still scheduled */
+	mtspr(SPRN_BESCRR, BESCR_PME);
+	mb();
+}
+
+bool ebb_is_supported(void)
+{
+#ifdef PPC_FEATURE2_EBB
+	/* EBB requires at least POWER8 */
+	return have_hwcap2(PPC_FEATURE2_EBB);
+#else
+	return false;
+#endif
+}
+
+void event_ebb_init(struct event *e)
+{
+	e->attr.config |= (1ull << 63);
+}
+
+void event_bhrb_init(struct event *e, unsigned ifm)
+{
+	e->attr.config |= (1ull << 62) | ((u64)ifm << 60);
+}
+
+void event_leader_ebb_init(struct event *e)
+{
+	event_ebb_init(e);
+
+	e->attr.exclusive = 1;
+	e->attr.pinned = 1;
+}
+
+int ebb_child(union pipe read_pipe, union pipe write_pipe)
+{
+	struct event event;
+	uint64_t val;
+
+	FAIL_IF(wait_for_parent(read_pipe));
+
+	event_init_named(&event, 0x1001e, "cycles");
+	event_leader_ebb_init(&event);
+
+	event.attr.exclude_kernel = 1;
+	event.attr.exclude_hv = 1;
+	event.attr.exclude_idle = 1;
+
+	FAIL_IF(event_open(&event));
+
+	ebb_enable_pmc_counting(1);
+	setup_ebb_handler(standard_ebb_callee);
+	ebb_global_enable();
+
+	FAIL_IF(event_enable(&event));
+
+	if (event_read(&event)) {
+		/*
+		 * Some tests expect to fail here, so don't report an error on
+		 * this line, and return a distinguisable error code. Tell the
+		 * parent an error happened.
+		 */
+		notify_parent_of_error(write_pipe);
+		return 2;
+	}
+
+	mtspr(SPRN_PMC1, pmc_sample_period(sample_period));
+
+	FAIL_IF(notify_parent(write_pipe));
+	FAIL_IF(wait_for_parent(read_pipe));
+	FAIL_IF(notify_parent(write_pipe));
+
+	while (ebb_state.stats.ebb_count < 20) {
+		FAIL_IF(core_busy_loop());
+
+		/* To try and hit SIGILL case */
+		val  = mfspr(SPRN_MMCRA);
+		val |= mfspr(SPRN_MMCR2);
+		val |= mfspr(SPRN_MMCR0);
+	}
+
+	ebb_global_disable();
+	ebb_freeze_pmcs();
+
+	dump_ebb_state();
+
+	event_close(&event);
+
+	FAIL_IF(ebb_state.stats.ebb_count == 0);
+
+	return 0;
+}
+
+static jmp_buf setjmp_env;
+
+static void sigill_handler(int signal)
+{
+	printf("Took sigill\n");
+	longjmp(setjmp_env, 1);
+}
+
+static struct sigaction sigill_action = {
+	.sa_handler = sigill_handler,
+};
+
+int catch_sigill(void (*func)(void))
+{
+	if (sigaction(SIGILL, &sigill_action, NULL)) {
+		perror("sigaction");
+		return 1;
+	}
+
+	if (setjmp(setjmp_env) == 0) {
+		func();
+		return 1;
+	}
+
+	return 0;
+}
+
+void write_pmc1(void)
+{
+	mtspr(SPRN_PMC1, 0);
+}
+
+void write_pmc(int pmc, u64 value)
+{
+	switch (pmc) {
+		case 1: mtspr(SPRN_PMC1, value); break;
+		case 2: mtspr(SPRN_PMC2, value); break;
+		case 3: mtspr(SPRN_PMC3, value); break;
+		case 4: mtspr(SPRN_PMC4, value); break;
+		case 5: mtspr(SPRN_PMC5, value); break;
+		case 6: mtspr(SPRN_PMC6, value); break;
+	}
+}
+
+u64 read_pmc(int pmc)
+{
+	switch (pmc) {
+		case 1: return mfspr(SPRN_PMC1);
+		case 2: return mfspr(SPRN_PMC2);
+		case 3: return mfspr(SPRN_PMC3);
+		case 4: return mfspr(SPRN_PMC4);
+		case 5: return mfspr(SPRN_PMC5);
+		case 6: return mfspr(SPRN_PMC6);
+	}
+
+	return 0;
+}
+
+static void term_handler(int signal)
+{
+	dump_summary_ebb_state();
+	dump_ebb_hw_state();
+	abort();
+}
+
+struct sigaction term_action = {
+	.sa_handler = term_handler,
+};
+
+static void __attribute__((constructor)) ebb_init(void)
+{
+	clear_ebb_stats();
+
+	if (sigaction(SIGTERM, &term_action, NULL))
+		perror("sigaction");
+
+	ebb_state.trace = trace_buffer_allocate(1 * 1024 * 1024);
+}
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/ebb.h b/tools/testing/selftests/powerpc/pmu/ebb/ebb.h
new file mode 100644
index 000000000..f87e761f8
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/ebb.h
@@ -0,0 +1,78 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#ifndef _SELFTESTS_POWERPC_PMU_EBB_EBB_H
+#define _SELFTESTS_POWERPC_PMU_EBB_EBB_H
+
+#include "../event.h"
+#include "../lib.h"
+#include "trace.h"
+#include "reg.h"
+
+#define PMC_INDEX(pmc)	((pmc)-1)
+
+#define NUM_PMC_VALUES	128
+
+struct ebb_state
+{
+	struct {
+		u64 pmc_count[6];
+		volatile int ebb_count;
+		int spurious;
+		int negative;
+		int no_overflow;
+	} stats;
+
+	bool pmc_enable[6];
+	struct trace_buffer *trace;
+};
+
+extern struct ebb_state ebb_state;
+
+#define COUNTER_OVERFLOW 0x80000000ull
+
+static inline uint32_t pmc_sample_period(uint32_t value)
+{
+	return COUNTER_OVERFLOW - value;
+}
+
+static inline void ebb_enable_pmc_counting(int pmc)
+{
+	ebb_state.pmc_enable[PMC_INDEX(pmc)] = true;
+}
+
+bool ebb_check_count(int pmc, u64 sample_period, int fudge);
+void event_leader_ebb_init(struct event *e);
+void event_ebb_init(struct event *e);
+void event_bhrb_init(struct event *e, unsigned ifm);
+void setup_ebb_handler(void (*callee)(void));
+void standard_ebb_callee(void);
+int ebb_event_enable(struct event *e);
+void ebb_global_enable(void);
+void ebb_global_disable(void);
+bool ebb_is_supported(void);
+void ebb_freeze_pmcs(void);
+void ebb_unfreeze_pmcs(void);
+void event_ebb_init(struct event *e);
+void event_leader_ebb_init(struct event *e);
+int count_pmc(int pmc, uint32_t sample_period);
+void dump_ebb_state(void);
+void dump_summary_ebb_state(void);
+void dump_ebb_hw_state(void);
+void clear_ebb_stats(void);
+void write_pmc(int pmc, u64 value);
+u64 read_pmc(int pmc);
+void reset_ebb_with_clear_mask(unsigned long mmcr0_clear_mask);
+void reset_ebb(void);
+int ebb_check_mmcr0(void);
+
+extern u64 sample_period;
+
+int core_busy_loop(void);
+int ebb_child(union pipe read_pipe, union pipe write_pipe);
+int catch_sigill(void (*func)(void));
+void write_pmc1(void);
+
+#endif /* _SELFTESTS_POWERPC_PMU_EBB_EBB_H */
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/ebb_handler.S b/tools/testing/selftests/powerpc/pmu/ebb/ebb_handler.S
new file mode 100644
index 000000000..14274ea20
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/ebb_handler.S
@@ -0,0 +1,365 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <ppc-asm.h>
+#include "reg.h"
+
+
+/* ppc-asm.h defines most of the reg aliases, but not r1/r2. */
+#define r1 1
+#define r2 2
+
+#define RFEBB   .long 0x4c000924
+
+/* Stack layout:
+ *
+ *                   ^
+ *  User stack       |
+ *  Back chain ------+	<- r1		<-------+
+ *  ...						|
+ *  Red zone / ABI Gap				|
+ *  ...						|
+ *  vr63	<+				|
+ *  vr0		 |				|
+ *  VSCR	 |				|
+ *  FSCR	 |				|
+ *  r31		 | Save area			|
+ *  r0		 |				|
+ *  XER		 |				|
+ *  CTR		 |				|
+ *  LR		 |				|
+ *  CCR		<+				|
+ *  ...		<+				|
+ *  LR		 | Caller frame			|
+ *  CCR		 |				|
+ *  Back chain	<+	<- updated r1	--------+
+ *
+ */
+
+#if defined(_CALL_ELF) && _CALL_ELF == 2
+#define ABIGAP		512
+#else
+#define ABIGAP		288
+#endif
+
+#define NR_GPR		32
+#define NR_SPR		6
+#define NR_VSR		64
+
+#define SAVE_AREA	((NR_GPR + NR_SPR) * 8 + (NR_VSR * 16))
+#define CALLER_FRAME	112
+
+#define STACK_FRAME	(ABIGAP + SAVE_AREA + CALLER_FRAME)
+
+#define CCR_SAVE	(CALLER_FRAME)
+#define LR_SAVE		(CCR_SAVE + 8)
+#define CTR_SAVE	(LR_SAVE  + 8)
+#define XER_SAVE	(CTR_SAVE + 8)
+#define GPR_SAVE(n)	(XER_SAVE + 8 + (8 * n))
+#define FSCR_SAVE	(GPR_SAVE(31) + 8)
+#define VSCR_SAVE	(FSCR_SAVE + 8)
+#define VSR_SAVE(n)	(VSCR_SAVE + 8 + (16 * n))
+
+#define SAVE_GPR(n)	std n,GPR_SAVE(n)(r1)
+#define REST_GPR(n)	ld  n,GPR_SAVE(n)(r1)
+#define TRASH_GPR(n)	lis n,0xaaaa
+
+#define SAVE_VSR(n, b)	li b, VSR_SAVE(n); stxvd2x n,b,r1
+#define LOAD_VSR(n, b)	li b, VSR_SAVE(n); lxvd2x  n,b,r1
+
+#define LOAD_REG_IMMEDIATE(reg,expr)	\
+	lis     reg,(expr)@highest;	\
+	ori     reg,reg,(expr)@higher;	\
+	rldicr  reg,reg,32,31;		\
+	oris    reg,reg,(expr)@h;	\
+	ori     reg,reg,(expr)@l;
+
+
+#if defined(_CALL_ELF) && _CALL_ELF == 2
+#define ENTRY_POINT(name) \
+	.type FUNC_NAME(name),@function; \
+	.globl FUNC_NAME(name); \
+	FUNC_NAME(name):
+
+#define RESTORE_TOC(name)	\
+	/* Restore our TOC pointer using our entry point */	\
+	LOAD_REG_IMMEDIATE(r12, name)				\
+0:	addis	r2,r12,(.TOC.-0b)@ha;				\
+	addi	r2,r2,(.TOC.-0b)@l;
+
+#else
+#define ENTRY_POINT(name) FUNC_START(name)
+#define RESTORE_TOC(name)	\
+	/* Restore our TOC pointer via our opd entry */	\
+	LOAD_REG_IMMEDIATE(r2, name)			\
+	ld      r2,8(r2);
+#endif
+
+    .text
+
+ENTRY_POINT(ebb_handler)
+    stdu    r1,-STACK_FRAME(r1)
+    SAVE_GPR(0)
+    mflr    r0
+    std     r0,LR_SAVE(r1)
+    mfcr    r0
+    std     r0,CCR_SAVE(r1)
+    mfctr   r0
+    std     r0,CTR_SAVE(r1)
+    mfxer   r0
+    std     r0,XER_SAVE(r1)
+    SAVE_GPR(2)
+    SAVE_GPR(3)
+    SAVE_GPR(4)
+    SAVE_GPR(5)
+    SAVE_GPR(6)
+    SAVE_GPR(7)
+    SAVE_GPR(8)
+    SAVE_GPR(9)
+    SAVE_GPR(10)
+    SAVE_GPR(11)
+    SAVE_GPR(12)
+    SAVE_GPR(13)
+    SAVE_GPR(14)
+    SAVE_GPR(15)
+    SAVE_GPR(16)
+    SAVE_GPR(17)
+    SAVE_GPR(18)
+    SAVE_GPR(19)
+    SAVE_GPR(20)
+    SAVE_GPR(21)
+    SAVE_GPR(22)
+    SAVE_GPR(23)
+    SAVE_GPR(24)
+    SAVE_GPR(25)
+    SAVE_GPR(26)
+    SAVE_GPR(27)
+    SAVE_GPR(28)
+    SAVE_GPR(29)
+    SAVE_GPR(30)
+    SAVE_GPR(31)
+    SAVE_VSR(0, r3)
+    mffs     f0
+    stfd     f0, FSCR_SAVE(r1)
+    mfvscr   f0
+    stfd     f0, VSCR_SAVE(r1)
+    SAVE_VSR(1,  r3)
+    SAVE_VSR(2,  r3)
+    SAVE_VSR(3,  r3)
+    SAVE_VSR(4,  r3)
+    SAVE_VSR(5,  r3)
+    SAVE_VSR(6,  r3)
+    SAVE_VSR(7,  r3)
+    SAVE_VSR(8,  r3)
+    SAVE_VSR(9,  r3)
+    SAVE_VSR(10, r3)
+    SAVE_VSR(11, r3)
+    SAVE_VSR(12, r3)
+    SAVE_VSR(13, r3)
+    SAVE_VSR(14, r3)
+    SAVE_VSR(15, r3)
+    SAVE_VSR(16, r3)
+    SAVE_VSR(17, r3)
+    SAVE_VSR(18, r3)
+    SAVE_VSR(19, r3)
+    SAVE_VSR(20, r3)
+    SAVE_VSR(21, r3)
+    SAVE_VSR(22, r3)
+    SAVE_VSR(23, r3)
+    SAVE_VSR(24, r3)
+    SAVE_VSR(25, r3)
+    SAVE_VSR(26, r3)
+    SAVE_VSR(27, r3)
+    SAVE_VSR(28, r3)
+    SAVE_VSR(29, r3)
+    SAVE_VSR(30, r3)
+    SAVE_VSR(31, r3)
+    SAVE_VSR(32, r3)
+    SAVE_VSR(33, r3)
+    SAVE_VSR(34, r3)
+    SAVE_VSR(35, r3)
+    SAVE_VSR(36, r3)
+    SAVE_VSR(37, r3)
+    SAVE_VSR(38, r3)
+    SAVE_VSR(39, r3)
+    SAVE_VSR(40, r3)
+    SAVE_VSR(41, r3)
+    SAVE_VSR(42, r3)
+    SAVE_VSR(43, r3)
+    SAVE_VSR(44, r3)
+    SAVE_VSR(45, r3)
+    SAVE_VSR(46, r3)
+    SAVE_VSR(47, r3)
+    SAVE_VSR(48, r3)
+    SAVE_VSR(49, r3)
+    SAVE_VSR(50, r3)
+    SAVE_VSR(51, r3)
+    SAVE_VSR(52, r3)
+    SAVE_VSR(53, r3)
+    SAVE_VSR(54, r3)
+    SAVE_VSR(55, r3)
+    SAVE_VSR(56, r3)
+    SAVE_VSR(57, r3)
+    SAVE_VSR(58, r3)
+    SAVE_VSR(59, r3)
+    SAVE_VSR(60, r3)
+    SAVE_VSR(61, r3)
+    SAVE_VSR(62, r3)
+    SAVE_VSR(63, r3)
+
+    TRASH_GPR(2)
+    TRASH_GPR(3)
+    TRASH_GPR(4)
+    TRASH_GPR(5)
+    TRASH_GPR(6)
+    TRASH_GPR(7)
+    TRASH_GPR(8)
+    TRASH_GPR(9)
+    TRASH_GPR(10)
+    TRASH_GPR(11)
+    TRASH_GPR(12)
+    TRASH_GPR(14)
+    TRASH_GPR(15)
+    TRASH_GPR(16)
+    TRASH_GPR(17)
+    TRASH_GPR(18)
+    TRASH_GPR(19)
+    TRASH_GPR(20)
+    TRASH_GPR(21)
+    TRASH_GPR(22)
+    TRASH_GPR(23)
+    TRASH_GPR(24)
+    TRASH_GPR(25)
+    TRASH_GPR(26)
+    TRASH_GPR(27)
+    TRASH_GPR(28)
+    TRASH_GPR(29)
+    TRASH_GPR(30)
+    TRASH_GPR(31)
+
+    RESTORE_TOC(ebb_handler)
+
+    /*
+     * r13 is our TLS pointer. We leave whatever value was in there when the
+     * EBB fired. That seems to be OK because once set the TLS pointer is not
+     * changed - but presumably that could change in future.
+     */
+
+    bl      ebb_hook
+    nop
+
+    /* r2 may be changed here but we don't care */
+
+    lfd      f0, FSCR_SAVE(r1)
+    mtfsf    0xff,f0
+    lfd      f0, VSCR_SAVE(r1)
+    mtvscr   f0
+    LOAD_VSR(0, r3)
+    LOAD_VSR(1,  r3)
+    LOAD_VSR(2,  r3)
+    LOAD_VSR(3,  r3)
+    LOAD_VSR(4,  r3)
+    LOAD_VSR(5,  r3)
+    LOAD_VSR(6,  r3)
+    LOAD_VSR(7,  r3)
+    LOAD_VSR(8,  r3)
+    LOAD_VSR(9,  r3)
+    LOAD_VSR(10, r3)
+    LOAD_VSR(11, r3)
+    LOAD_VSR(12, r3)
+    LOAD_VSR(13, r3)
+    LOAD_VSR(14, r3)
+    LOAD_VSR(15, r3)
+    LOAD_VSR(16, r3)
+    LOAD_VSR(17, r3)
+    LOAD_VSR(18, r3)
+    LOAD_VSR(19, r3)
+    LOAD_VSR(20, r3)
+    LOAD_VSR(21, r3)
+    LOAD_VSR(22, r3)
+    LOAD_VSR(23, r3)
+    LOAD_VSR(24, r3)
+    LOAD_VSR(25, r3)
+    LOAD_VSR(26, r3)
+    LOAD_VSR(27, r3)
+    LOAD_VSR(28, r3)
+    LOAD_VSR(29, r3)
+    LOAD_VSR(30, r3)
+    LOAD_VSR(31, r3)
+    LOAD_VSR(32, r3)
+    LOAD_VSR(33, r3)
+    LOAD_VSR(34, r3)
+    LOAD_VSR(35, r3)
+    LOAD_VSR(36, r3)
+    LOAD_VSR(37, r3)
+    LOAD_VSR(38, r3)
+    LOAD_VSR(39, r3)
+    LOAD_VSR(40, r3)
+    LOAD_VSR(41, r3)
+    LOAD_VSR(42, r3)
+    LOAD_VSR(43, r3)
+    LOAD_VSR(44, r3)
+    LOAD_VSR(45, r3)
+    LOAD_VSR(46, r3)
+    LOAD_VSR(47, r3)
+    LOAD_VSR(48, r3)
+    LOAD_VSR(49, r3)
+    LOAD_VSR(50, r3)
+    LOAD_VSR(51, r3)
+    LOAD_VSR(52, r3)
+    LOAD_VSR(53, r3)
+    LOAD_VSR(54, r3)
+    LOAD_VSR(55, r3)
+    LOAD_VSR(56, r3)
+    LOAD_VSR(57, r3)
+    LOAD_VSR(58, r3)
+    LOAD_VSR(59, r3)
+    LOAD_VSR(60, r3)
+    LOAD_VSR(61, r3)
+    LOAD_VSR(62, r3)
+    LOAD_VSR(63, r3)
+
+    ld      r0,XER_SAVE(r1)
+    mtxer   r0
+    ld      r0,CTR_SAVE(r1)
+    mtctr   r0
+    ld      r0,LR_SAVE(r1)
+    mtlr    r0
+    ld      r0,CCR_SAVE(r1)
+    mtcr    r0
+    REST_GPR(0)
+    REST_GPR(2)
+    REST_GPR(3)
+    REST_GPR(4)
+    REST_GPR(5)
+    REST_GPR(6)
+    REST_GPR(7)
+    REST_GPR(8)
+    REST_GPR(9)
+    REST_GPR(10)
+    REST_GPR(11)
+    REST_GPR(12)
+    REST_GPR(13)
+    REST_GPR(14)
+    REST_GPR(15)
+    REST_GPR(16)
+    REST_GPR(17)
+    REST_GPR(18)
+    REST_GPR(19)
+    REST_GPR(20)
+    REST_GPR(21)
+    REST_GPR(22)
+    REST_GPR(23)
+    REST_GPR(24)
+    REST_GPR(25)
+    REST_GPR(26)
+    REST_GPR(27)
+    REST_GPR(28)
+    REST_GPR(29)
+    REST_GPR(30)
+    REST_GPR(31)
+    addi    r1,r1,STACK_FRAME
+    RFEBB
+FUNC_END(ebb_handler)
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/ebb_on_child_test.c b/tools/testing/selftests/powerpc/pmu/ebb/ebb_on_child_test.c
new file mode 100644
index 000000000..1e7b7fe23
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/ebb_on_child_test.c
@@ -0,0 +1,88 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "ebb.h"
+
+
+/*
+ * Tests we can setup an EBB on our child. Nothing interesting happens, because
+ * even though the event is enabled and running the child hasn't enabled the
+ * actual delivery of the EBBs.
+ */
+
+static int victim_child(union pipe read_pipe, union pipe write_pipe)
+{
+	int i;
+
+	FAIL_IF(wait_for_parent(read_pipe));
+	FAIL_IF(notify_parent(write_pipe));
+
+	/* Parent creates EBB event */
+
+	FAIL_IF(wait_for_parent(read_pipe));
+	FAIL_IF(notify_parent(write_pipe));
+
+	/* Check the EBB is enabled by writing PMC1 */
+	write_pmc1();
+
+	/* EBB event is enabled here */
+	for (i = 0; i < 1000000; i++) ;
+
+	return 0;
+}
+
+int ebb_on_child(void)
+{
+	union pipe read_pipe, write_pipe;
+	struct event event;
+	pid_t pid;
+
+	SKIP_IF(!ebb_is_supported());
+
+	FAIL_IF(pipe(read_pipe.fds) == -1);
+	FAIL_IF(pipe(write_pipe.fds) == -1);
+
+	pid = fork();
+	if (pid == 0) {
+		/* NB order of pipes looks reversed */
+		exit(victim_child(write_pipe, read_pipe));
+	}
+
+	FAIL_IF(sync_with_child(read_pipe, write_pipe));
+
+	/* Child is running now */
+
+	event_init_named(&event, 0x1001e, "cycles");
+	event_leader_ebb_init(&event);
+
+	event.attr.exclude_kernel = 1;
+	event.attr.exclude_hv = 1;
+	event.attr.exclude_idle = 1;
+
+	FAIL_IF(event_open_with_pid(&event, pid));
+	FAIL_IF(ebb_event_enable(&event));
+
+	FAIL_IF(sync_with_child(read_pipe, write_pipe));
+
+	/* Child should just exit happily */
+	FAIL_IF(wait_for_child(pid));
+
+	event_close(&event);
+
+	return 0;
+}
+
+int main(void)
+{
+	return test_harness(ebb_on_child, "ebb_on_child");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/ebb_on_willing_child_test.c b/tools/testing/selftests/powerpc/pmu/ebb/ebb_on_willing_child_test.c
new file mode 100644
index 000000000..174e4f4da
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/ebb_on_willing_child_test.c
@@ -0,0 +1,92 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "ebb.h"
+
+
+/*
+ * Tests we can setup an EBB on our child. The child expects this and enables
+ * EBBs, which are then delivered to the child, even though the event is
+ * created by the parent.
+ */
+
+static int victim_child(union pipe read_pipe, union pipe write_pipe)
+{
+	FAIL_IF(wait_for_parent(read_pipe));
+
+	/* Setup our EBB handler, before the EBB event is created */
+	ebb_enable_pmc_counting(1);
+	setup_ebb_handler(standard_ebb_callee);
+	ebb_global_enable();
+
+	FAIL_IF(notify_parent(write_pipe));
+
+	while (ebb_state.stats.ebb_count < 20) {
+		FAIL_IF(core_busy_loop());
+	}
+
+	ebb_global_disable();
+	ebb_freeze_pmcs();
+
+	dump_ebb_state();
+
+	FAIL_IF(ebb_state.stats.ebb_count == 0);
+
+	return 0;
+}
+
+/* Tests we can setup an EBB on our child - if it's expecting it */
+int ebb_on_willing_child(void)
+{
+	union pipe read_pipe, write_pipe;
+	struct event event;
+	pid_t pid;
+
+	SKIP_IF(!ebb_is_supported());
+
+	FAIL_IF(pipe(read_pipe.fds) == -1);
+	FAIL_IF(pipe(write_pipe.fds) == -1);
+
+	pid = fork();
+	if (pid == 0) {
+		/* NB order of pipes looks reversed */
+		exit(victim_child(write_pipe, read_pipe));
+	}
+
+	/* Signal the child to setup its EBB handler */
+	FAIL_IF(sync_with_child(read_pipe, write_pipe));
+
+	/* Child is running now */
+
+	event_init_named(&event, 0x1001e, "cycles");
+	event_leader_ebb_init(&event);
+
+	event.attr.exclude_kernel = 1;
+	event.attr.exclude_hv = 1;
+	event.attr.exclude_idle = 1;
+
+	FAIL_IF(event_open_with_pid(&event, pid));
+	FAIL_IF(ebb_event_enable(&event));
+
+	/* Child show now take EBBs and then exit */
+	FAIL_IF(wait_for_child(pid));
+
+	event_close(&event);
+
+	return 0;
+}
+
+int main(void)
+{
+	return test_harness(ebb_on_willing_child, "ebb_on_willing_child");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/ebb_vs_cpu_event_test.c b/tools/testing/selftests/powerpc/pmu/ebb/ebb_vs_cpu_event_test.c
new file mode 100644
index 000000000..af20a2b36
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/ebb_vs_cpu_event_test.c
@@ -0,0 +1,88 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "ebb.h"
+
+
+/*
+ * Tests an EBB vs a cpu event - in that order. The EBB should force the cpu
+ * event off the PMU.
+ */
+
+static int setup_cpu_event(struct event *event, int cpu)
+{
+	event_init_named(event, 0x400FA, "PM_RUN_INST_CMPL");
+
+	event->attr.exclude_kernel = 1;
+	event->attr.exclude_hv = 1;
+	event->attr.exclude_idle = 1;
+
+	SKIP_IF(require_paranoia_below(1));
+	FAIL_IF(event_open_with_cpu(event, cpu));
+	FAIL_IF(event_enable(event));
+
+	return 0;
+}
+
+int ebb_vs_cpu_event(void)
+{
+	union pipe read_pipe, write_pipe;
+	struct event event;
+	int cpu, rc;
+	pid_t pid;
+
+	SKIP_IF(!ebb_is_supported());
+
+	cpu = pick_online_cpu();
+	FAIL_IF(cpu < 0);
+	FAIL_IF(bind_to_cpu(cpu));
+
+	FAIL_IF(pipe(read_pipe.fds) == -1);
+	FAIL_IF(pipe(write_pipe.fds) == -1);
+
+	pid = fork();
+	if (pid == 0) {
+		/* NB order of pipes looks reversed */
+		exit(ebb_child(write_pipe, read_pipe));
+	}
+
+	/* Signal the child to install its EBB event and wait */
+	FAIL_IF(sync_with_child(read_pipe, write_pipe));
+
+	/* Now try to install our CPU event */
+	rc = setup_cpu_event(&event, cpu);
+	if (rc) {
+		kill_child_and_wait(pid);
+		return rc;
+	}
+
+	/* Signal the child to run */
+	FAIL_IF(sync_with_child(read_pipe, write_pipe));
+
+	/* .. and wait for it to complete */
+	FAIL_IF(wait_for_child(pid));
+	FAIL_IF(event_disable(&event));
+	FAIL_IF(event_read(&event));
+
+	event_report(&event);
+
+	/* The cpu event may have run, but we don't expect 100% */
+	FAIL_IF(event.result.enabled >= event.result.running);
+
+	return 0;
+}
+
+int main(void)
+{
+	return test_harness(ebb_vs_cpu_event, "ebb_vs_cpu_event");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/event_attributes_test.c b/tools/testing/selftests/powerpc/pmu/ebb/event_attributes_test.c
new file mode 100644
index 000000000..7762ab26e
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/event_attributes_test.c
@@ -0,0 +1,133 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "ebb.h"
+
+
+/*
+ * Test various attributes of the EBB event are enforced.
+ */
+int event_attributes(void)
+{
+	struct event event, leader;
+
+	SKIP_IF(!ebb_is_supported());
+
+	event_init(&event, 0x1001e);
+	event_leader_ebb_init(&event);
+	/* Expected to succeed */
+	FAIL_IF(event_open(&event));
+	event_close(&event);
+
+
+	event_init(&event, 0x001e); /* CYCLES - no PMC specified */
+	event_leader_ebb_init(&event);
+	/* Expected to fail, no PMC specified */
+	FAIL_IF(event_open(&event) == 0);
+
+
+	event_init(&event, 0x2001e);
+	event_leader_ebb_init(&event);
+	event.attr.exclusive = 0;
+	/* Expected to fail, not exclusive */
+	FAIL_IF(event_open(&event) == 0);
+
+
+	event_init(&event, 0x3001e);
+	event_leader_ebb_init(&event);
+	event.attr.freq = 1;
+	/* Expected to fail, sets freq */
+	FAIL_IF(event_open(&event) == 0);
+
+
+	event_init(&event, 0x4001e);
+	event_leader_ebb_init(&event);
+	event.attr.sample_period = 1;
+	/* Expected to fail, sets sample_period */
+	FAIL_IF(event_open(&event) == 0);
+
+
+	event_init(&event, 0x1001e);
+	event_leader_ebb_init(&event);
+	event.attr.enable_on_exec = 1;
+	/* Expected to fail, sets enable_on_exec */
+	FAIL_IF(event_open(&event) == 0);
+
+
+	event_init(&event, 0x1001e);
+	event_leader_ebb_init(&event);
+	event.attr.inherit = 1;
+	/* Expected to fail, sets inherit */
+	FAIL_IF(event_open(&event) == 0);
+
+
+	event_init(&leader, 0x1001e);
+	event_leader_ebb_init(&leader);
+	FAIL_IF(event_open(&leader));
+
+	event_init(&event, 0x20002);
+	event_ebb_init(&event);
+
+	/* Expected to succeed */
+	FAIL_IF(event_open_with_group(&event, leader.fd));
+	event_close(&leader);
+	event_close(&event);
+
+
+	event_init(&leader, 0x1001e);
+	event_leader_ebb_init(&leader);
+	FAIL_IF(event_open(&leader));
+
+	event_init(&event, 0x20002);
+
+	/* Expected to fail, event doesn't request EBB, leader does */
+	FAIL_IF(event_open_with_group(&event, leader.fd) == 0);
+	event_close(&leader);
+
+
+	event_init(&leader, 0x1001e);
+	event_leader_ebb_init(&leader);
+	/* Clear the EBB flag */
+	leader.attr.config &= ~(1ull << 63);
+
+	FAIL_IF(event_open(&leader));
+
+	event_init(&event, 0x20002);
+	event_ebb_init(&event);
+
+	/* Expected to fail, leader doesn't request EBB */
+	FAIL_IF(event_open_with_group(&event, leader.fd) == 0);
+	event_close(&leader);
+
+
+	event_init(&leader, 0x1001e);
+	event_leader_ebb_init(&leader);
+	leader.attr.exclusive = 0;
+	/* Expected to fail, leader isn't exclusive */
+	FAIL_IF(event_open(&leader) == 0);
+
+
+	event_init(&leader, 0x1001e);
+	event_leader_ebb_init(&leader);
+	leader.attr.pinned = 0;
+	/* Expected to fail, leader isn't pinned */
+	FAIL_IF(event_open(&leader) == 0);
+
+	event_init(&event, 0x1001e);
+	event_leader_ebb_init(&event);
+	/* Expected to fail, not a task event */
+	SKIP_IF(require_paranoia_below(1));
+	FAIL_IF(event_open_with_cpu(&event, 0) == 0);
+
+	return 0;
+}
+
+int main(void)
+{
+	return test_harness(event_attributes, "event_attributes");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/fixed_instruction_loop.S b/tools/testing/selftests/powerpc/pmu/ebb/fixed_instruction_loop.S
new file mode 100644
index 000000000..b866a0581
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/fixed_instruction_loop.S
@@ -0,0 +1,43 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <ppc-asm.h>
+
+	.text
+
+FUNC_START(thirty_two_instruction_loop)
+	cmpwi	r3,0
+	beqlr
+	addi	r4,r3,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1	# 28 addi's
+	subi	r3,r3,1
+	b	FUNC_NAME(thirty_two_instruction_loop)
+FUNC_END(thirty_two_instruction_loop)
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/fork_cleanup_test.c b/tools/testing/selftests/powerpc/pmu/ebb/fork_cleanup_test.c
new file mode 100644
index 000000000..167135bd9
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/fork_cleanup_test.c
@@ -0,0 +1,81 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#include <setjmp.h>
+#include <signal.h>
+
+#include "ebb.h"
+
+
+/*
+ * Test that a fork clears the PMU state of the child. eg. BESCR/EBBHR/EBBRR
+ * are cleared, and MMCR0_PMCC is reset, preventing the child from accessing
+ * the PMU.
+ */
+
+static struct event event;
+
+static int child(void)
+{
+	/* Even though we have EBE=0 we can still see the EBB regs */
+	FAIL_IF(mfspr(SPRN_BESCR) != 0);
+	FAIL_IF(mfspr(SPRN_EBBHR) != 0);
+	FAIL_IF(mfspr(SPRN_EBBRR) != 0);
+
+	FAIL_IF(catch_sigill(write_pmc1));
+
+	/* We can still read from the event, though it is on our parent */
+	FAIL_IF(event_read(&event));
+
+	return 0;
+}
+
+/* Tests that fork clears EBB state */
+int fork_cleanup(void)
+{
+	pid_t pid;
+
+	SKIP_IF(!ebb_is_supported());
+
+	event_init_named(&event, 0x1001e, "cycles");
+	event_leader_ebb_init(&event);
+
+	FAIL_IF(event_open(&event));
+
+	ebb_enable_pmc_counting(1);
+	setup_ebb_handler(standard_ebb_callee);
+	ebb_global_enable();
+
+	FAIL_IF(ebb_event_enable(&event));
+
+	mtspr(SPRN_MMCR0, MMCR0_FC);
+	mtspr(SPRN_PMC1, pmc_sample_period(sample_period));
+
+	/* Don't need to actually take any EBBs */
+
+	pid = fork();
+	if (pid == 0)
+		exit(child());
+
+	/* Child does the actual testing */
+	FAIL_IF(wait_for_child(pid));
+
+	/* After fork */
+	event_close(&event);
+
+	return 0;
+}
+
+int main(void)
+{
+	return test_harness(fork_cleanup, "fork_cleanup");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/instruction_count_test.c b/tools/testing/selftests/powerpc/pmu/ebb/instruction_count_test.c
new file mode 100644
index 000000000..35a3426e3
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/instruction_count_test.c
@@ -0,0 +1,167 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#define _GNU_SOURCE
+
+#include <stdio.h>
+#include <stdbool.h>
+#include <string.h>
+#include <sys/prctl.h>
+
+#include "ebb.h"
+
+
+/*
+ * Run a calibrated instruction loop and count instructions executed using
+ * EBBs. Make sure the counts look right.
+ */
+
+extern void thirty_two_instruction_loop(uint64_t loops);
+
+static bool counters_frozen = true;
+
+static int do_count_loop(struct event *event, uint64_t instructions,
+			 uint64_t overhead, bool report)
+{
+	int64_t difference, expected;
+	double percentage;
+
+	clear_ebb_stats();
+
+	counters_frozen = false;
+	mb();
+	mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) & ~MMCR0_FC);
+
+	thirty_two_instruction_loop(instructions >> 5);
+
+	counters_frozen = true;
+	mb();
+	mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) | MMCR0_FC);
+
+	count_pmc(4, sample_period);
+
+	event->result.value = ebb_state.stats.pmc_count[4-1];
+	expected = instructions + overhead;
+	difference = event->result.value - expected;
+	percentage = (double)difference / event->result.value * 100;
+
+	if (report) {
+		printf("Looped for %lu instructions, overhead %lu\n", instructions, overhead);
+		printf("Expected %lu\n", expected);
+		printf("Actual   %llu\n", event->result.value);
+		printf("Delta    %ld, %f%%\n", difference, percentage);
+		printf("Took %d EBBs\n", ebb_state.stats.ebb_count);
+	}
+
+	if (difference < 0)
+		difference = -difference;
+
+	/* Tolerate a difference of up to 0.0001 % */
+	difference *= 10000 * 100;
+	if (difference / event->result.value)
+		return -1;
+
+	return 0;
+}
+
+/* Count how many instructions it takes to do a null loop */
+static uint64_t determine_overhead(struct event *event)
+{
+	uint64_t current, overhead;
+	int i;
+
+	do_count_loop(event, 0, 0, false);
+	overhead = event->result.value;
+
+	for (i = 0; i < 100; i++) {
+		do_count_loop(event, 0, 0, false);
+		current = event->result.value;
+		if (current < overhead) {
+			printf("Replacing overhead %lu with %lu\n", overhead, current);
+			overhead = current;
+		}
+	}
+
+	return overhead;
+}
+
+static void pmc4_ebb_callee(void)
+{
+	uint64_t val;
+
+	val = mfspr(SPRN_BESCR);
+	if (!(val & BESCR_PMEO)) {
+		ebb_state.stats.spurious++;
+		goto out;
+	}
+
+	ebb_state.stats.ebb_count++;
+	count_pmc(4, sample_period);
+out:
+	if (counters_frozen)
+		reset_ebb_with_clear_mask(MMCR0_PMAO);
+	else
+		reset_ebb();
+}
+
+int instruction_count(void)
+{
+	struct event event;
+	uint64_t overhead;
+
+	SKIP_IF(!ebb_is_supported());
+
+	event_init_named(&event, 0x400FA, "PM_RUN_INST_CMPL");
+	event_leader_ebb_init(&event);
+	event.attr.exclude_kernel = 1;
+	event.attr.exclude_hv = 1;
+	event.attr.exclude_idle = 1;
+
+	FAIL_IF(event_open(&event));
+	FAIL_IF(ebb_event_enable(&event));
+
+	sample_period = COUNTER_OVERFLOW;
+
+	setup_ebb_handler(pmc4_ebb_callee);
+	mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) & ~MMCR0_FC);
+	ebb_global_enable();
+
+	overhead = determine_overhead(&event);
+	printf("Overhead of null loop: %lu instructions\n", overhead);
+
+	/* Run for 1M instructions */
+	FAIL_IF(do_count_loop(&event, 0x100000, overhead, true));
+
+	/* Run for 10M instructions */
+	FAIL_IF(do_count_loop(&event, 0xa00000, overhead, true));
+
+	/* Run for 100M instructions */
+	FAIL_IF(do_count_loop(&event, 0x6400000, overhead, true));
+
+	/* Run for 1G instructions */
+	FAIL_IF(do_count_loop(&event, 0x40000000, overhead, true));
+
+	/* Run for 16G instructions */
+	FAIL_IF(do_count_loop(&event, 0x400000000, overhead, true));
+
+	/* Run for 64G instructions */
+	FAIL_IF(do_count_loop(&event, 0x1000000000, overhead, true));
+
+	/* Run for 128G instructions */
+	FAIL_IF(do_count_loop(&event, 0x2000000000, overhead, true));
+
+	ebb_global_disable();
+	event_close(&event);
+
+	printf("Finished OK\n");
+
+	return 0;
+}
+
+int main(void)
+{
+	test_harness_set_timeout(300);
+	return test_harness(instruction_count, "instruction_count");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/lost_exception_test.c b/tools/testing/selftests/powerpc/pmu/ebb/lost_exception_test.c
new file mode 100644
index 000000000..dddb95938
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/lost_exception_test.c
@@ -0,0 +1,102 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <sched.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+
+#include "ebb.h"
+
+
+/*
+ * Test that tries to trigger CPU_FTR_PMAO_BUG. Which is a hardware defect
+ * where an exception triggers but we context switch before it is delivered and
+ * lose the exception.
+ */
+
+static int test_body(void)
+{
+	int i, orig_period, max_period;
+	struct event event;
+
+	SKIP_IF(!ebb_is_supported());
+
+	/* We use PMC4 to make sure the kernel switches all counters correctly */
+	event_init_named(&event, 0x40002, "instructions");
+	event_leader_ebb_init(&event);
+
+	event.attr.exclude_kernel = 1;
+	event.attr.exclude_hv = 1;
+	event.attr.exclude_idle = 1;
+
+	FAIL_IF(event_open(&event));
+
+	ebb_enable_pmc_counting(4);
+	setup_ebb_handler(standard_ebb_callee);
+	ebb_global_enable();
+	FAIL_IF(ebb_event_enable(&event));
+
+	/*
+	 * We want a low sample period, but we also want to get out of the EBB
+	 * handler without tripping up again.
+	 *
+	 * This value picked after much experimentation.
+	 */
+	orig_period = max_period = sample_period = 400;
+
+	mtspr(SPRN_PMC4, pmc_sample_period(sample_period));
+
+	while (ebb_state.stats.ebb_count < 1000000) {
+		/*
+		 * We are trying to get the EBB exception to race exactly with
+		 * us entering the kernel to do the syscall. We then need the
+		 * kernel to decide our timeslice is up and context switch to
+		 * the other thread. When we come back our EBB will have been
+		 * lost and we'll spin in this while loop forever.
+		 */
+
+		for (i = 0; i < 100000; i++)
+			sched_yield();
+
+		/* Change the sample period slightly to try and hit the race */
+		if (sample_period >= (orig_period + 200))
+			sample_period = orig_period;
+		else
+			sample_period++;
+
+		if (sample_period > max_period)
+			max_period = sample_period;
+	}
+
+	ebb_freeze_pmcs();
+	ebb_global_disable();
+
+	mtspr(SPRN_PMC4, 0xdead);
+
+	dump_summary_ebb_state();
+	dump_ebb_hw_state();
+
+	event_close(&event);
+
+	FAIL_IF(ebb_state.stats.ebb_count == 0);
+
+	/* We vary our sample period so we need extra fudge here */
+	FAIL_IF(!ebb_check_count(4, orig_period, 2 * (max_period - orig_period)));
+
+	return 0;
+}
+
+static int lost_exception(void)
+{
+	return eat_cpu(test_body);
+}
+
+int main(void)
+{
+	test_harness_set_timeout(300);
+	return test_harness(lost_exception, "lost_exception");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/multi_counter_test.c b/tools/testing/selftests/powerpc/pmu/ebb/multi_counter_test.c
new file mode 100644
index 000000000..035c02273
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/multi_counter_test.c
@@ -0,0 +1,86 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/ioctl.h>
+
+#include "ebb.h"
+
+
+/*
+ * Test counting multiple events using EBBs.
+ */
+int multi_counter(void)
+{
+	struct event events[6];
+	int i, group_fd;
+
+	SKIP_IF(!ebb_is_supported());
+
+	event_init_named(&events[0], 0x1001C, "PM_CMPLU_STALL_THRD");
+	event_init_named(&events[1], 0x2D016, "PM_CMPLU_STALL_FXU");
+	event_init_named(&events[2], 0x30006, "PM_CMPLU_STALL_OTHER_CMPL");
+	event_init_named(&events[3], 0x4000A, "PM_CMPLU_STALL");
+	event_init_named(&events[4], 0x600f4, "PM_RUN_CYC");
+	event_init_named(&events[5], 0x500fa, "PM_RUN_INST_CMPL");
+
+	event_leader_ebb_init(&events[0]);
+	for (i = 1; i < 6; i++)
+		event_ebb_init(&events[i]);
+
+	group_fd = -1;
+	for (i = 0; i < 6; i++) {
+		events[i].attr.exclude_kernel = 1;
+		events[i].attr.exclude_hv = 1;
+		events[i].attr.exclude_idle = 1;
+
+		FAIL_IF(event_open_with_group(&events[i], group_fd));
+		if (group_fd == -1)
+			group_fd = events[0].fd;
+	}
+
+	ebb_enable_pmc_counting(1);
+	ebb_enable_pmc_counting(2);
+	ebb_enable_pmc_counting(3);
+	ebb_enable_pmc_counting(4);
+	ebb_enable_pmc_counting(5);
+	ebb_enable_pmc_counting(6);
+	setup_ebb_handler(standard_ebb_callee);
+
+	FAIL_IF(ioctl(events[0].fd, PERF_EVENT_IOC_ENABLE, PERF_IOC_FLAG_GROUP));
+	FAIL_IF(event_read(&events[0]));
+
+	ebb_global_enable();
+
+	mtspr(SPRN_PMC1, pmc_sample_period(sample_period));
+	mtspr(SPRN_PMC2, pmc_sample_period(sample_period));
+	mtspr(SPRN_PMC3, pmc_sample_period(sample_period));
+	mtspr(SPRN_PMC4, pmc_sample_period(sample_period));
+	mtspr(SPRN_PMC5, pmc_sample_period(sample_period));
+	mtspr(SPRN_PMC6, pmc_sample_period(sample_period));
+
+	while (ebb_state.stats.ebb_count < 50) {
+		FAIL_IF(core_busy_loop());
+		FAIL_IF(ebb_check_mmcr0());
+	}
+
+	ebb_global_disable();
+	ebb_freeze_pmcs();
+
+	dump_ebb_state();
+
+	for (i = 0; i < 6; i++)
+		event_close(&events[i]);
+
+	FAIL_IF(ebb_state.stats.ebb_count == 0);
+
+	return 0;
+}
+
+int main(void)
+{
+	return test_harness(multi_counter, "multi_counter");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/multi_ebb_procs_test.c b/tools/testing/selftests/powerpc/pmu/ebb/multi_ebb_procs_test.c
new file mode 100644
index 000000000..3e9d4ac96
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/multi_ebb_procs_test.c
@@ -0,0 +1,109 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <signal.h>
+
+#include "ebb.h"
+
+
+/*
+ * Test running multiple EBB using processes at once on a single CPU. They
+ * should all run happily without interfering with each other.
+ */
+
+static bool child_should_exit;
+
+static void sigint_handler(int signal)
+{
+	child_should_exit = true;
+}
+
+struct sigaction sigint_action = {
+	.sa_handler = sigint_handler,
+};
+
+static int cycles_child(void)
+{
+	struct event event;
+
+	if (sigaction(SIGINT, &sigint_action, NULL)) {
+		perror("sigaction");
+		return 1;
+	}
+
+	event_init_named(&event, 0x1001e, "cycles");
+	event_leader_ebb_init(&event);
+
+	event.attr.exclude_kernel = 1;
+	event.attr.exclude_hv = 1;
+	event.attr.exclude_idle = 1;
+
+	FAIL_IF(event_open(&event));
+
+	ebb_enable_pmc_counting(1);
+	setup_ebb_handler(standard_ebb_callee);
+	ebb_global_enable();
+
+	FAIL_IF(ebb_event_enable(&event));
+
+	mtspr(SPRN_PMC1, pmc_sample_period(sample_period));
+
+	while (!child_should_exit) {
+		FAIL_IF(core_busy_loop());
+		FAIL_IF(ebb_check_mmcr0());
+	}
+
+	ebb_global_disable();
+	ebb_freeze_pmcs();
+
+	dump_summary_ebb_state();
+
+	event_close(&event);
+
+	FAIL_IF(ebb_state.stats.ebb_count == 0);
+
+	return 0;
+}
+
+#define NR_CHILDREN	4
+
+int multi_ebb_procs(void)
+{
+	pid_t pids[NR_CHILDREN];
+	int cpu, rc, i;
+
+	SKIP_IF(!ebb_is_supported());
+
+	cpu = pick_online_cpu();
+	FAIL_IF(cpu < 0);
+	FAIL_IF(bind_to_cpu(cpu));
+
+	for (i = 0; i < NR_CHILDREN; i++) {
+		pids[i] = fork();
+		if (pids[i] == 0)
+			exit(cycles_child());
+	}
+
+	/* Have them all run for "a while" */
+	sleep(10);
+
+	rc = 0;
+	for (i = 0; i < NR_CHILDREN; i++) {
+		/* Tell them to stop */
+		kill(pids[i], SIGINT);
+		/* And wait */
+		rc |= wait_for_child(pids[i]);
+	}
+
+	return rc;
+}
+
+int main(void)
+{
+	return test_harness(multi_ebb_procs, "multi_ebb_procs");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/no_handler_test.c b/tools/testing/selftests/powerpc/pmu/ebb/no_handler_test.c
new file mode 100644
index 000000000..87630d44f
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/no_handler_test.c
@@ -0,0 +1,61 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <setjmp.h>
+#include <signal.h>
+
+#include "ebb.h"
+
+
+/* Test that things work sanely if we have no handler */
+
+static int no_handler_test(void)
+{
+	struct event event;
+	u64 val;
+	int i;
+
+	SKIP_IF(!ebb_is_supported());
+
+	event_init_named(&event, 0x1001e, "cycles");
+	event_leader_ebb_init(&event);
+
+	event.attr.exclude_kernel = 1;
+	event.attr.exclude_hv = 1;
+	event.attr.exclude_idle = 1;
+
+	FAIL_IF(event_open(&event));
+	FAIL_IF(ebb_event_enable(&event));
+
+	val = mfspr(SPRN_EBBHR);
+	FAIL_IF(val != 0);
+
+	/* Make sure it overflows quickly */
+	sample_period = 1000;
+	mtspr(SPRN_PMC1, pmc_sample_period(sample_period));
+
+	/* Spin to make sure the event has time to overflow */
+	for (i = 0; i < 1000; i++)
+		mb();
+
+	dump_ebb_state();
+
+	/* We expect to see the PMU frozen & PMAO set */
+	val = mfspr(SPRN_MMCR0);
+	FAIL_IF(val != 0x0000000080000080);
+
+	event_close(&event);
+
+	/* The real test is that we never took an EBB at 0x0 */
+
+	return 0;
+}
+
+int main(void)
+{
+	return test_harness(no_handler_test,"no_handler_test");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/pmae_handling_test.c b/tools/testing/selftests/powerpc/pmu/ebb/pmae_handling_test.c
new file mode 100644
index 000000000..d90891fe9
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/pmae_handling_test.c
@@ -0,0 +1,106 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <sched.h>
+#include <signal.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "ebb.h"
+
+
+/*
+ * Test that the kernel properly handles PMAE across context switches.
+ *
+ * We test this by calling into the kernel inside our EBB handler, where PMAE
+ * is clear. A cpu eater companion thread is running on the same CPU as us to
+ * encourage the scheduler to switch us.
+ *
+ * The kernel must make sure that when it context switches us back in, it
+ * honours the fact that we had PMAE clear.
+ *
+ * Observed to hit the failing case on the first EBB with a broken kernel.
+ */
+
+static bool mmcr0_mismatch;
+static uint64_t before, after;
+
+static void syscall_ebb_callee(void)
+{
+	uint64_t val;
+
+	val = mfspr(SPRN_BESCR);
+	if (!(val & BESCR_PMEO)) {
+		ebb_state.stats.spurious++;
+		goto out;
+	}
+
+	ebb_state.stats.ebb_count++;
+	count_pmc(1, sample_period);
+
+	before = mfspr(SPRN_MMCR0);
+
+	/* Try and get ourselves scheduled, to force a PMU context switch */
+	sched_yield();
+
+	after = mfspr(SPRN_MMCR0);
+	if (before != after)
+		mmcr0_mismatch = true;
+
+out:
+	reset_ebb();
+}
+
+static int test_body(void)
+{
+	struct event event;
+
+	SKIP_IF(!ebb_is_supported());
+
+	event_init_named(&event, 0x1001e, "cycles");
+	event_leader_ebb_init(&event);
+
+	event.attr.exclude_kernel = 1;
+	event.attr.exclude_hv = 1;
+	event.attr.exclude_idle = 1;
+
+	FAIL_IF(event_open(&event));
+
+	setup_ebb_handler(syscall_ebb_callee);
+	ebb_global_enable();
+
+	FAIL_IF(ebb_event_enable(&event));
+
+	mtspr(SPRN_PMC1, pmc_sample_period(sample_period));
+
+	while (ebb_state.stats.ebb_count < 20 && !mmcr0_mismatch)
+		FAIL_IF(core_busy_loop());
+
+	ebb_global_disable();
+	ebb_freeze_pmcs();
+
+	dump_ebb_state();
+
+	if (mmcr0_mismatch)
+		printf("Saw MMCR0 before 0x%lx after 0x%lx\n", before, after);
+
+	event_close(&event);
+
+	FAIL_IF(ebb_state.stats.ebb_count == 0);
+	FAIL_IF(mmcr0_mismatch);
+
+	return 0;
+}
+
+int pmae_handling(void)
+{
+	return eat_cpu(test_body);
+}
+
+int main(void)
+{
+	return test_harness(pmae_handling, "pmae_handling");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/pmc56_overflow_test.c b/tools/testing/selftests/powerpc/pmu/ebb/pmc56_overflow_test.c
new file mode 100644
index 000000000..8ca92b9ee
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/pmc56_overflow_test.c
@@ -0,0 +1,93 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "ebb.h"
+
+
+/*
+ * Test that PMC5 & 6 are frozen (ie. don't overflow) when they are not being
+ * used. Tests the MMCR0_FC56 logic in the kernel.
+ */
+
+static int pmc56_overflowed;
+
+static void ebb_callee(void)
+{
+	uint64_t val;
+
+	val = mfspr(SPRN_BESCR);
+	if (!(val & BESCR_PMEO)) {
+		ebb_state.stats.spurious++;
+		goto out;
+	}
+
+	ebb_state.stats.ebb_count++;
+	count_pmc(2, sample_period);
+
+	val = mfspr(SPRN_PMC5);
+	if (val >= COUNTER_OVERFLOW)
+		pmc56_overflowed++;
+
+	count_pmc(5, COUNTER_OVERFLOW);
+
+	val = mfspr(SPRN_PMC6);
+	if (val >= COUNTER_OVERFLOW)
+		pmc56_overflowed++;
+
+	count_pmc(6, COUNTER_OVERFLOW);
+
+out:
+	reset_ebb();
+}
+
+int pmc56_overflow(void)
+{
+	struct event event;
+
+	SKIP_IF(!ebb_is_supported());
+
+	/* Use PMC2 so we set PMCjCE, which enables PMC5/6 */
+	event_init(&event, 0x2001e);
+	event_leader_ebb_init(&event);
+
+	event.attr.exclude_kernel = 1;
+	event.attr.exclude_hv = 1;
+	event.attr.exclude_idle = 1;
+
+	FAIL_IF(event_open(&event));
+
+	setup_ebb_handler(ebb_callee);
+	ebb_global_enable();
+
+	FAIL_IF(ebb_event_enable(&event));
+
+	mtspr(SPRN_PMC2, pmc_sample_period(sample_period));
+	mtspr(SPRN_PMC5, 0);
+	mtspr(SPRN_PMC6, 0);
+
+	while (ebb_state.stats.ebb_count < 10)
+		FAIL_IF(core_busy_loop());
+
+	ebb_global_disable();
+	ebb_freeze_pmcs();
+
+	dump_ebb_state();
+
+	printf("PMC5/6 overflow %d\n", pmc56_overflowed);
+
+	event_close(&event);
+
+	FAIL_IF(ebb_state.stats.ebb_count == 0 || pmc56_overflowed != 0);
+
+	return 0;
+}
+
+int main(void)
+{
+	return test_harness(pmc56_overflow, "pmc56_overflow");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/reg_access_test.c b/tools/testing/selftests/powerpc/pmu/ebb/reg_access_test.c
new file mode 100644
index 000000000..f923228bc
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/reg_access_test.c
@@ -0,0 +1,40 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "ebb.h"
+
+
+/*
+ * Test basic access to the EBB regs, they should be user accessible with no
+ * kernel interaction required.
+ */
+int reg_access(void)
+{
+	uint64_t val, expected;
+
+	SKIP_IF(!ebb_is_supported());
+
+	expected = 0x8000000100000000ull;
+	mtspr(SPRN_BESCR, expected);
+	val = mfspr(SPRN_BESCR);
+
+	FAIL_IF(val != expected);
+
+	expected = 0x0000000001000000ull;
+	mtspr(SPRN_EBBHR, expected);
+	val = mfspr(SPRN_EBBHR);
+
+	FAIL_IF(val != expected);
+
+	return 0;
+}
+
+int main(void)
+{
+	return test_harness(reg_access, "reg_access");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/task_event_pinned_vs_ebb_test.c b/tools/testing/selftests/powerpc/pmu/ebb/task_event_pinned_vs_ebb_test.c
new file mode 100644
index 000000000..1846f4e84
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/task_event_pinned_vs_ebb_test.c
@@ -0,0 +1,93 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "ebb.h"
+
+
+/*
+ * Tests a pinned per-task event vs an EBB - in that order. The pinned per-task
+ * event should prevent the EBB event from being enabled.
+ */
+
+static int setup_child_event(struct event *event, pid_t child_pid)
+{
+	event_init_named(event, 0x400FA, "PM_RUN_INST_CMPL");
+
+	event->attr.pinned = 1;
+
+	event->attr.exclude_kernel = 1;
+	event->attr.exclude_hv = 1;
+	event->attr.exclude_idle = 1;
+
+	FAIL_IF(event_open_with_pid(event, child_pid));
+	FAIL_IF(event_enable(event));
+
+	return 0;
+}
+
+int task_event_pinned_vs_ebb(void)
+{
+	union pipe read_pipe, write_pipe;
+	struct event event;
+	pid_t pid;
+	int rc;
+
+	SKIP_IF(!ebb_is_supported());
+
+	FAIL_IF(pipe(read_pipe.fds) == -1);
+	FAIL_IF(pipe(write_pipe.fds) == -1);
+
+	pid = fork();
+	if (pid == 0) {
+		/* NB order of pipes looks reversed */
+		exit(ebb_child(write_pipe, read_pipe));
+	}
+
+	/* We setup the task event first */
+	rc = setup_child_event(&event, pid);
+	if (rc) {
+		kill_child_and_wait(pid);
+		return rc;
+	}
+
+	/* Signal the child to install its EBB event and wait */
+	if (sync_with_child(read_pipe, write_pipe))
+		/* If it fails, wait for it to exit */
+		goto wait;
+
+	/* Signal the child to run */
+	FAIL_IF(sync_with_child(read_pipe, write_pipe));
+
+wait:
+	/* We expect it to fail to read the event */
+	FAIL_IF(wait_for_child(pid) != 2);
+	FAIL_IF(event_disable(&event));
+	FAIL_IF(event_read(&event));
+
+	event_report(&event);
+
+	FAIL_IF(event.result.value == 0);
+	/*
+	 * For reasons I don't understand enabled is usually just slightly
+	 * lower than running. Would be good to confirm why.
+	 */
+	FAIL_IF(event.result.enabled == 0);
+	FAIL_IF(event.result.running == 0);
+
+	return 0;
+}
+
+int main(void)
+{
+	return test_harness(task_event_pinned_vs_ebb, "task_event_pinned_vs_ebb");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/task_event_vs_ebb_test.c b/tools/testing/selftests/powerpc/pmu/ebb/task_event_vs_ebb_test.c
new file mode 100644
index 000000000..e3bc6e92a
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/task_event_vs_ebb_test.c
@@ -0,0 +1,85 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "ebb.h"
+
+
+/*
+ * Tests a per-task event vs an EBB - in that order. The EBB should push the
+ * per-task event off the PMU.
+ */
+
+static int setup_child_event(struct event *event, pid_t child_pid)
+{
+	event_init_named(event, 0x400FA, "PM_RUN_INST_CMPL");
+
+	event->attr.exclude_kernel = 1;
+	event->attr.exclude_hv = 1;
+	event->attr.exclude_idle = 1;
+
+	FAIL_IF(event_open_with_pid(event, child_pid));
+	FAIL_IF(event_enable(event));
+
+	return 0;
+}
+
+int task_event_vs_ebb(void)
+{
+	union pipe read_pipe, write_pipe;
+	struct event event;
+	pid_t pid;
+	int rc;
+
+	SKIP_IF(!ebb_is_supported());
+
+	FAIL_IF(pipe(read_pipe.fds) == -1);
+	FAIL_IF(pipe(write_pipe.fds) == -1);
+
+	pid = fork();
+	if (pid == 0) {
+		/* NB order of pipes looks reversed */
+		exit(ebb_child(write_pipe, read_pipe));
+	}
+
+	/* We setup the task event first */
+	rc = setup_child_event(&event, pid);
+	if (rc) {
+		kill_child_and_wait(pid);
+		return rc;
+	}
+
+	/* Signal the child to install its EBB event and wait */
+	if (sync_with_child(read_pipe, write_pipe))
+		/* If it fails, wait for it to exit */
+		goto wait;
+
+	/* Signal the child to run */
+	FAIL_IF(sync_with_child(read_pipe, write_pipe));
+
+wait:
+	/* The EBB event should push the task event off so the child should succeed */
+	FAIL_IF(wait_for_child(pid));
+	FAIL_IF(event_disable(&event));
+	FAIL_IF(event_read(&event));
+
+	event_report(&event);
+
+	/* The task event may have run, or not so we can't assert anything about it */
+
+	return 0;
+}
+
+int main(void)
+{
+	return test_harness(task_event_vs_ebb, "task_event_vs_ebb");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/trace.c b/tools/testing/selftests/powerpc/pmu/ebb/trace.c
new file mode 100644
index 000000000..251e66ab2
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/trace.c
@@ -0,0 +1,300 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+
+#include "trace.h"
+
+
+struct trace_buffer *trace_buffer_allocate(u64 size)
+{
+	struct trace_buffer *tb;
+
+	if (size < sizeof(*tb)) {
+		fprintf(stderr, "Error: trace buffer too small\n");
+		return NULL;
+	}
+
+	tb = mmap(NULL, size, PROT_READ | PROT_WRITE,
+		  MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+	if (tb == MAP_FAILED) {
+		perror("mmap");
+		return NULL;
+	}
+
+	tb->size = size;
+	tb->tail = tb->data;
+	tb->overflow = false;
+
+	return tb;
+}
+
+static bool trace_check_bounds(struct trace_buffer *tb, void *p)
+{
+	return p < ((void *)tb + tb->size);
+}
+
+static bool trace_check_alloc(struct trace_buffer *tb, void *p)
+{
+	/*
+	 * If we ever overflowed don't allow any more input. This prevents us
+	 * from dropping a large item and then later logging a small one. The
+	 * buffer should just stop when overflow happened, not be patchy. If
+	 * you're overflowing, make your buffer bigger.
+	 */
+	if (tb->overflow)
+		return false;
+
+	if (!trace_check_bounds(tb, p)) {
+		tb->overflow = true;
+		return false;
+	}
+
+	return true;
+}
+
+static void *trace_alloc(struct trace_buffer *tb, int bytes)
+{
+	void *p, *newtail;
+
+	p = tb->tail;
+	newtail = tb->tail + bytes;
+	if (!trace_check_alloc(tb, newtail))
+		return NULL;
+
+	tb->tail = newtail;
+
+	return p;
+}
+
+static struct trace_entry *trace_alloc_entry(struct trace_buffer *tb, int payload_size)
+{
+	struct trace_entry *e;
+
+	e = trace_alloc(tb, sizeof(*e) + payload_size);
+	if (e)
+		e->length = payload_size;
+
+	return e;
+}
+
+int trace_log_reg(struct trace_buffer *tb, u64 reg, u64 value)
+{
+	struct trace_entry *e;
+	u64 *p;
+
+	e = trace_alloc_entry(tb, sizeof(reg) + sizeof(value));
+	if (!e)
+		return -ENOSPC;
+
+	e->type = TRACE_TYPE_REG;
+	p = (u64 *)e->data;
+	*p++ = reg;
+	*p++ = value;
+
+	return 0;
+}
+
+int trace_log_counter(struct trace_buffer *tb, u64 value)
+{
+	struct trace_entry *e;
+	u64 *p;
+
+	e = trace_alloc_entry(tb, sizeof(value));
+	if (!e)
+		return -ENOSPC;
+
+	e->type = TRACE_TYPE_COUNTER;
+	p = (u64 *)e->data;
+	*p++ = value;
+
+	return 0;
+}
+
+int trace_log_string(struct trace_buffer *tb, char *str)
+{
+	struct trace_entry *e;
+	char *p;
+	int len;
+
+	len = strlen(str);
+
+	/* We NULL terminate to make printing easier */
+	e = trace_alloc_entry(tb, len + 1);
+	if (!e)
+		return -ENOSPC;
+
+	e->type = TRACE_TYPE_STRING;
+	p = (char *)e->data;
+	memcpy(p, str, len);
+	p += len;
+	*p = '\0';
+
+	return 0;
+}
+
+int trace_log_indent(struct trace_buffer *tb)
+{
+	struct trace_entry *e;
+
+	e = trace_alloc_entry(tb, 0);
+	if (!e)
+		return -ENOSPC;
+
+	e->type = TRACE_TYPE_INDENT;
+
+	return 0;
+}
+
+int trace_log_outdent(struct trace_buffer *tb)
+{
+	struct trace_entry *e;
+
+	e = trace_alloc_entry(tb, 0);
+	if (!e)
+		return -ENOSPC;
+
+	e->type = TRACE_TYPE_OUTDENT;
+
+	return 0;
+}
+
+static void trace_print_header(int seq, int prefix)
+{
+	printf("%*s[%d]: ", prefix, "", seq);
+}
+
+static char *trace_decode_reg(int reg)
+{
+	switch (reg) {
+		case 769: return "SPRN_MMCR2"; break;
+		case 770: return "SPRN_MMCRA"; break;
+		case 779: return "SPRN_MMCR0"; break;
+		case 804: return "SPRN_EBBHR"; break;
+		case 805: return "SPRN_EBBRR"; break;
+		case 806: return "SPRN_BESCR"; break;
+		case 800: return "SPRN_BESCRS"; break;
+		case 801: return "SPRN_BESCRSU"; break;
+		case 802: return "SPRN_BESCRR"; break;
+		case 803: return "SPRN_BESCRRU"; break;
+		case 771: return "SPRN_PMC1"; break;
+		case 772: return "SPRN_PMC2"; break;
+		case 773: return "SPRN_PMC3"; break;
+		case 774: return "SPRN_PMC4"; break;
+		case 775: return "SPRN_PMC5"; break;
+		case 776: return "SPRN_PMC6"; break;
+		case 780: return "SPRN_SIAR"; break;
+		case 781: return "SPRN_SDAR"; break;
+		case 768: return "SPRN_SIER"; break;
+	}
+
+	return NULL;
+}
+
+static void trace_print_reg(struct trace_entry *e)
+{
+	u64 *p, *reg, *value;
+	char *name;
+
+	p = (u64 *)e->data;
+	reg = p++;
+	value = p;
+
+	name = trace_decode_reg(*reg);
+	if (name)
+		printf("register %-10s = 0x%016llx\n", name, *value);
+	else
+		printf("register %lld = 0x%016llx\n", *reg, *value);
+}
+
+static void trace_print_counter(struct trace_entry *e)
+{
+	u64 *value;
+
+	value = (u64 *)e->data;
+	printf("counter = %lld\n", *value);
+}
+
+static void trace_print_string(struct trace_entry *e)
+{
+	char *str;
+
+	str = (char *)e->data;
+	puts(str);
+}
+
+#define BASE_PREFIX	2
+#define PREFIX_DELTA	8
+
+static void trace_print_entry(struct trace_entry *e, int seq, int *prefix)
+{
+	switch (e->type) {
+	case TRACE_TYPE_REG:
+		trace_print_header(seq, *prefix);
+		trace_print_reg(e);
+		break;
+	case TRACE_TYPE_COUNTER:
+		trace_print_header(seq, *prefix);
+		trace_print_counter(e);
+		break;
+	case TRACE_TYPE_STRING:
+		trace_print_header(seq, *prefix);
+		trace_print_string(e);
+		break;
+	case TRACE_TYPE_INDENT:
+		trace_print_header(seq, *prefix);
+		puts("{");
+		*prefix += PREFIX_DELTA;
+		break;
+	case TRACE_TYPE_OUTDENT:
+		*prefix -= PREFIX_DELTA;
+		if (*prefix < BASE_PREFIX)
+			*prefix = BASE_PREFIX;
+		trace_print_header(seq, *prefix);
+		puts("}");
+		break;
+	default:
+		trace_print_header(seq, *prefix);
+		printf("entry @ %p type %d\n", e, e->type);
+		break;
+	}
+}
+
+void trace_buffer_print(struct trace_buffer *tb)
+{
+	struct trace_entry *e;
+	int i, prefix;
+	void *p;
+
+	printf("Trace buffer dump:\n");
+	printf("  address  %p \n", tb);
+	printf("  tail     %p\n", tb->tail);
+	printf("  size     %llu\n", tb->size);
+	printf("  overflow %s\n", tb->overflow ? "TRUE" : "false");
+	printf("  Content:\n");
+
+	p = tb->data;
+
+	i = 0;
+	prefix = BASE_PREFIX;
+
+	while (trace_check_bounds(tb, p) && p < tb->tail) {
+		e = p;
+
+		trace_print_entry(e, i, &prefix);
+
+		i++;
+		p = (void *)e + sizeof(*e) + e->length;
+	}
+}
+
+void trace_print_location(struct trace_buffer *tb)
+{
+	printf("Trace buffer 0x%llx bytes @ %p\n", tb->size, tb);
+}
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/trace.h b/tools/testing/selftests/powerpc/pmu/ebb/trace.h
new file mode 100644
index 000000000..926458e28
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/trace.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#ifndef _SELFTESTS_POWERPC_PMU_EBB_TRACE_H
+#define _SELFTESTS_POWERPC_PMU_EBB_TRACE_H
+
+#include "utils.h"
+
+#define TRACE_TYPE_REG		1
+#define TRACE_TYPE_COUNTER	2
+#define TRACE_TYPE_STRING	3
+#define TRACE_TYPE_INDENT	4
+#define TRACE_TYPE_OUTDENT	5
+
+struct trace_entry
+{
+	u8 type;
+	u8 length;
+	u8 data[0];
+};
+
+struct trace_buffer
+{
+	u64  size;
+	bool overflow;
+	void *tail;
+	u8   data[0];
+};
+
+struct trace_buffer *trace_buffer_allocate(u64 size);
+int trace_log_reg(struct trace_buffer *tb, u64 reg, u64 value);
+int trace_log_counter(struct trace_buffer *tb, u64 value);
+int trace_log_string(struct trace_buffer *tb, char *str);
+int trace_log_indent(struct trace_buffer *tb);
+int trace_log_outdent(struct trace_buffer *tb);
+void trace_buffer_print(struct trace_buffer *tb);
+void trace_print_location(struct trace_buffer *tb);
+
+#endif /* _SELFTESTS_POWERPC_PMU_EBB_TRACE_H */
diff --git a/tools/testing/selftests/powerpc/pmu/event.c b/tools/testing/selftests/powerpc/pmu/event.c
new file mode 100644
index 000000000..184b36807
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/event.c
@@ -0,0 +1,131 @@
+/*
+ * Copyright 2013, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#define _GNU_SOURCE
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <string.h>
+#include <stdio.h>
+#include <sys/ioctl.h>
+
+#include "event.h"
+
+
+int perf_event_open(struct perf_event_attr *attr, pid_t pid, int cpu,
+		int group_fd, unsigned long flags)
+{
+	return syscall(__NR_perf_event_open, attr, pid, cpu,
+			   group_fd, flags);
+}
+
+void event_init_opts(struct event *e, u64 config, int type, char *name)
+{
+	memset(e, 0, sizeof(*e));
+
+	e->name = name;
+
+	e->attr.type = type;
+	e->attr.config = config;
+	e->attr.size = sizeof(e->attr);
+	/* This has to match the structure layout in the header */
+	e->attr.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | \
+				  PERF_FORMAT_TOTAL_TIME_RUNNING;
+}
+
+void event_init_named(struct event *e, u64 config, char *name)
+{
+	event_init_opts(e, config, PERF_TYPE_RAW, name);
+}
+
+void event_init(struct event *e, u64 config)
+{
+	event_init_opts(e, config, PERF_TYPE_RAW, "event");
+}
+
+#define PERF_CURRENT_PID	0
+#define PERF_NO_PID		-1
+#define PERF_NO_CPU		-1
+#define PERF_NO_GROUP		-1
+
+int event_open_with_options(struct event *e, pid_t pid, int cpu, int group_fd)
+{
+	e->fd = perf_event_open(&e->attr, pid, cpu, group_fd, 0);
+	if (e->fd == -1) {
+		perror("perf_event_open");
+		return -1;
+	}
+
+	return 0;
+}
+
+int event_open_with_group(struct event *e, int group_fd)
+{
+	return event_open_with_options(e, PERF_CURRENT_PID, PERF_NO_CPU, group_fd);
+}
+
+int event_open_with_pid(struct event *e, pid_t pid)
+{
+	return event_open_with_options(e, pid, PERF_NO_CPU, PERF_NO_GROUP);
+}
+
+int event_open_with_cpu(struct event *e, int cpu)
+{
+	return event_open_with_options(e, PERF_NO_PID, cpu, PERF_NO_GROUP);
+}
+
+int event_open(struct event *e)
+{
+	return event_open_with_options(e, PERF_CURRENT_PID, PERF_NO_CPU, PERF_NO_GROUP);
+}
+
+void event_close(struct event *e)
+{
+	close(e->fd);
+}
+
+int event_enable(struct event *e)
+{
+	return ioctl(e->fd, PERF_EVENT_IOC_ENABLE);
+}
+
+int event_disable(struct event *e)
+{
+	return ioctl(e->fd, PERF_EVENT_IOC_DISABLE);
+}
+
+int event_reset(struct event *e)
+{
+	return ioctl(e->fd, PERF_EVENT_IOC_RESET);
+}
+
+int event_read(struct event *e)
+{
+	int rc;
+
+	rc = read(e->fd, &e->result, sizeof(e->result));
+	if (rc != sizeof(e->result)) {
+		fprintf(stderr, "read error on event %p!\n", e);
+		return -1;
+	}
+
+	return 0;
+}
+
+void event_report_justified(struct event *e, int name_width, int result_width)
+{
+	printf("%*s: result %*llu ", name_width, e->name, result_width,
+	       e->result.value);
+
+	if (e->result.running == e->result.enabled)
+		printf("running/enabled %llu\n", e->result.running);
+	else
+		printf("running %llu enabled %llu\n", e->result.running,
+			e->result.enabled);
+}
+
+void event_report(struct event *e)
+{
+	event_report_justified(e, 0, 0);
+}
diff --git a/tools/testing/selftests/powerpc/pmu/event.h b/tools/testing/selftests/powerpc/pmu/event.h
new file mode 100644
index 000000000..a0ea6b1ee
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/event.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright 2013, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#ifndef _SELFTESTS_POWERPC_PMU_EVENT_H
+#define _SELFTESTS_POWERPC_PMU_EVENT_H
+
+#include <unistd.h>
+#include <linux/perf_event.h>
+
+#include "utils.h"
+
+
+struct event {
+	struct perf_event_attr attr;
+	char *name;
+	int fd;
+	/* This must match the read_format we use */
+	struct {
+		u64 value;
+		u64 running;
+		u64 enabled;
+	} result;
+};
+
+void event_init(struct event *e, u64 config);
+void event_init_named(struct event *e, u64 config, char *name);
+void event_init_opts(struct event *e, u64 config, int type, char *name);
+int event_open_with_options(struct event *e, pid_t pid, int cpu, int group_fd);
+int event_open_with_group(struct event *e, int group_fd);
+int event_open_with_pid(struct event *e, pid_t pid);
+int event_open_with_cpu(struct event *e, int cpu);
+int event_open(struct event *e);
+void event_close(struct event *e);
+int event_enable(struct event *e);
+int event_disable(struct event *e);
+int event_reset(struct event *e);
+int event_read(struct event *e);
+void event_report_justified(struct event *e, int name_width, int result_width);
+void event_report(struct event *e);
+
+#endif /* _SELFTESTS_POWERPC_PMU_EVENT_H */
diff --git a/tools/testing/selftests/powerpc/pmu/l3_bank_test.c b/tools/testing/selftests/powerpc/pmu/l3_bank_test.c
new file mode 100644
index 000000000..77472f314
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/l3_bank_test.c
@@ -0,0 +1,48 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "event.h"
+#include "utils.h"
+
+#define MALLOC_SIZE     (0x10000 * 10)  /* Ought to be enough .. */
+
+/*
+ * Tests that the L3 bank handling is correct. We fixed it in commit e9aaac1.
+ */
+static int l3_bank_test(void)
+{
+	struct event event;
+	char *p;
+	int i;
+
+	p = malloc(MALLOC_SIZE);
+	FAIL_IF(!p);
+
+	event_init(&event, 0x84918F);
+
+	FAIL_IF(event_open(&event));
+
+	for (i = 0; i < MALLOC_SIZE; i += 0x10000)
+		p[i] = i;
+
+	event_read(&event);
+	event_report(&event);
+
+	FAIL_IF(event.result.running == 0);
+	FAIL_IF(event.result.enabled == 0);
+
+	event_close(&event);
+	free(p);
+
+	return 0;
+}
+
+int main(void)
+{
+	return test_harness(l3_bank_test, "l3_bank_test");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/lib.c b/tools/testing/selftests/powerpc/pmu/lib.c
new file mode 100644
index 000000000..5bf5dd408
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/lib.c
@@ -0,0 +1,227 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#define _GNU_SOURCE	/* For CPU_ZERO etc. */
+
+#include <errno.h>
+#include <sched.h>
+#include <setjmp.h>
+#include <stdlib.h>
+#include <sys/wait.h>
+
+#include "utils.h"
+#include "lib.h"
+
+
+int bind_to_cpu(int cpu)
+{
+	cpu_set_t mask;
+
+	printf("Binding to cpu %d\n", cpu);
+
+	CPU_ZERO(&mask);
+	CPU_SET(cpu, &mask);
+
+	return sched_setaffinity(0, sizeof(mask), &mask);
+}
+
+#define PARENT_TOKEN	0xAA
+#define CHILD_TOKEN	0x55
+
+int sync_with_child(union pipe read_pipe, union pipe write_pipe)
+{
+	char c = PARENT_TOKEN;
+
+	FAIL_IF(write(write_pipe.write_fd, &c, 1) != 1);
+	FAIL_IF(read(read_pipe.read_fd, &c, 1) != 1);
+	if (c != CHILD_TOKEN) /* sometimes expected */
+		return 1;
+
+	return 0;
+}
+
+int wait_for_parent(union pipe read_pipe)
+{
+	char c;
+
+	FAIL_IF(read(read_pipe.read_fd, &c, 1) != 1);
+	FAIL_IF(c != PARENT_TOKEN);
+
+	return 0;
+}
+
+int notify_parent(union pipe write_pipe)
+{
+	char c = CHILD_TOKEN;
+
+	FAIL_IF(write(write_pipe.write_fd, &c, 1) != 1);
+
+	return 0;
+}
+
+int notify_parent_of_error(union pipe write_pipe)
+{
+	char c = ~CHILD_TOKEN;
+
+	FAIL_IF(write(write_pipe.write_fd, &c, 1) != 1);
+
+	return 0;
+}
+
+int wait_for_child(pid_t child_pid)
+{
+	int rc;
+
+	if (waitpid(child_pid, &rc, 0) == -1) {
+		perror("waitpid");
+		return 1;
+	}
+
+	if (WIFEXITED(rc))
+		rc = WEXITSTATUS(rc);
+	else
+		rc = 1; /* Signal or other */
+
+	return rc;
+}
+
+int kill_child_and_wait(pid_t child_pid)
+{
+	kill(child_pid, SIGTERM);
+
+	return wait_for_child(child_pid);
+}
+
+static int eat_cpu_child(union pipe read_pipe, union pipe write_pipe)
+{
+	volatile int i = 0;
+
+	/*
+	 * We are just here to eat cpu and die. So make sure we can be killed,
+	 * and also don't do any custom SIGTERM handling.
+	 */
+	signal(SIGTERM, SIG_DFL);
+
+	notify_parent(write_pipe);
+	wait_for_parent(read_pipe);
+
+	/* Soak up cpu forever */
+	while (1) i++;
+
+	return 0;
+}
+
+pid_t eat_cpu(int (test_function)(void))
+{
+	union pipe read_pipe, write_pipe;
+	int cpu, rc;
+	pid_t pid;
+
+	cpu = pick_online_cpu();
+	FAIL_IF(cpu < 0);
+	FAIL_IF(bind_to_cpu(cpu));
+
+	if (pipe(read_pipe.fds) == -1)
+		return -1;
+
+	if (pipe(write_pipe.fds) == -1)
+		return -1;
+
+	pid = fork();
+	if (pid == 0)
+		exit(eat_cpu_child(write_pipe, read_pipe));
+
+	if (sync_with_child(read_pipe, write_pipe)) {
+		rc = -1;
+		goto out;
+	}
+
+	printf("main test running as pid %d\n", getpid());
+
+	rc = test_function();
+out:
+	kill(pid, SIGKILL);
+
+	return rc;
+}
+
+struct addr_range libc, vdso;
+
+int parse_proc_maps(void)
+{
+	unsigned long start, end;
+	char execute, name[128];
+	FILE *f;
+	int rc;
+
+	f = fopen("/proc/self/maps", "r");
+	if (!f) {
+		perror("fopen");
+		return -1;
+	}
+
+	do {
+		/* This skips line with no executable which is what we want */
+		rc = fscanf(f, "%lx-%lx %*c%*c%c%*c %*x %*d:%*d %*d %127s\n",
+			    &start, &end, &execute, name);
+		if (rc <= 0)
+			break;
+
+		if (execute != 'x')
+			continue;
+
+		if (strstr(name, "libc")) {
+			libc.first = start;
+			libc.last = end - 1;
+		} else if (strstr(name, "[vdso]")) {
+			vdso.first = start;
+			vdso.last = end - 1;
+		}
+	} while(1);
+
+	fclose(f);
+
+	return 0;
+}
+
+#define PARANOID_PATH	"/proc/sys/kernel/perf_event_paranoid"
+
+bool require_paranoia_below(int level)
+{
+	long current;
+	char *end, buf[16];
+	FILE *f;
+	bool rc;
+
+	rc = false;
+
+	f = fopen(PARANOID_PATH, "r");
+	if (!f) {
+		perror("fopen");
+		goto out;
+	}
+
+	if (!fgets(buf, sizeof(buf), f)) {
+		printf("Couldn't read " PARANOID_PATH "?\n");
+		goto out_close;
+	}
+
+	current = strtol(buf, &end, 10);
+
+	if (end == buf) {
+		printf("Couldn't parse " PARANOID_PATH "?\n");
+		goto out_close;
+	}
+
+	if (current >= level)
+		goto out_close;
+
+	rc = true;
+out_close:
+	fclose(f);
+out:
+	return rc;
+}
+
diff --git a/tools/testing/selftests/powerpc/pmu/lib.h b/tools/testing/selftests/powerpc/pmu/lib.h
new file mode 100644
index 000000000..0213af4ff
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/lib.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#ifndef __SELFTESTS_POWERPC_PMU_LIB_H
+#define __SELFTESTS_POWERPC_PMU_LIB_H
+
+#include <stdio.h>
+#include <stdint.h>
+#include <string.h>
+#include <unistd.h>
+
+union pipe {
+	struct {
+		int read_fd;
+		int write_fd;
+	};
+	int fds[2];
+};
+
+extern int bind_to_cpu(int cpu);
+extern int kill_child_and_wait(pid_t child_pid);
+extern int wait_for_child(pid_t child_pid);
+extern int sync_with_child(union pipe read_pipe, union pipe write_pipe);
+extern int wait_for_parent(union pipe read_pipe);
+extern int notify_parent(union pipe write_pipe);
+extern int notify_parent_of_error(union pipe write_pipe);
+extern pid_t eat_cpu(int (test_function)(void));
+extern bool require_paranoia_below(int level);
+
+struct addr_range {
+	uint64_t first, last;
+};
+
+extern struct addr_range libc, vdso;
+
+int parse_proc_maps(void);
+
+#endif /* __SELFTESTS_POWERPC_PMU_LIB_H */
diff --git a/tools/testing/selftests/powerpc/pmu/loop.S b/tools/testing/selftests/powerpc/pmu/loop.S
new file mode 100644
index 000000000..20c1f0876
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/loop.S
@@ -0,0 +1,43 @@
+/*
+ * Copyright 2013, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <ppc-asm.h>
+
+	.text
+
+FUNC_START(thirty_two_instruction_loop)
+	cmpdi	r3,0
+	beqlr
+	addi	r4,r3,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1
+	addi	r4,r4,1	# 28 addi's
+	subi	r3,r3,1
+	b	FUNC_NAME(thirty_two_instruction_loop)
+FUNC_END(thirty_two_instruction_loop)
diff --git a/tools/testing/selftests/powerpc/pmu/per_event_excludes.c b/tools/testing/selftests/powerpc/pmu/per_event_excludes.c
new file mode 100644
index 000000000..fddbbc9ca
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/per_event_excludes.c
@@ -0,0 +1,114 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#define _GNU_SOURCE
+
+#include <elf.h>
+#include <limits.h>
+#include <stdio.h>
+#include <stdbool.h>
+#include <string.h>
+#include <sys/prctl.h>
+
+#include "event.h"
+#include "lib.h"
+#include "utils.h"
+
+/*
+ * Test that per-event excludes work.
+ */
+
+static int per_event_excludes(void)
+{
+	struct event *e, events[4];
+	char *platform;
+	int i;
+
+	platform = (char *)get_auxv_entry(AT_BASE_PLATFORM);
+	FAIL_IF(!platform);
+	SKIP_IF(strcmp(platform, "power8") != 0);
+
+	/*
+	 * We need to create the events disabled, otherwise the running/enabled
+	 * counts don't match up.
+	 */
+	e = &events[0];
+	event_init_opts(e, PERF_COUNT_HW_INSTRUCTIONS,
+			PERF_TYPE_HARDWARE, "instructions");
+	e->attr.disabled = 1;
+
+	e = &events[1];
+	event_init_opts(e, PERF_COUNT_HW_INSTRUCTIONS,
+			PERF_TYPE_HARDWARE, "instructions(k)");
+	e->attr.disabled = 1;
+	e->attr.exclude_user = 1;
+	e->attr.exclude_hv = 1;
+
+	e = &events[2];
+	event_init_opts(e, PERF_COUNT_HW_INSTRUCTIONS,
+			PERF_TYPE_HARDWARE, "instructions(h)");
+	e->attr.disabled = 1;
+	e->attr.exclude_user = 1;
+	e->attr.exclude_kernel = 1;
+
+	e = &events[3];
+	event_init_opts(e, PERF_COUNT_HW_INSTRUCTIONS,
+			PERF_TYPE_HARDWARE, "instructions(u)");
+	e->attr.disabled = 1;
+	e->attr.exclude_hv = 1;
+	e->attr.exclude_kernel = 1;
+
+	FAIL_IF(event_open(&events[0]));
+
+	/*
+	 * The open here will fail if we don't have per event exclude support,
+	 * because the second event has an incompatible set of exclude settings
+	 * and we're asking for the events to be in a group.
+	 */
+	for (i = 1; i < 4; i++)
+		FAIL_IF(event_open_with_group(&events[i], events[0].fd));
+
+	/*
+	 * Even though the above will fail without per-event excludes we keep
+	 * testing in order to be thorough.
+	 */
+	prctl(PR_TASK_PERF_EVENTS_ENABLE);
+
+	/* Spin for a while */
+	for (i = 0; i < INT_MAX; i++)
+		asm volatile("" : : : "memory");
+
+	prctl(PR_TASK_PERF_EVENTS_DISABLE);
+
+	for (i = 0; i < 4; i++) {
+		FAIL_IF(event_read(&events[i]));
+		event_report(&events[i]);
+	}
+
+	/*
+	 * We should see that all events have enabled == running. That
+	 * shows that they were all on the PMU at once.
+	 */
+	for (i = 0; i < 4; i++)
+		FAIL_IF(events[i].result.running != events[i].result.enabled);
+
+	/*
+	 * We can also check that the result for instructions is >= all the
+	 * other counts. That's because it is counting all instructions while
+	 * the others are counting a subset.
+	 */
+	for (i = 1; i < 4; i++)
+		FAIL_IF(events[0].result.value < events[i].result.value);
+
+	for (i = 0; i < 4; i++)
+		event_close(&events[i]);
+
+	return 0;
+}
+
+int main(void)
+{
+	return test_harness(per_event_excludes, "per_event_excludes");
+}
diff --git a/tools/testing/selftests/powerpc/primitives/.gitignore b/tools/testing/selftests/powerpc/primitives/.gitignore
new file mode 100644
index 000000000..4cc4e31be
--- /dev/null
+++ b/tools/testing/selftests/powerpc/primitives/.gitignore
@@ -0,0 +1 @@
+load_unaligned_zeropad
diff --git a/tools/testing/selftests/powerpc/primitives/Makefile b/tools/testing/selftests/powerpc/primitives/Makefile
new file mode 100644
index 000000000..ea2b7bd09
--- /dev/null
+++ b/tools/testing/selftests/powerpc/primitives/Makefile
@@ -0,0 +1,8 @@
+CFLAGS += -I$(CURDIR)
+
+TEST_GEN_PROGS := load_unaligned_zeropad
+
+top_srcdir = ../../../../..
+include ../../lib.mk
+
+$(TEST_GEN_PROGS): ../harness.c
diff --git a/tools/testing/selftests/powerpc/primitives/asm/asm-compat.h b/tools/testing/selftests/powerpc/primitives/asm/asm-compat.h
new file mode 120000
index 000000000..b14255e15
--- /dev/null
+++ b/tools/testing/selftests/powerpc/primitives/asm/asm-compat.h
@@ -0,0 +1 @@
+../.././../../../../arch/powerpc/include/asm/asm-compat.h
+\ No newline at end of file
diff --git a/tools/testing/selftests/powerpc/primitives/asm/asm-const.h b/tools/testing/selftests/powerpc/primitives/asm/asm-const.h
new file mode 120000
index 000000000..18d8be13e
--- /dev/null
+++ b/tools/testing/selftests/powerpc/primitives/asm/asm-const.h
@@ -0,0 +1 @@
+../../../../../../arch/powerpc/include/asm/asm-const.h
+\ No newline at end of file
diff --git a/tools/testing/selftests/powerpc/primitives/asm/feature-fixups.h b/tools/testing/selftests/powerpc/primitives/asm/feature-fixups.h
new file mode 120000
index 000000000..8dc6d4d46
--- /dev/null
+++ b/tools/testing/selftests/powerpc/primitives/asm/feature-fixups.h
@@ -0,0 +1 @@
+../../../../../../arch/powerpc/include/asm/feature-fixups.h
+\ No newline at end of file
diff --git a/tools/testing/selftests/powerpc/primitives/asm/firmware.h b/tools/testing/selftests/powerpc/primitives/asm/firmware.h
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/tools/testing/selftests/powerpc/primitives/asm/firmware.h
diff --git a/tools/testing/selftests/powerpc/primitives/asm/ppc-opcode.h b/tools/testing/selftests/powerpc/primitives/asm/ppc-opcode.h
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/tools/testing/selftests/powerpc/primitives/asm/ppc-opcode.h
diff --git a/tools/testing/selftests/powerpc/primitives/asm/ppc_asm.h b/tools/testing/selftests/powerpc/primitives/asm/ppc_asm.h
new file mode 120000
index 000000000..66c819322
--- /dev/null
+++ b/tools/testing/selftests/powerpc/primitives/asm/ppc_asm.h
@@ -0,0 +1 @@
+../../../../../../arch/powerpc/include/asm/ppc_asm.h
+\ No newline at end of file
diff --git a/tools/testing/selftests/powerpc/primitives/asm/processor.h b/tools/testing/selftests/powerpc/primitives/asm/processor.h
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/tools/testing/selftests/powerpc/primitives/asm/processor.h
diff --git a/tools/testing/selftests/powerpc/primitives/linux/stringify.h b/tools/testing/selftests/powerpc/primitives/linux/stringify.h
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/tools/testing/selftests/powerpc/primitives/linux/stringify.h
diff --git a/tools/testing/selftests/powerpc/primitives/load_unaligned_zeropad.c b/tools/testing/selftests/powerpc/primitives/load_unaligned_zeropad.c
new file mode 100644
index 000000000..ed3239bbf
--- /dev/null
+++ b/tools/testing/selftests/powerpc/primitives/load_unaligned_zeropad.c
@@ -0,0 +1,159 @@
+/*
+ * Userspace test harness for load_unaligned_zeropad. Creates two
+ * pages and uses mprotect to prevent access to the second page and
+ * a SEGV handler that walks the exception tables and runs the fixup
+ * routine.
+ *
+ * The results are compared against a normal load that is that is
+ * performed while access to the second page is enabled via mprotect.
+ *
+ * Copyright (C) 2014 Anton Blanchard <anton@au.ibm.com>, IBM
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdbool.h>
+#include <signal.h>
+#include <unistd.h>
+#include <sys/mman.h>
+
+#define FIXUP_SECTION ".ex_fixup"
+
+static inline unsigned long __fls(unsigned long x);
+
+#include "word-at-a-time.h"
+
+#include "utils.h"
+
+static inline unsigned long __fls(unsigned long x)
+{
+	int lz;
+
+	asm (PPC_CNTLZL "%0,%1" : "=r" (lz) : "r" (x));
+	return sizeof(unsigned long) - 1 - lz;
+}
+
+static int page_size;
+static char *mem_region;
+
+static int protect_region(void)
+{
+	if (mprotect(mem_region + page_size, page_size, PROT_NONE)) {
+		perror("mprotect");
+		return 1;
+	}
+
+	return 0;
+}
+
+static int unprotect_region(void)
+{
+	if (mprotect(mem_region + page_size, page_size, PROT_READ|PROT_WRITE)) {
+		perror("mprotect");
+		return 1;
+	}
+
+	return 0;
+}
+
+extern char __start___ex_table[];
+extern char __stop___ex_table[];
+
+#if defined(__powerpc64__)
+#define UCONTEXT_NIA(UC)	(UC)->uc_mcontext.gp_regs[PT_NIP]
+#elif defined(__powerpc__)
+#define UCONTEXT_NIA(UC)	(UC)->uc_mcontext.uc_regs->gregs[PT_NIP]
+#else
+#error implement UCONTEXT_NIA
+#endif
+
+struct extbl_entry {
+	int insn;
+	int fixup;
+};
+
+static void segv_handler(int signr, siginfo_t *info, void *ptr)
+{
+	ucontext_t *uc = (ucontext_t *)ptr;
+	unsigned long addr = (unsigned long)info->si_addr;
+	unsigned long *ip = &UCONTEXT_NIA(uc);
+	struct extbl_entry *entry = (struct extbl_entry *)__start___ex_table;
+
+	while (entry < (struct extbl_entry *)__stop___ex_table) {
+		unsigned long insn, fixup;
+
+		insn  = (unsigned long)&entry->insn + entry->insn;
+		fixup = (unsigned long)&entry->fixup + entry->fixup;
+
+		if (insn == *ip) {
+			*ip = fixup;
+			return;
+		}
+	}
+
+	printf("No exception table match for NIA %lx ADDR %lx\n", *ip, addr);
+	abort();
+}
+
+static void setup_segv_handler(void)
+{
+	struct sigaction action;
+
+	memset(&action, 0, sizeof(action));
+	action.sa_sigaction = segv_handler;
+	action.sa_flags = SA_SIGINFO;
+	sigaction(SIGSEGV, &action, NULL);
+}
+
+static int do_one_test(char *p, int page_offset)
+{
+	unsigned long should;
+	unsigned long got;
+
+	FAIL_IF(unprotect_region());
+	should = *(unsigned long *)p;
+	FAIL_IF(protect_region());
+
+	got = load_unaligned_zeropad(p);
+
+	if (should != got) {
+		printf("offset %u load_unaligned_zeropad returned 0x%lx, should be 0x%lx\n", page_offset, got, should);
+		return 1;
+	}
+
+	return 0;
+}
+
+static int test_body(void)
+{
+	unsigned long i;
+
+	page_size = getpagesize();
+	mem_region = mmap(NULL, page_size * 2, PROT_READ|PROT_WRITE,
+		MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
+
+	FAIL_IF(mem_region == MAP_FAILED);
+
+	for (i = 0; i < page_size; i++)
+		mem_region[i] = i;
+
+	memset(mem_region+page_size, 0, page_size);
+
+	setup_segv_handler();
+
+	for (i = 0; i < page_size; i++)
+		FAIL_IF(do_one_test(mem_region+i, i));
+
+	return 0;
+}
+
+int main(void)
+{
+	return test_harness(test_body, "load_unaligned_zeropad");
+}
diff --git a/tools/testing/selftests/powerpc/primitives/word-at-a-time.h b/tools/testing/selftests/powerpc/primitives/word-at-a-time.h
new file mode 120000
index 000000000..eb74401b5
--- /dev/null
+++ b/tools/testing/selftests/powerpc/primitives/word-at-a-time.h
@@ -0,0 +1 @@
+../../../../../arch/powerpc/include/asm/word-at-a-time.h
+\ No newline at end of file
diff --git a/tools/testing/selftests/powerpc/ptrace/.gitignore b/tools/testing/selftests/powerpc/ptrace/.gitignore
new file mode 100644
index 000000000..07ec449a2
--- /dev/null
+++ b/tools/testing/selftests/powerpc/ptrace/.gitignore
@@ -0,0 +1,12 @@
+ptrace-gpr
+ptrace-tm-gpr
+ptrace-tm-spd-gpr
+ptrace-tar
+ptrace-tm-tar
+ptrace-tm-spd-tar
+ptrace-vsx
+ptrace-tm-vsx
+ptrace-tm-spd-vsx
+ptrace-tm-spr
+ptrace-hwbreak
+perf-hwbreak
diff --git a/tools/testing/selftests/powerpc/ptrace/Makefile b/tools/testing/selftests/powerpc/ptrace/Makefile
new file mode 100644
index 000000000..9f9423430
--- /dev/null
+++ b/tools/testing/selftests/powerpc/ptrace/Makefile
@@ -0,0 +1,15 @@
+# SPDX-License-Identifier: GPL-2.0
+TEST_GEN_PROGS := ptrace-gpr ptrace-tm-gpr ptrace-tm-spd-gpr \
+              ptrace-tar ptrace-tm-tar ptrace-tm-spd-tar ptrace-vsx ptrace-tm-vsx \
+              ptrace-tm-spd-vsx ptrace-tm-spr ptrace-hwbreak ptrace-pkey core-pkey \
+              perf-hwbreak
+
+top_srcdir = ../../../../..
+include ../../lib.mk
+
+CFLAGS += -m64 -I../../../../../usr/include -I../tm -mhtm -fno-pie
+
+$(OUTPUT)/ptrace-pkey $(OUTPUT)/core-pkey: child.h
+$(OUTPUT)/ptrace-pkey $(OUTPUT)/core-pkey: LDLIBS += -pthread
+
+$(TEST_GEN_PROGS): ../harness.c ../utils.c ../lib/reg.S ptrace.h
diff --git a/tools/testing/selftests/powerpc/ptrace/child.h b/tools/testing/selftests/powerpc/ptrace/child.h
new file mode 100644
index 000000000..d7275b7b3
--- /dev/null
+++ b/tools/testing/selftests/powerpc/ptrace/child.h
@@ -0,0 +1,139 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Helper functions to sync execution between parent and child processes.
+ *
+ * Copyright 2018, Thiago Jung Bauermann, IBM Corporation.
+ */
+#include <stdio.h>
+#include <stdbool.h>
+#include <semaphore.h>
+
+/*
+ * Information in a shared memory location for synchronization between child and
+ * parent.
+ */
+struct child_sync {
+	/* The parent waits on this semaphore. */
+	sem_t sem_parent;
+
+	/* If true, the child should give up as well. */
+	bool parent_gave_up;
+
+	/* The child waits on this semaphore. */
+	sem_t sem_child;
+
+	/* If true, the parent should give up as well. */
+	bool child_gave_up;
+};
+
+#define CHILD_FAIL_IF(x, sync)						\
+	do {								\
+		if (x) {						\
+			fprintf(stderr,					\
+				"[FAIL] Test FAILED on line %d\n", __LINE__); \
+			(sync)->child_gave_up = true;			\
+			prod_parent(sync);				\
+			return 1;					\
+		}							\
+	} while (0)
+
+#define PARENT_FAIL_IF(x, sync)						\
+	do {								\
+		if (x) {						\
+			fprintf(stderr,					\
+				"[FAIL] Test FAILED on line %d\n", __LINE__); \
+			(sync)->parent_gave_up = true;			\
+			prod_child(sync);				\
+			return 1;					\
+		}							\
+	} while (0)
+
+#define PARENT_SKIP_IF_UNSUPPORTED(x, sync)				\
+	do {								\
+		if ((x) == -1 && (errno == ENODEV || errno == EINVAL)) { \
+			(sync)->parent_gave_up = true;			\
+			prod_child(sync);				\
+			SKIP_IF(1);					\
+		}							\
+	} while (0)
+
+int init_child_sync(struct child_sync *sync)
+{
+	int ret;
+
+	ret = sem_init(&sync->sem_parent, 1, 0);
+	if (ret) {
+		perror("Semaphore initialization failed");
+		return 1;
+	}
+
+	ret = sem_init(&sync->sem_child, 1, 0);
+	if (ret) {
+		perror("Semaphore initialization failed");
+		return 1;
+	}
+
+	return 0;
+}
+
+void destroy_child_sync(struct child_sync *sync)
+{
+	sem_destroy(&sync->sem_parent);
+	sem_destroy(&sync->sem_child);
+}
+
+int wait_child(struct child_sync *sync)
+{
+	int ret;
+
+	/* Wait until the child prods us. */
+	ret = sem_wait(&sync->sem_parent);
+	if (ret) {
+		perror("Error waiting for child");
+		return 1;
+	}
+
+	return sync->child_gave_up;
+}
+
+int prod_child(struct child_sync *sync)
+{
+	int ret;
+
+	/* Unblock the child now. */
+	ret = sem_post(&sync->sem_child);
+	if (ret) {
+		perror("Error prodding child");
+		return 1;
+	}
+
+	return 0;
+}
+
+int wait_parent(struct child_sync *sync)
+{
+	int ret;
+
+	/* Wait until the parent prods us. */
+	ret = sem_wait(&sync->sem_child);
+	if (ret) {
+		perror("Error waiting for parent");
+		return 1;
+	}
+
+	return sync->parent_gave_up;
+}
+
+int prod_parent(struct child_sync *sync)
+{
+	int ret;
+
+	/* Unblock the parent now. */
+	ret = sem_post(&sync->sem_parent);
+	if (ret) {
+		perror("Error prodding parent");
+		return 1;
+	}
+
+	return 0;
+}
diff --git a/tools/testing/selftests/powerpc/ptrace/core-pkey.c b/tools/testing/selftests/powerpc/ptrace/core-pkey.c
new file mode 100644
index 000000000..d5c64fee0
--- /dev/null
+++ b/tools/testing/selftests/powerpc/ptrace/core-pkey.c
@@ -0,0 +1,462 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Ptrace test for Memory Protection Key registers
+ *
+ * Copyright (C) 2015 Anshuman Khandual, IBM Corporation.
+ * Copyright (C) 2018 IBM Corporation.
+ */
+#include <limits.h>
+#include <linux/kernel.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include "ptrace.h"
+#include "child.h"
+
+#ifndef __NR_pkey_alloc
+#define __NR_pkey_alloc		384
+#endif
+
+#ifndef __NR_pkey_free
+#define __NR_pkey_free		385
+#endif
+
+#ifndef NT_PPC_PKEY
+#define NT_PPC_PKEY		0x110
+#endif
+
+#ifndef PKEY_DISABLE_EXECUTE
+#define PKEY_DISABLE_EXECUTE	0x4
+#endif
+
+#define AMR_BITS_PER_PKEY 2
+#define PKEY_REG_BITS (sizeof(u64) * 8)
+#define pkeyshift(pkey) (PKEY_REG_BITS - ((pkey + 1) * AMR_BITS_PER_PKEY))
+
+#define CORE_FILE_LIMIT	(5 * 1024 * 1024)	/* 5 MB should be enough */
+
+static const char core_pattern_file[] = "/proc/sys/kernel/core_pattern";
+
+static const char user_write[] = "[User Write (Running)]";
+static const char core_read_running[] = "[Core Read (Running)]";
+
+/* Information shared between the parent and the child. */
+struct shared_info {
+	struct child_sync child_sync;
+
+	/* AMR value the parent expects to read in the core file. */
+	unsigned long amr;
+
+	/* IAMR value the parent expects to read in the core file. */
+	unsigned long iamr;
+
+	/* UAMOR value the parent expects to read in the core file. */
+	unsigned long uamor;
+
+	/* When the child crashed. */
+	time_t core_time;
+};
+
+static int sys_pkey_alloc(unsigned long flags, unsigned long init_access_rights)
+{
+	return syscall(__NR_pkey_alloc, flags, init_access_rights);
+}
+
+static int sys_pkey_free(int pkey)
+{
+	return syscall(__NR_pkey_free, pkey);
+}
+
+static int increase_core_file_limit(void)
+{
+	struct rlimit rlim;
+	int ret;
+
+	ret = getrlimit(RLIMIT_CORE, &rlim);
+	FAIL_IF(ret);
+
+	if (rlim.rlim_cur != RLIM_INFINITY && rlim.rlim_cur < CORE_FILE_LIMIT) {
+		rlim.rlim_cur = CORE_FILE_LIMIT;
+
+		if (rlim.rlim_max != RLIM_INFINITY &&
+		    rlim.rlim_max < CORE_FILE_LIMIT)
+			rlim.rlim_max = CORE_FILE_LIMIT;
+
+		ret = setrlimit(RLIMIT_CORE, &rlim);
+		FAIL_IF(ret);
+	}
+
+	ret = getrlimit(RLIMIT_FSIZE, &rlim);
+	FAIL_IF(ret);
+
+	if (rlim.rlim_cur != RLIM_INFINITY && rlim.rlim_cur < CORE_FILE_LIMIT) {
+		rlim.rlim_cur = CORE_FILE_LIMIT;
+
+		if (rlim.rlim_max != RLIM_INFINITY &&
+		    rlim.rlim_max < CORE_FILE_LIMIT)
+			rlim.rlim_max = CORE_FILE_LIMIT;
+
+		ret = setrlimit(RLIMIT_FSIZE, &rlim);
+		FAIL_IF(ret);
+	}
+
+	return TEST_PASS;
+}
+
+static int child(struct shared_info *info)
+{
+	bool disable_execute = true;
+	int pkey1, pkey2, pkey3;
+	int *ptr, ret;
+
+	/* Wait until parent fills out the initial register values. */
+	ret = wait_parent(&info->child_sync);
+	if (ret)
+		return ret;
+
+	ret = increase_core_file_limit();
+	FAIL_IF(ret);
+
+	/* Get some pkeys so that we can change their bits in the AMR. */
+	pkey1 = sys_pkey_alloc(0, PKEY_DISABLE_EXECUTE);
+	if (pkey1 < 0) {
+		pkey1 = sys_pkey_alloc(0, 0);
+		FAIL_IF(pkey1 < 0);
+
+		disable_execute = false;
+	}
+
+	pkey2 = sys_pkey_alloc(0, 0);
+	FAIL_IF(pkey2 < 0);
+
+	pkey3 = sys_pkey_alloc(0, 0);
+	FAIL_IF(pkey3 < 0);
+
+	info->amr |= 3ul << pkeyshift(pkey1) | 2ul << pkeyshift(pkey2);
+
+	if (disable_execute)
+		info->iamr |= 1ul << pkeyshift(pkey1);
+	else
+		info->iamr &= ~(1ul << pkeyshift(pkey1));
+
+	info->iamr &= ~(1ul << pkeyshift(pkey2) | 1ul << pkeyshift(pkey3));
+
+	info->uamor |= 3ul << pkeyshift(pkey1) | 3ul << pkeyshift(pkey2);
+
+	printf("%-30s AMR: %016lx pkey1: %d pkey2: %d pkey3: %d\n",
+	       user_write, info->amr, pkey1, pkey2, pkey3);
+
+	mtspr(SPRN_AMR, info->amr);
+
+	/*
+	 * We won't use pkey3. This tests whether the kernel restores the UAMOR
+	 * permissions after a key is freed.
+	 */
+	sys_pkey_free(pkey3);
+
+	info->core_time = time(NULL);
+
+	/* Crash. */
+	ptr = 0;
+	*ptr = 1;
+
+	/* Shouldn't get here. */
+	FAIL_IF(true);
+
+	return TEST_FAIL;
+}
+
+/* Return file size if filename exists and pass sanity check, or zero if not. */
+static off_t try_core_file(const char *filename, struct shared_info *info,
+			   pid_t pid)
+{
+	struct stat buf;
+	int ret;
+
+	ret = stat(filename, &buf);
+	if (ret == -1)
+		return TEST_FAIL;
+
+	/* Make sure we're not using a stale core file. */
+	return buf.st_mtime >= info->core_time ? buf.st_size : TEST_FAIL;
+}
+
+static Elf64_Nhdr *next_note(Elf64_Nhdr *nhdr)
+{
+	return (void *) nhdr + sizeof(*nhdr) +
+		__ALIGN_KERNEL(nhdr->n_namesz, 4) +
+		__ALIGN_KERNEL(nhdr->n_descsz, 4);
+}
+
+static int check_core_file(struct shared_info *info, Elf64_Ehdr *ehdr,
+			   off_t core_size)
+{
+	unsigned long *regs;
+	Elf64_Phdr *phdr;
+	Elf64_Nhdr *nhdr;
+	size_t phdr_size;
+	void *p = ehdr, *note;
+	int ret;
+
+	ret = memcmp(ehdr->e_ident, ELFMAG, SELFMAG);
+	FAIL_IF(ret);
+
+	FAIL_IF(ehdr->e_type != ET_CORE);
+	FAIL_IF(ehdr->e_machine != EM_PPC64);
+	FAIL_IF(ehdr->e_phoff == 0 || ehdr->e_phnum == 0);
+
+	/*
+	 * e_phnum is at most 65535 so calculating the size of the
+	 * program header cannot overflow.
+	 */
+	phdr_size = sizeof(*phdr) * ehdr->e_phnum;
+
+	/* Sanity check the program header table location. */
+	FAIL_IF(ehdr->e_phoff + phdr_size < ehdr->e_phoff);
+	FAIL_IF(ehdr->e_phoff + phdr_size > core_size);
+
+	/* Find the PT_NOTE segment. */
+	for (phdr = p + ehdr->e_phoff;
+	     (void *) phdr < p + ehdr->e_phoff + phdr_size;
+	     phdr += ehdr->e_phentsize)
+		if (phdr->p_type == PT_NOTE)
+			break;
+
+	FAIL_IF((void *) phdr >= p + ehdr->e_phoff + phdr_size);
+
+	/* Find the NT_PPC_PKEY note. */
+	for (nhdr = p + phdr->p_offset;
+	     (void *) nhdr < p + phdr->p_offset + phdr->p_filesz;
+	     nhdr = next_note(nhdr))
+		if (nhdr->n_type == NT_PPC_PKEY)
+			break;
+
+	FAIL_IF((void *) nhdr >= p + phdr->p_offset + phdr->p_filesz);
+	FAIL_IF(nhdr->n_descsz == 0);
+
+	p = nhdr;
+	note = p + sizeof(*nhdr) + __ALIGN_KERNEL(nhdr->n_namesz, 4);
+
+	regs = (unsigned long *) note;
+
+	printf("%-30s AMR: %016lx IAMR: %016lx UAMOR: %016lx\n",
+	       core_read_running, regs[0], regs[1], regs[2]);
+
+	FAIL_IF(regs[0] != info->amr);
+	FAIL_IF(regs[1] != info->iamr);
+	FAIL_IF(regs[2] != info->uamor);
+
+	return TEST_PASS;
+}
+
+static int parent(struct shared_info *info, pid_t pid)
+{
+	char *filenames, *filename[3];
+	int fd, i, ret, status;
+	unsigned long regs[3];
+	off_t core_size;
+	void *core;
+
+	/*
+	 * Get the initial values for AMR, IAMR and UAMOR and communicate them
+	 * to the child.
+	 */
+	ret = ptrace_read_regs(pid, NT_PPC_PKEY, regs, 3);
+	PARENT_SKIP_IF_UNSUPPORTED(ret, &info->child_sync);
+	PARENT_FAIL_IF(ret, &info->child_sync);
+
+	info->amr = regs[0];
+	info->iamr = regs[1];
+	info->uamor = regs[2];
+
+	/* Wake up child so that it can set itself up. */
+	ret = prod_child(&info->child_sync);
+	PARENT_FAIL_IF(ret, &info->child_sync);
+
+	ret = wait(&status);
+	if (ret != pid) {
+		printf("Child's exit status not captured\n");
+		return TEST_FAIL;
+	} else if (!WIFSIGNALED(status) || !WCOREDUMP(status)) {
+		printf("Child didn't dump core\n");
+		return TEST_FAIL;
+	}
+
+	/* Construct array of core file names to try. */
+
+	filename[0] = filenames = malloc(PATH_MAX);
+	if (!filenames) {
+		perror("Error allocating memory");
+		return TEST_FAIL;
+	}
+
+	ret = snprintf(filename[0], PATH_MAX, "core-pkey.%d", pid);
+	if (ret < 0 || ret >= PATH_MAX) {
+		ret = TEST_FAIL;
+		goto out;
+	}
+
+	filename[1] = filename[0] + ret + 1;
+	ret = snprintf(filename[1], PATH_MAX - ret - 1, "core.%d", pid);
+	if (ret < 0 || ret >= PATH_MAX - ret - 1) {
+		ret = TEST_FAIL;
+		goto out;
+	}
+	filename[2] = "core";
+
+	for (i = 0; i < 3; i++) {
+		core_size = try_core_file(filename[i], info, pid);
+		if (core_size != TEST_FAIL)
+			break;
+	}
+
+	if (i == 3) {
+		printf("Couldn't find core file\n");
+		ret = TEST_FAIL;
+		goto out;
+	}
+
+	fd = open(filename[i], O_RDONLY);
+	if (fd == -1) {
+		perror("Error opening core file");
+		ret = TEST_FAIL;
+		goto out;
+	}
+
+	core = mmap(NULL, core_size, PROT_READ, MAP_PRIVATE, fd, 0);
+	if (core == (void *) -1) {
+		perror("Error mmaping core file");
+		ret = TEST_FAIL;
+		goto out;
+	}
+
+	ret = check_core_file(info, core, core_size);
+
+	munmap(core, core_size);
+	close(fd);
+	unlink(filename[i]);
+
+ out:
+	free(filenames);
+
+	return ret;
+}
+
+static int write_core_pattern(const char *core_pattern)
+{
+	size_t len = strlen(core_pattern), ret;
+	FILE *f;
+
+	f = fopen(core_pattern_file, "w");
+	SKIP_IF_MSG(!f, "Try with root privileges");
+
+	ret = fwrite(core_pattern, 1, len, f);
+	fclose(f);
+	if (ret != len) {
+		perror("Error writing to core_pattern file");
+		return TEST_FAIL;
+	}
+
+	return TEST_PASS;
+}
+
+static int setup_core_pattern(char **core_pattern_, bool *changed_)
+{
+	FILE *f;
+	char *core_pattern;
+	int ret;
+
+	core_pattern = malloc(PATH_MAX);
+	if (!core_pattern) {
+		perror("Error allocating memory");
+		return TEST_FAIL;
+	}
+
+	f = fopen(core_pattern_file, "r");
+	if (!f) {
+		perror("Error opening core_pattern file");
+		ret = TEST_FAIL;
+		goto out;
+	}
+
+	ret = fread(core_pattern, 1, PATH_MAX, f);
+	fclose(f);
+	if (!ret) {
+		perror("Error reading core_pattern file");
+		ret = TEST_FAIL;
+		goto out;
+	}
+
+	/* Check whether we can predict the name of the core file. */
+	if (!strcmp(core_pattern, "core") || !strcmp(core_pattern, "core.%p"))
+		*changed_ = false;
+	else {
+		ret = write_core_pattern("core-pkey.%p");
+		if (ret)
+			goto out;
+
+		*changed_ = true;
+	}
+
+	*core_pattern_ = core_pattern;
+	ret = TEST_PASS;
+
+ out:
+	if (ret)
+		free(core_pattern);
+
+	return ret;
+}
+
+static int core_pkey(void)
+{
+	char *core_pattern;
+	bool changed_core_pattern;
+	struct shared_info *info;
+	int shm_id;
+	int ret;
+	pid_t pid;
+
+	ret = setup_core_pattern(&core_pattern, &changed_core_pattern);
+	if (ret)
+		return ret;
+
+	shm_id = shmget(IPC_PRIVATE, sizeof(*info), 0777 | IPC_CREAT);
+	info = shmat(shm_id, NULL, 0);
+
+	ret = init_child_sync(&info->child_sync);
+	if (ret)
+		return ret;
+
+	pid = fork();
+	if (pid < 0) {
+		perror("fork() failed");
+		ret = TEST_FAIL;
+	} else if (pid == 0)
+		ret = child(info);
+	else
+		ret = parent(info, pid);
+
+	shmdt(info);
+
+	if (pid) {
+		destroy_child_sync(&info->child_sync);
+		shmctl(shm_id, IPC_RMID, NULL);
+
+		if (changed_core_pattern)
+			write_core_pattern(core_pattern);
+	}
+
+	free(core_pattern);
+
+	return ret;
+}
+
+int main(int argc, char *argv[])
+{
+	return test_harness(core_pkey, "core_pkey");
+}
diff --git a/tools/testing/selftests/powerpc/ptrace/perf-hwbreak.c b/tools/testing/selftests/powerpc/ptrace/perf-hwbreak.c
new file mode 100644
index 000000000..60df0b5e6
--- /dev/null
+++ b/tools/testing/selftests/powerpc/ptrace/perf-hwbreak.c
@@ -0,0 +1,195 @@
+/*
+ * perf events self profiling example test case for hw breakpoints.
+ *
+ * This tests perf PERF_TYPE_BREAKPOINT parameters
+ * 1) tests all variants of the break on read/write flags
+ * 2) tests exclude_user == 0 and 1
+ * 3) test array matches (if DAWR is supported))
+ * 4) test different numbers of breakpoints matches
+ *
+ * Configure this breakpoint, then read and write the data a number of
+ * times. Then check the output count from perf is as expected.
+ *
+ * Based on:
+ *   http://ozlabs.org/~anton/junkcode/perf_events_example1.c
+ *
+ * Copyright (C) 2018 Michael Neuling, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <unistd.h>
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <elf.h>
+#include <pthread.h>
+#include <sys/syscall.h>
+#include <linux/perf_event.h>
+#include <linux/hw_breakpoint.h>
+#include "utils.h"
+
+#define MAX_LOOPS 10000
+
+#define DAWR_LENGTH_MAX ((0x3f + 1) * 8)
+
+static inline int sys_perf_event_open(struct perf_event_attr *attr, pid_t pid,
+				      int cpu, int group_fd,
+				      unsigned long flags)
+{
+	attr->size = sizeof(*attr);
+	return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
+}
+
+static inline bool breakpoint_test(int len)
+{
+	struct perf_event_attr attr;
+	int fd;
+
+	/* setup counters */
+	memset(&attr, 0, sizeof(attr));
+	attr.disabled = 1;
+	attr.type = PERF_TYPE_BREAKPOINT;
+	attr.bp_type = HW_BREAKPOINT_R;
+	/* bp_addr can point anywhere but needs to be aligned */
+	attr.bp_addr = (__u64)(&attr) & 0xfffffffffffff800;
+	attr.bp_len = len;
+	fd = sys_perf_event_open(&attr, 0, -1, -1, 0);
+	if (fd < 0)
+		return false;
+	close(fd);
+	return true;
+}
+
+static inline bool perf_breakpoint_supported(void)
+{
+	return breakpoint_test(4);
+}
+
+static inline bool dawr_supported(void)
+{
+	return breakpoint_test(DAWR_LENGTH_MAX);
+}
+
+static int runtestsingle(int readwriteflag, int exclude_user, int arraytest)
+{
+	int i,j;
+	struct perf_event_attr attr;
+	size_t res;
+	unsigned long long breaks, needed;
+	int readint;
+	int readintarraybig[2*DAWR_LENGTH_MAX/sizeof(int)];
+	int *readintalign;
+	volatile int *ptr;
+	int break_fd;
+	int loop_num = MAX_LOOPS - (rand() % 100); /* provide some variability */
+	volatile int *k;
+
+	/* align to 0x400 boundary as required by DAWR */
+	readintalign = (int *)(((unsigned long)readintarraybig + 0x7ff) &
+			       0xfffffffffffff800);
+
+	ptr = &readint;
+	if (arraytest)
+		ptr = &readintalign[0];
+
+	/* setup counters */
+	memset(&attr, 0, sizeof(attr));
+	attr.disabled = 1;
+	attr.type = PERF_TYPE_BREAKPOINT;
+	attr.bp_type = readwriteflag;
+	attr.bp_addr = (__u64)ptr;
+	attr.bp_len = sizeof(int);
+	if (arraytest)
+		attr.bp_len = DAWR_LENGTH_MAX;
+	attr.exclude_user = exclude_user;
+	break_fd = sys_perf_event_open(&attr, 0, -1, -1, 0);
+	if (break_fd < 0) {
+		perror("sys_perf_event_open");
+		exit(1);
+	}
+
+	/* start counters */
+	ioctl(break_fd, PERF_EVENT_IOC_ENABLE);
+
+	/* Test a bunch of reads and writes */
+	k = &readint;
+	for (i = 0; i < loop_num; i++) {
+		if (arraytest)
+			k = &(readintalign[i % (DAWR_LENGTH_MAX/sizeof(int))]);
+
+		j = *k;
+		*k = j;
+	}
+
+	/* stop counters */
+	ioctl(break_fd, PERF_EVENT_IOC_DISABLE);
+
+	/* read and check counters */
+	res = read(break_fd, &breaks, sizeof(unsigned long long));
+	assert(res == sizeof(unsigned long long));
+	/* we read and write each loop, so subtract the ones we are counting */
+	needed = 0;
+	if (readwriteflag & HW_BREAKPOINT_R)
+		needed += loop_num;
+	if (readwriteflag & HW_BREAKPOINT_W)
+		needed += loop_num;
+	needed = needed * (1 - exclude_user);
+	printf("TESTED: addr:0x%lx brks:% 8lld loops:% 8i rw:%i !user:%i array:%i\n",
+	       (unsigned long int)ptr, breaks, loop_num, readwriteflag, exclude_user, arraytest);
+	if (breaks != needed) {
+		printf("FAILED: 0x%lx brks:%lld needed:%lli %i %i %i\n\n",
+		       (unsigned long int)ptr, breaks, needed, loop_num, readwriteflag, exclude_user);
+		return 1;
+	}
+	close(break_fd);
+
+	return 0;
+}
+
+static int runtest(void)
+{
+	int rwflag;
+	int exclude_user;
+	int ret;
+
+	/*
+	 * perf defines rwflag as two bits read and write and at least
+	 * one must be set.  So range 1-3.
+	 */
+	for (rwflag = 1 ; rwflag < 4; rwflag++) {
+		for (exclude_user = 0 ; exclude_user < 2; exclude_user++) {
+			ret = runtestsingle(rwflag, exclude_user, 0);
+			if (ret)
+				return ret;
+
+			/* if we have the dawr, we can do an array test */
+			if (!dawr_supported())
+				continue;
+			ret = runtestsingle(rwflag, exclude_user, 1);
+			if (ret)
+				return ret;
+		}
+	}
+	return 0;
+}
+
+
+static int perf_hwbreak(void)
+{
+	srand ( time(NULL) );
+
+	SKIP_IF(!perf_breakpoint_supported());
+
+	return runtest();
+}
+
+int main(int argc, char *argv[], char **envp)
+{
+	return test_harness(perf_hwbreak, "perf_hwbreak");
+}
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-gpr.c b/tools/testing/selftests/powerpc/ptrace/ptrace-gpr.c
new file mode 100644
index 000000000..ca29fafee
--- /dev/null
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-gpr.c
@@ -0,0 +1,123 @@
+/*
+ * Ptrace test for GPR/FPR registers
+ *
+ * Copyright (C) 2015 Anshuman Khandual, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include "ptrace.h"
+#include "ptrace-gpr.h"
+#include "reg.h"
+
+/* Tracer and Tracee Shared Data */
+int shm_id;
+int *cptr, *pptr;
+
+float a = FPR_1;
+float b = FPR_2;
+float c = FPR_3;
+
+void gpr(void)
+{
+	unsigned long gpr_buf[18];
+	float fpr_buf[32];
+
+	cptr = (int *)shmat(shm_id, NULL, 0);
+
+	asm __volatile__(
+		ASM_LOAD_GPR_IMMED(gpr_1)
+		ASM_LOAD_FPR_SINGLE_PRECISION(flt_1)
+		:
+		: [gpr_1]"i"(GPR_1), [flt_1] "b" (&a)
+		: "memory", "r6", "r7", "r8", "r9", "r10",
+		"r11", "r12", "r13", "r14", "r15", "r16", "r17",
+		"r18", "r19", "r20", "r21", "r22", "r23", "r24",
+		"r25", "r26", "r27", "r28", "r29", "r30", "r31"
+		);
+
+	cptr[1] = 1;
+
+	while (!cptr[0])
+		asm volatile("" : : : "memory");
+
+	shmdt((void *)cptr);
+	store_gpr(gpr_buf);
+	store_fpr_single_precision(fpr_buf);
+
+	if (validate_gpr(gpr_buf, GPR_3))
+		exit(1);
+
+	if (validate_fpr_float(fpr_buf, c))
+		exit(1);
+
+	exit(0);
+}
+
+int trace_gpr(pid_t child)
+{
+	unsigned long gpr[18];
+	unsigned long fpr[32];
+
+	FAIL_IF(start_trace(child));
+	FAIL_IF(show_gpr(child, gpr));
+	FAIL_IF(validate_gpr(gpr, GPR_1));
+	FAIL_IF(show_fpr(child, fpr));
+	FAIL_IF(validate_fpr(fpr, FPR_1_REP));
+	FAIL_IF(write_gpr(child, GPR_3));
+	FAIL_IF(write_fpr(child, FPR_3_REP));
+	FAIL_IF(stop_trace(child));
+
+	return TEST_PASS;
+}
+
+int ptrace_gpr(void)
+{
+	pid_t pid;
+	int ret, status;
+
+	shm_id = shmget(IPC_PRIVATE, sizeof(int) * 2, 0777|IPC_CREAT);
+	pid = fork();
+	if (pid < 0) {
+		perror("fork() failed");
+		return TEST_FAIL;
+	}
+	if (pid == 0)
+		gpr();
+
+	if (pid) {
+		pptr = (int *)shmat(shm_id, NULL, 0);
+		while (!pptr[1])
+			asm volatile("" : : : "memory");
+
+		ret = trace_gpr(pid);
+		if (ret) {
+			kill(pid, SIGTERM);
+			shmdt((void *)pptr);
+			shmctl(shm_id, IPC_RMID, NULL);
+			return TEST_FAIL;
+		}
+
+		pptr[0] = 1;
+		shmdt((void *)pptr);
+
+		ret = wait(&status);
+		shmctl(shm_id, IPC_RMID, NULL);
+		if (ret != pid) {
+			printf("Child's exit status not captured\n");
+			return TEST_FAIL;
+		}
+
+		return (WIFEXITED(status) && WEXITSTATUS(status)) ? TEST_FAIL :
+			TEST_PASS;
+	}
+
+	return TEST_PASS;
+}
+
+int main(int argc, char *argv[])
+{
+	return test_harness(ptrace_gpr, "ptrace_gpr");
+}
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-gpr.h b/tools/testing/selftests/powerpc/ptrace/ptrace-gpr.h
new file mode 100644
index 000000000..e30fef638
--- /dev/null
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-gpr.h
@@ -0,0 +1,74 @@
+/*
+ * Copyright (C) 2015 Anshuman Khandual, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#define GPR_1	1
+#define GPR_2	2
+#define GPR_3	3
+#define GPR_4	4
+
+#define FPR_1	0.001
+#define FPR_2	0.002
+#define FPR_3	0.003
+#define FPR_4	0.004
+
+#define FPR_1_REP 0x3f50624de0000000
+#define FPR_2_REP 0x3f60624de0000000
+#define FPR_3_REP 0x3f689374c0000000
+#define FPR_4_REP 0x3f70624de0000000
+
+/* Buffer must have 18 elements */
+int validate_gpr(unsigned long *gpr, unsigned long val)
+{
+	int i, found = 1;
+
+	for (i = 0; i < 18; i++) {
+		if (gpr[i] != val) {
+			printf("GPR[%d]: %lx Expected: %lx\n",
+				i+14, gpr[i], val);
+			found = 0;
+		}
+	}
+
+	if (!found)
+		return TEST_FAIL;
+	return TEST_PASS;
+}
+
+/* Buffer must have 32 elements */
+int validate_fpr(unsigned long *fpr, unsigned long val)
+{
+	int i, found = 1;
+
+	for (i = 0; i < 32; i++) {
+		if (fpr[i] != val) {
+			printf("FPR[%d]: %lx Expected: %lx\n", i, fpr[i], val);
+			found = 0;
+		}
+	}
+
+	if (!found)
+		return TEST_FAIL;
+	return TEST_PASS;
+}
+
+/* Buffer must have 32 elements */
+int validate_fpr_float(float *fpr, float val)
+{
+	int i, found = 1;
+
+	for (i = 0; i < 32; i++) {
+		if (fpr[i] != val) {
+			printf("FPR[%d]: %f Expected: %f\n", i, fpr[i], val);
+			found = 0;
+		}
+	}
+
+	if (!found)
+		return TEST_FAIL;
+	return TEST_PASS;
+}
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c b/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c
new file mode 100644
index 000000000..3066d310f
--- /dev/null
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c
@@ -0,0 +1,342 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+/*
+ * Ptrace test for hw breakpoints
+ *
+ * Based on tools/testing/selftests/breakpoints/breakpoint_test.c
+ *
+ * This test forks and the parent then traces the child doing various
+ * types of ptrace enabled breakpoints
+ *
+ * Copyright (C) 2018 Michael Neuling, IBM Corporation.
+ */
+
+#include <sys/ptrace.h>
+#include <unistd.h>
+#include <stddef.h>
+#include <sys/user.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include "ptrace.h"
+
+/* Breakpoint access modes */
+enum {
+	BP_X = 1,
+	BP_RW = 2,
+	BP_W = 4,
+};
+
+static pid_t child_pid;
+static struct ppc_debug_info dbginfo;
+
+static void get_dbginfo(void)
+{
+	int ret;
+
+	ret = ptrace(PPC_PTRACE_GETHWDBGINFO, child_pid, NULL, &dbginfo);
+	if (ret) {
+		perror("Can't get breakpoint info\n");
+		exit(-1);
+	}
+}
+
+static bool hwbreak_present(void)
+{
+	return (dbginfo.num_data_bps != 0);
+}
+
+static bool dawr_present(void)
+{
+	return !!(dbginfo.features & PPC_DEBUG_FEATURE_DATA_BP_DAWR);
+}
+
+static void set_breakpoint_addr(void *addr)
+{
+	int ret;
+
+	ret = ptrace(PTRACE_SET_DEBUGREG, child_pid, 0, addr);
+	if (ret) {
+		perror("Can't set breakpoint addr\n");
+		exit(-1);
+	}
+}
+
+static int set_hwbreakpoint_addr(void *addr, int range)
+{
+	int ret;
+
+	struct ppc_hw_breakpoint info;
+
+	info.version = 1;
+	info.trigger_type = PPC_BREAKPOINT_TRIGGER_RW;
+	info.addr_mode = PPC_BREAKPOINT_MODE_EXACT;
+	if (range > 0)
+		info.addr_mode = PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE;
+	info.condition_mode = PPC_BREAKPOINT_CONDITION_NONE;
+	info.addr = (__u64)addr;
+	info.addr2 = (__u64)addr + range;
+	info.condition_value = 0;
+
+	ret = ptrace(PPC_PTRACE_SETHWDEBUG, child_pid, 0, &info);
+	if (ret < 0) {
+		perror("Can't set breakpoint\n");
+		exit(-1);
+	}
+	return ret;
+}
+
+static int del_hwbreakpoint_addr(int watchpoint_handle)
+{
+	int ret;
+
+	ret = ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, watchpoint_handle);
+	if (ret < 0) {
+		perror("Can't delete hw breakpoint\n");
+		exit(-1);
+	}
+	return ret;
+}
+
+#define DAWR_LENGTH_MAX 512
+
+/* Dummy variables to test read/write accesses */
+static unsigned long long
+	dummy_array[DAWR_LENGTH_MAX / sizeof(unsigned long long)]
+	__attribute__((aligned(512)));
+static unsigned long long *dummy_var = dummy_array;
+
+static void write_var(int len)
+{
+	long long *plval;
+	char *pcval;
+	short *psval;
+	int *pival;
+
+	switch (len) {
+	case 1:
+		pcval = (char *)dummy_var;
+		*pcval = 0xff;
+		break;
+	case 2:
+		psval = (short *)dummy_var;
+		*psval = 0xffff;
+		break;
+	case 4:
+		pival = (int *)dummy_var;
+		*pival = 0xffffffff;
+		break;
+	case 8:
+		plval = (long long *)dummy_var;
+		*plval = 0xffffffffffffffffLL;
+		break;
+	}
+}
+
+static void read_var(int len)
+{
+	char cval __attribute__((unused));
+	short sval __attribute__((unused));
+	int ival __attribute__((unused));
+	long long lval __attribute__((unused));
+
+	switch (len) {
+	case 1:
+		cval = *(char *)dummy_var;
+		break;
+	case 2:
+		sval = *(short *)dummy_var;
+		break;
+	case 4:
+		ival = *(int *)dummy_var;
+		break;
+	case 8:
+		lval = *(long long *)dummy_var;
+		break;
+	}
+}
+
+/*
+ * Do the r/w accesses to trigger the breakpoints. And run
+ * the usual traps.
+ */
+static void trigger_tests(void)
+{
+	int len, ret;
+
+	ret = ptrace(PTRACE_TRACEME, 0, NULL, 0);
+	if (ret) {
+		perror("Can't be traced?\n");
+		return;
+	}
+
+	/* Wake up father so that it sets up the first test */
+	kill(getpid(), SIGUSR1);
+
+	/* Test write watchpoints */
+	for (len = 1; len <= sizeof(long); len <<= 1)
+		write_var(len);
+
+	/* Test read/write watchpoints (on read accesses) */
+	for (len = 1; len <= sizeof(long); len <<= 1)
+		read_var(len);
+
+	/* Test when breakpoint is unset */
+
+	/* Test write watchpoints */
+	for (len = 1; len <= sizeof(long); len <<= 1)
+		write_var(len);
+
+	/* Test read/write watchpoints (on read accesses) */
+	for (len = 1; len <= sizeof(long); len <<= 1)
+		read_var(len);
+}
+
+static void check_success(const char *msg)
+{
+	const char *msg2;
+	int status;
+
+	/* Wait for the child to SIGTRAP */
+	wait(&status);
+
+	msg2 = "Failed";
+
+	if (WIFSTOPPED(status) && WSTOPSIG(status) == SIGTRAP) {
+		msg2 = "Child process hit the breakpoint";
+	}
+
+	printf("%s Result: [%s]\n", msg, msg2);
+}
+
+static void launch_watchpoints(char *buf, int mode, int len,
+			       struct ppc_debug_info *dbginfo, bool dawr)
+{
+	const char *mode_str;
+	unsigned long data = (unsigned long)(dummy_var);
+	int wh, range;
+
+	data &= ~0x7UL;
+
+	if (mode == BP_W) {
+		data |= (1UL << 1);
+		mode_str = "write";
+	} else {
+		data |= (1UL << 0);
+		data |= (1UL << 1);
+		mode_str = "read";
+	}
+
+	/* Set DABR_TRANSLATION bit */
+	data |= (1UL << 2);
+
+	/* use PTRACE_SET_DEBUGREG breakpoints */
+	set_breakpoint_addr((void *)data);
+	ptrace(PTRACE_CONT, child_pid, NULL, 0);
+	sprintf(buf, "Test %s watchpoint with len: %d ", mode_str, len);
+	check_success(buf);
+	/* Unregister hw brkpoint */
+	set_breakpoint_addr(NULL);
+
+	data = (data & ~7); /* remove dabr control bits */
+
+	/* use PPC_PTRACE_SETHWDEBUG breakpoint */
+	if (!(dbginfo->features & PPC_DEBUG_FEATURE_DATA_BP_RANGE))
+		return; /* not supported */
+	wh = set_hwbreakpoint_addr((void *)data, 0);
+	ptrace(PTRACE_CONT, child_pid, NULL, 0);
+	sprintf(buf, "Test %s watchpoint with len: %d ", mode_str, len);
+	check_success(buf);
+	/* Unregister hw brkpoint */
+	del_hwbreakpoint_addr(wh);
+
+	/* try a wider range */
+	range = 8;
+	if (dawr)
+		range = 512 - ((int)data & (DAWR_LENGTH_MAX - 1));
+	wh = set_hwbreakpoint_addr((void *)data, range);
+	ptrace(PTRACE_CONT, child_pid, NULL, 0);
+	sprintf(buf, "Test %s watchpoint with len: %d ", mode_str, len);
+	check_success(buf);
+	/* Unregister hw brkpoint */
+	del_hwbreakpoint_addr(wh);
+}
+
+/* Set the breakpoints and check the child successfully trigger them */
+static int launch_tests(bool dawr)
+{
+	char buf[1024];
+	int len, i, status;
+
+	struct ppc_debug_info dbginfo;
+
+	i = ptrace(PPC_PTRACE_GETHWDBGINFO, child_pid, NULL, &dbginfo);
+	if (i) {
+		perror("Can't set breakpoint info\n");
+		exit(-1);
+	}
+	if (!(dbginfo.features & PPC_DEBUG_FEATURE_DATA_BP_RANGE))
+		printf("WARNING: Kernel doesn't support PPC_PTRACE_SETHWDEBUG\n");
+
+	/* Write watchpoint */
+	for (len = 1; len <= sizeof(long); len <<= 1)
+		launch_watchpoints(buf, BP_W, len, &dbginfo, dawr);
+
+	/* Read-Write watchpoint */
+	for (len = 1; len <= sizeof(long); len <<= 1)
+		launch_watchpoints(buf, BP_RW, len, &dbginfo, dawr);
+
+	ptrace(PTRACE_CONT, child_pid, NULL, 0);
+
+	/*
+	 * Now we have unregistered the breakpoint, access by child
+	 * should not cause SIGTRAP.
+	 */
+
+	wait(&status);
+
+	if (WIFSTOPPED(status) && WSTOPSIG(status) == SIGTRAP) {
+		printf("FAIL: Child process hit the breakpoint, which is not expected\n");
+		ptrace(PTRACE_CONT, child_pid, NULL, 0);
+		return TEST_FAIL;
+	}
+
+	if (WIFEXITED(status))
+		printf("Child exited normally\n");
+
+	return TEST_PASS;
+}
+
+static int ptrace_hwbreak(void)
+{
+	pid_t pid;
+	int ret;
+	bool dawr;
+
+	pid = fork();
+	if (!pid) {
+		trigger_tests();
+		return 0;
+	}
+
+	wait(NULL);
+
+	child_pid = pid;
+
+	get_dbginfo();
+	SKIP_IF(!hwbreak_present());
+	dawr = dawr_present();
+
+	ret = launch_tests(dawr);
+
+	wait(NULL);
+
+	return ret;
+}
+
+int main(int argc, char **argv, char **envp)
+{
+	return test_harness(ptrace_hwbreak, "ptrace-hwbreak");
+}
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c b/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c
new file mode 100644
index 000000000..3694613f4
--- /dev/null
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c
@@ -0,0 +1,330 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Ptrace test for Memory Protection Key registers
+ *
+ * Copyright (C) 2015 Anshuman Khandual, IBM Corporation.
+ * Copyright (C) 2018 IBM Corporation.
+ */
+#include "ptrace.h"
+#include "child.h"
+
+#ifndef __NR_pkey_alloc
+#define __NR_pkey_alloc		384
+#endif
+
+#ifndef __NR_pkey_free
+#define __NR_pkey_free		385
+#endif
+
+#ifndef NT_PPC_PKEY
+#define NT_PPC_PKEY		0x110
+#endif
+
+#ifndef PKEY_DISABLE_EXECUTE
+#define PKEY_DISABLE_EXECUTE	0x4
+#endif
+
+#define AMR_BITS_PER_PKEY 2
+#define PKEY_REG_BITS (sizeof(u64) * 8)
+#define pkeyshift(pkey) (PKEY_REG_BITS - ((pkey + 1) * AMR_BITS_PER_PKEY))
+
+static const char user_read[] = "[User Read (Running)]";
+static const char user_write[] = "[User Write (Running)]";
+static const char ptrace_read_running[] = "[Ptrace Read (Running)]";
+static const char ptrace_write_running[] = "[Ptrace Write (Running)]";
+
+/* Information shared between the parent and the child. */
+struct shared_info {
+	struct child_sync child_sync;
+
+	/* AMR value the parent expects to read from the child. */
+	unsigned long amr1;
+
+	/* AMR value the parent is expected to write to the child. */
+	unsigned long amr2;
+
+	/* AMR value that ptrace should refuse to write to the child. */
+	unsigned long invalid_amr;
+
+	/* IAMR value the parent expects to read from the child. */
+	unsigned long expected_iamr;
+
+	/* UAMOR value the parent expects to read from the child. */
+	unsigned long expected_uamor;
+
+	/*
+	 * IAMR and UAMOR values that ptrace should refuse to write to the child
+	 * (even though they're valid ones) because userspace doesn't have
+	 * access to those registers.
+	 */
+	unsigned long invalid_iamr;
+	unsigned long invalid_uamor;
+};
+
+static int sys_pkey_alloc(unsigned long flags, unsigned long init_access_rights)
+{
+	return syscall(__NR_pkey_alloc, flags, init_access_rights);
+}
+
+static int child(struct shared_info *info)
+{
+	unsigned long reg;
+	bool disable_execute = true;
+	int pkey1, pkey2, pkey3;
+	int ret;
+
+	/* Wait until parent fills out the initial register values. */
+	ret = wait_parent(&info->child_sync);
+	if (ret)
+		return ret;
+
+	/* Get some pkeys so that we can change their bits in the AMR. */
+	pkey1 = sys_pkey_alloc(0, PKEY_DISABLE_EXECUTE);
+	if (pkey1 < 0) {
+		pkey1 = sys_pkey_alloc(0, 0);
+		CHILD_FAIL_IF(pkey1 < 0, &info->child_sync);
+
+		disable_execute = false;
+	}
+
+	pkey2 = sys_pkey_alloc(0, 0);
+	CHILD_FAIL_IF(pkey2 < 0, &info->child_sync);
+
+	pkey3 = sys_pkey_alloc(0, 0);
+	CHILD_FAIL_IF(pkey3 < 0, &info->child_sync);
+
+	info->amr1 |= 3ul << pkeyshift(pkey1);
+	info->amr2 |= 3ul << pkeyshift(pkey2);
+	/*
+	 * invalid amr value where we try to force write
+	 * things which are deined by a uamor setting.
+	 */
+	info->invalid_amr = info->amr2 | (~0x0UL & ~info->expected_uamor);
+
+	/*
+	 * if PKEY_DISABLE_EXECUTE succeeded we should update the expected_iamr
+	 */
+	if (disable_execute)
+		info->expected_iamr |= 1ul << pkeyshift(pkey1);
+	else
+		info->expected_iamr &= ~(1ul << pkeyshift(pkey1));
+
+	/*
+	 * We allocated pkey2 and pkey 3 above. Clear the IAMR bits.
+	 */
+	info->expected_iamr &= ~(1ul << pkeyshift(pkey2));
+	info->expected_iamr &= ~(1ul << pkeyshift(pkey3));
+
+	/*
+	 * Create an IAMR value different from expected value.
+	 * Kernel will reject an IAMR and UAMOR change.
+	 */
+	info->invalid_iamr = info->expected_iamr | (1ul << pkeyshift(pkey1) | 1ul << pkeyshift(pkey2));
+	info->invalid_uamor = info->expected_uamor & ~(0x3ul << pkeyshift(pkey1));
+
+	printf("%-30s AMR: %016lx pkey1: %d pkey2: %d pkey3: %d\n",
+	       user_write, info->amr1, pkey1, pkey2, pkey3);
+
+	mtspr(SPRN_AMR, info->amr1);
+
+	/* Wait for parent to read our AMR value and write a new one. */
+	ret = prod_parent(&info->child_sync);
+	CHILD_FAIL_IF(ret, &info->child_sync);
+
+	ret = wait_parent(&info->child_sync);
+	if (ret)
+		return ret;
+
+	reg = mfspr(SPRN_AMR);
+
+	printf("%-30s AMR: %016lx\n", user_read, reg);
+
+	CHILD_FAIL_IF(reg != info->amr2, &info->child_sync);
+
+	/*
+	 * Wait for parent to try to write an invalid AMR value.
+	 */
+	ret = prod_parent(&info->child_sync);
+	CHILD_FAIL_IF(ret, &info->child_sync);
+
+	ret = wait_parent(&info->child_sync);
+	if (ret)
+		return ret;
+
+	reg = mfspr(SPRN_AMR);
+
+	printf("%-30s AMR: %016lx\n", user_read, reg);
+
+	CHILD_FAIL_IF(reg != info->amr2, &info->child_sync);
+
+	/*
+	 * Wait for parent to try to write an IAMR and a UAMOR value. We can't
+	 * verify them, but we can verify that the AMR didn't change.
+	 */
+	ret = prod_parent(&info->child_sync);
+	CHILD_FAIL_IF(ret, &info->child_sync);
+
+	ret = wait_parent(&info->child_sync);
+	if (ret)
+		return ret;
+
+	reg = mfspr(SPRN_AMR);
+
+	printf("%-30s AMR: %016lx\n", user_read, reg);
+
+	CHILD_FAIL_IF(reg != info->amr2, &info->child_sync);
+
+	/* Now let parent now that we are finished. */
+
+	ret = prod_parent(&info->child_sync);
+	CHILD_FAIL_IF(ret, &info->child_sync);
+
+	return TEST_PASS;
+}
+
+static int parent(struct shared_info *info, pid_t pid)
+{
+	unsigned long regs[3];
+	int ret, status;
+
+	/*
+	 * Get the initial values for AMR, IAMR and UAMOR and communicate them
+	 * to the child.
+	 */
+	ret = ptrace_read_regs(pid, NT_PPC_PKEY, regs, 3);
+	PARENT_SKIP_IF_UNSUPPORTED(ret, &info->child_sync);
+	PARENT_FAIL_IF(ret, &info->child_sync);
+
+	info->amr1 = info->amr2 = regs[0];
+	info->expected_iamr = regs[1];
+	info->expected_uamor = regs[2];
+
+	/* Wake up child so that it can set itself up. */
+	ret = prod_child(&info->child_sync);
+	PARENT_FAIL_IF(ret, &info->child_sync);
+
+	ret = wait_child(&info->child_sync);
+	if (ret)
+		return ret;
+
+	/* Verify that we can read the pkey registers from the child. */
+	ret = ptrace_read_regs(pid, NT_PPC_PKEY, regs, 3);
+	PARENT_FAIL_IF(ret, &info->child_sync);
+
+	printf("%-30s AMR: %016lx IAMR: %016lx UAMOR: %016lx\n",
+	       ptrace_read_running, regs[0], regs[1], regs[2]);
+
+	PARENT_FAIL_IF(regs[0] != info->amr1, &info->child_sync);
+	PARENT_FAIL_IF(regs[1] != info->expected_iamr, &info->child_sync);
+	PARENT_FAIL_IF(regs[2] != info->expected_uamor, &info->child_sync);
+
+	/* Write valid AMR value in child. */
+	ret = ptrace_write_regs(pid, NT_PPC_PKEY, &info->amr2, 1);
+	PARENT_FAIL_IF(ret, &info->child_sync);
+
+	printf("%-30s AMR: %016lx\n", ptrace_write_running, info->amr2);
+
+	/* Wake up child so that it can verify it changed. */
+	ret = prod_child(&info->child_sync);
+	PARENT_FAIL_IF(ret, &info->child_sync);
+
+	ret = wait_child(&info->child_sync);
+	if (ret)
+		return ret;
+
+	/* Write invalid AMR value in child. */
+	ret = ptrace_write_regs(pid, NT_PPC_PKEY, &info->invalid_amr, 1);
+	PARENT_FAIL_IF(ret, &info->child_sync);
+
+	printf("%-30s AMR: %016lx\n", ptrace_write_running, info->invalid_amr);
+
+	/* Wake up child so that it can verify it didn't change. */
+	ret = prod_child(&info->child_sync);
+	PARENT_FAIL_IF(ret, &info->child_sync);
+
+	ret = wait_child(&info->child_sync);
+	if (ret)
+		return ret;
+
+	/* Try to write to IAMR. */
+	regs[0] = info->amr1;
+	regs[1] = info->invalid_iamr;
+	ret = ptrace_write_regs(pid, NT_PPC_PKEY, regs, 2);
+	PARENT_FAIL_IF(!ret, &info->child_sync);
+
+	printf("%-30s AMR: %016lx IAMR: %016lx\n",
+	       ptrace_write_running, regs[0], regs[1]);
+
+	/* Try to write to IAMR and UAMOR. */
+	regs[2] = info->invalid_uamor;
+	ret = ptrace_write_regs(pid, NT_PPC_PKEY, regs, 3);
+	PARENT_FAIL_IF(!ret, &info->child_sync);
+
+	printf("%-30s AMR: %016lx IAMR: %016lx UAMOR: %016lx\n",
+	       ptrace_write_running, regs[0], regs[1], regs[2]);
+
+	/* Verify that all registers still have their expected values. */
+	ret = ptrace_read_regs(pid, NT_PPC_PKEY, regs, 3);
+	PARENT_FAIL_IF(ret, &info->child_sync);
+
+	printf("%-30s AMR: %016lx IAMR: %016lx UAMOR: %016lx\n",
+	       ptrace_read_running, regs[0], regs[1], regs[2]);
+
+	PARENT_FAIL_IF(regs[0] != info->amr2, &info->child_sync);
+	PARENT_FAIL_IF(regs[1] != info->expected_iamr, &info->child_sync);
+	PARENT_FAIL_IF(regs[2] != info->expected_uamor, &info->child_sync);
+
+	/* Wake up child so that it can verify AMR didn't change and wrap up. */
+	ret = prod_child(&info->child_sync);
+	PARENT_FAIL_IF(ret, &info->child_sync);
+
+	ret = wait(&status);
+	if (ret != pid) {
+		printf("Child's exit status not captured\n");
+		ret = TEST_PASS;
+	} else if (!WIFEXITED(status)) {
+		printf("Child exited abnormally\n");
+		ret = TEST_FAIL;
+	} else
+		ret = WEXITSTATUS(status) ? TEST_FAIL : TEST_PASS;
+
+	return ret;
+}
+
+static int ptrace_pkey(void)
+{
+	struct shared_info *info;
+	int shm_id;
+	int ret;
+	pid_t pid;
+
+	shm_id = shmget(IPC_PRIVATE, sizeof(*info), 0777 | IPC_CREAT);
+	info = shmat(shm_id, NULL, 0);
+
+	ret = init_child_sync(&info->child_sync);
+	if (ret)
+		return ret;
+
+	pid = fork();
+	if (pid < 0) {
+		perror("fork() failed");
+		ret = TEST_FAIL;
+	} else if (pid == 0)
+		ret = child(info);
+	else
+		ret = parent(info, pid);
+
+	shmdt(info);
+
+	if (pid) {
+		destroy_child_sync(&info->child_sync);
+		shmctl(shm_id, IPC_RMID, NULL);
+	}
+
+	return ret;
+}
+
+int main(int argc, char *argv[])
+{
+	return test_harness(ptrace_pkey, "ptrace_pkey");
+}
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tar.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tar.c
new file mode 100644
index 000000000..f9b5069db
--- /dev/null
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tar.c
@@ -0,0 +1,135 @@
+/*
+ * Ptrace test for TAR, PPR, DSCR registers
+ *
+ * Copyright (C) 2015 Anshuman Khandual, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include "ptrace.h"
+#include "ptrace-tar.h"
+
+/* Tracer and Tracee Shared Data */
+int shm_id;
+int *cptr;
+int *pptr;
+
+void tar(void)
+{
+	unsigned long reg[3];
+	int ret;
+
+	cptr = (int *)shmat(shm_id, NULL, 0);
+	printf("%-30s TAR: %u PPR: %lx DSCR: %u\n",
+			user_write, TAR_1, PPR_1, DSCR_1);
+
+	mtspr(SPRN_TAR, TAR_1);
+	mtspr(SPRN_PPR, PPR_1);
+	mtspr(SPRN_DSCR, DSCR_1);
+
+	cptr[2] = 1;
+
+	/* Wait on parent */
+	while (!cptr[0])
+		asm volatile("" : : : "memory");
+
+	reg[0] = mfspr(SPRN_TAR);
+	reg[1] = mfspr(SPRN_PPR);
+	reg[2] = mfspr(SPRN_DSCR);
+
+	printf("%-30s TAR: %lu PPR: %lx DSCR: %lu\n",
+			user_read, reg[0], reg[1], reg[2]);
+
+	/* Unblock the parent now */
+	cptr[1] = 1;
+	shmdt((int *)cptr);
+
+	ret = validate_tar_registers(reg, TAR_2, PPR_2, DSCR_2);
+	if (ret)
+		exit(1);
+	exit(0);
+}
+
+int trace_tar(pid_t child)
+{
+	unsigned long reg[3];
+
+	FAIL_IF(start_trace(child));
+	FAIL_IF(show_tar_registers(child, reg));
+	printf("%-30s TAR: %lu PPR: %lx DSCR: %lu\n",
+			ptrace_read_running, reg[0], reg[1], reg[2]);
+
+	FAIL_IF(validate_tar_registers(reg, TAR_1, PPR_1, DSCR_1));
+	FAIL_IF(stop_trace(child));
+	return TEST_PASS;
+}
+
+int trace_tar_write(pid_t child)
+{
+	FAIL_IF(start_trace(child));
+	FAIL_IF(write_tar_registers(child, TAR_2, PPR_2, DSCR_2));
+	printf("%-30s TAR: %u PPR: %lx DSCR: %u\n",
+			ptrace_write_running, TAR_2, PPR_2, DSCR_2);
+
+	FAIL_IF(stop_trace(child));
+	return TEST_PASS;
+}
+
+int ptrace_tar(void)
+{
+	pid_t pid;
+	int ret, status;
+
+	shm_id = shmget(IPC_PRIVATE, sizeof(int) * 3, 0777|IPC_CREAT);
+	pid = fork();
+	if (pid < 0) {
+		perror("fork() failed");
+		return TEST_FAIL;
+	}
+
+	if (pid == 0)
+		tar();
+
+	if (pid) {
+		pptr = (int *)shmat(shm_id, NULL, 0);
+		pptr[0] = 0;
+		pptr[1] = 0;
+
+		while (!pptr[2])
+			asm volatile("" : : : "memory");
+		ret = trace_tar(pid);
+		if (ret)
+			return ret;
+
+		ret = trace_tar_write(pid);
+		if (ret)
+			return ret;
+
+		/* Unblock the child now */
+		pptr[0] = 1;
+
+		/* Wait on child */
+		while (!pptr[1])
+			asm volatile("" : : : "memory");
+
+		shmdt((int *)pptr);
+
+		ret = wait(&status);
+		shmctl(shm_id, IPC_RMID, NULL);
+		if (ret != pid) {
+			printf("Child's exit status not captured\n");
+			return TEST_PASS;
+		}
+
+		return (WIFEXITED(status) && WEXITSTATUS(status)) ? TEST_FAIL :
+			TEST_PASS;
+	}
+	return TEST_PASS;
+}
+
+int main(int argc, char *argv[])
+{
+	return test_harness(ptrace_tar, "ptrace_tar");
+}
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tar.h b/tools/testing/selftests/powerpc/ptrace/ptrace-tar.h
new file mode 100644
index 000000000..aed0aac71
--- /dev/null
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tar.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (C) 2015 Anshuman Khandual, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#define TAR_1   10
+#define TAR_2   20
+#define TAR_3   30
+#define TAR_4   40
+#define TAR_5   50
+
+#define DSCR_1  100
+#define DSCR_2  200
+#define DSCR_3  300
+#define DSCR_4  400
+#define DSCR_5  500
+
+#define PPR_1   0x4000000000000         /* or 31,31,31*/
+#define PPR_2   0x8000000000000         /* or 1,1,1 */
+#define PPR_3   0xc000000000000         /* or 6,6,6 */
+#define PPR_4   0x10000000000000        /* or 2,2,2 */
+
+char *user_read = "[User Read (Running)]";
+char *user_write = "[User Write (Running)]";
+char *ptrace_read_running = "[Ptrace Read (Running)]";
+char *ptrace_write_running = "[Ptrace Write (Running)]";
+char *ptrace_read_ckpt = "[Ptrace Read (Checkpointed)]";
+char *ptrace_write_ckpt = "[Ptrace Write (Checkpointed)]";
+
+int validate_tar_registers(unsigned long *reg, unsigned long tar,
+				unsigned long ppr, unsigned long dscr)
+{
+	int match = 1;
+
+	if (reg[0] != tar)
+		match = 0;
+
+	if (reg[1] != ppr)
+		match = 0;
+
+	if (reg[2] != dscr)
+		match = 0;
+
+	if (!match)
+		return TEST_FAIL;
+	return TEST_PASS;
+}
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-gpr.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-gpr.c
new file mode 100644
index 000000000..a08a91594
--- /dev/null
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-gpr.c
@@ -0,0 +1,158 @@
+/*
+ * Ptrace test for GPR/FPR registers in TM context
+ *
+ * Copyright (C) 2015 Anshuman Khandual, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include "ptrace.h"
+#include "ptrace-gpr.h"
+#include "tm.h"
+
+/* Tracer and Tracee Shared Data */
+int shm_id;
+unsigned long *cptr, *pptr;
+
+float a = FPR_1;
+float b = FPR_2;
+float c = FPR_3;
+
+void tm_gpr(void)
+{
+	unsigned long gpr_buf[18];
+	unsigned long result, texasr;
+	float fpr_buf[32];
+
+	printf("Starting the child\n");
+	cptr = (unsigned long *)shmat(shm_id, NULL, 0);
+
+trans:
+	cptr[1] = 0;
+	asm __volatile__(
+		ASM_LOAD_GPR_IMMED(gpr_1)
+		ASM_LOAD_FPR_SINGLE_PRECISION(flt_1)
+		"1: ;"
+		"tbegin.;"
+		"beq 2f;"
+		ASM_LOAD_GPR_IMMED(gpr_2)
+		ASM_LOAD_FPR_SINGLE_PRECISION(flt_2)
+		"tsuspend.;"
+		"li 7, 1;"
+		"stw 7, 0(%[cptr1]);"
+		"tresume.;"
+		"b .;"
+
+		"tend.;"
+		"li 0, 0;"
+		"ori %[res], 0, 0;"
+		"b 3f;"
+
+		/* Transaction abort handler */
+		"2: ;"
+		"li 0, 1;"
+		"ori %[res], 0, 0;"
+		"mfspr %[texasr], %[sprn_texasr];"
+
+		"3: ;"
+		: [res] "=r" (result), [texasr] "=r" (texasr)
+		: [gpr_1]"i"(GPR_1), [gpr_2]"i"(GPR_2),
+		[sprn_texasr] "i" (SPRN_TEXASR), [flt_1] "b" (&a),
+		[flt_2] "b" (&b), [cptr1] "b" (&cptr[1])
+		: "memory", "r7", "r8", "r9", "r10",
+		"r11", "r12", "r13", "r14", "r15", "r16",
+		"r17", "r18", "r19", "r20", "r21", "r22",
+		"r23", "r24", "r25", "r26", "r27", "r28",
+		"r29", "r30", "r31"
+		);
+
+	if (result) {
+		if (!cptr[0])
+			goto trans;
+
+		shmdt((void *)cptr);
+		store_gpr(gpr_buf);
+		store_fpr_single_precision(fpr_buf);
+
+		if (validate_gpr(gpr_buf, GPR_3))
+			exit(1);
+
+		if (validate_fpr_float(fpr_buf, c))
+			exit(1);
+
+		exit(0);
+	}
+	shmdt((void *)cptr);
+	exit(1);
+}
+
+int trace_tm_gpr(pid_t child)
+{
+	unsigned long gpr[18];
+	unsigned long fpr[32];
+
+	FAIL_IF(start_trace(child));
+	FAIL_IF(show_gpr(child, gpr));
+	FAIL_IF(validate_gpr(gpr, GPR_2));
+	FAIL_IF(show_fpr(child, fpr));
+	FAIL_IF(validate_fpr(fpr, FPR_2_REP));
+	FAIL_IF(show_ckpt_fpr(child, fpr));
+	FAIL_IF(validate_fpr(fpr, FPR_1_REP));
+	FAIL_IF(show_ckpt_gpr(child, gpr));
+	FAIL_IF(validate_gpr(gpr, GPR_1));
+	FAIL_IF(write_ckpt_gpr(child, GPR_3));
+	FAIL_IF(write_ckpt_fpr(child, FPR_3_REP));
+
+	pptr[0] = 1;
+	FAIL_IF(stop_trace(child));
+
+	return TEST_PASS;
+}
+
+int ptrace_tm_gpr(void)
+{
+	pid_t pid;
+	int ret, status;
+
+	SKIP_IF(!have_htm());
+	shm_id = shmget(IPC_PRIVATE, sizeof(int) * 2, 0777|IPC_CREAT);
+	pid = fork();
+	if (pid < 0) {
+		perror("fork() failed");
+		return TEST_FAIL;
+	}
+	if (pid == 0)
+		tm_gpr();
+
+	if (pid) {
+		pptr = (unsigned long *)shmat(shm_id, NULL, 0);
+
+		while (!pptr[1])
+			asm volatile("" : : : "memory");
+		ret = trace_tm_gpr(pid);
+		if (ret) {
+			kill(pid, SIGTERM);
+			return TEST_FAIL;
+		}
+
+		shmdt((void *)pptr);
+
+		ret = wait(&status);
+		shmctl(shm_id, IPC_RMID, NULL);
+		if (ret != pid) {
+			printf("Child's exit status not captured\n");
+			return TEST_FAIL;
+		}
+
+		return (WIFEXITED(status) && WEXITSTATUS(status)) ? TEST_FAIL :
+			TEST_PASS;
+	}
+	return TEST_PASS;
+}
+
+int main(int argc, char *argv[])
+{
+	return test_harness(ptrace_tm_gpr, "ptrace_tm_gpr");
+}
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-gpr.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-gpr.c
new file mode 100644
index 000000000..dbdffa2e2
--- /dev/null
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-gpr.c
@@ -0,0 +1,169 @@
+/*
+ * Ptrace test for GPR/FPR registers in TM Suspend context
+ *
+ * Copyright (C) 2015 Anshuman Khandual, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include "ptrace.h"
+#include "ptrace-gpr.h"
+#include "tm.h"
+
+/* Tracer and Tracee Shared Data */
+int shm_id;
+int *cptr, *pptr;
+
+float a = FPR_1;
+float b = FPR_2;
+float c = FPR_3;
+float d = FPR_4;
+
+__attribute__((used)) void wait_parent(void)
+{
+	cptr[2] = 1;
+	while (!cptr[1])
+		asm volatile("" : : : "memory");
+}
+
+void tm_spd_gpr(void)
+{
+	unsigned long gpr_buf[18];
+	unsigned long result, texasr;
+	float fpr_buf[32];
+
+	cptr = (int *)shmat(shm_id, NULL, 0);
+
+trans:
+	cptr[2] = 0;
+	asm __volatile__(
+		ASM_LOAD_GPR_IMMED(gpr_1)
+		ASM_LOAD_FPR_SINGLE_PRECISION(flt_1)
+
+		"1: ;"
+		"tbegin.;"
+		"beq 2f;"
+
+		ASM_LOAD_GPR_IMMED(gpr_2)
+		"tsuspend.;"
+		ASM_LOAD_GPR_IMMED(gpr_4)
+		ASM_LOAD_FPR_SINGLE_PRECISION(flt_4)
+
+		"bl wait_parent;"
+		"tresume.;"
+		"tend.;"
+		"li 0, 0;"
+		"ori %[res], 0, 0;"
+		"b 3f;"
+
+		/* Transaction abort handler */
+		"2: ;"
+		"li 0, 1;"
+		"ori %[res], 0, 0;"
+		"mfspr %[texasr], %[sprn_texasr];"
+
+		"3: ;"
+		: [res] "=r" (result), [texasr] "=r" (texasr)
+		: [gpr_1]"i"(GPR_1), [gpr_2]"i"(GPR_2), [gpr_4]"i"(GPR_4),
+		[sprn_texasr] "i" (SPRN_TEXASR), [flt_1] "b" (&a),
+		[flt_4] "b" (&d)
+		: "memory", "r5", "r6", "r7",
+		"r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
+		"r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23",
+		"r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31"
+		);
+
+	if (result) {
+		if (!cptr[0])
+			goto trans;
+
+		shmdt((void *)cptr);
+		store_gpr(gpr_buf);
+		store_fpr_single_precision(fpr_buf);
+
+		if (validate_gpr(gpr_buf, GPR_3))
+			exit(1);
+
+		if (validate_fpr_float(fpr_buf, c))
+			exit(1);
+		exit(0);
+	}
+	shmdt((void *)cptr);
+	exit(1);
+}
+
+int trace_tm_spd_gpr(pid_t child)
+{
+	unsigned long gpr[18];
+	unsigned long fpr[32];
+
+	FAIL_IF(start_trace(child));
+	FAIL_IF(show_gpr(child, gpr));
+	FAIL_IF(validate_gpr(gpr, GPR_4));
+	FAIL_IF(show_fpr(child, fpr));
+	FAIL_IF(validate_fpr(fpr, FPR_4_REP));
+	FAIL_IF(show_ckpt_fpr(child, fpr));
+	FAIL_IF(validate_fpr(fpr, FPR_1_REP));
+	FAIL_IF(show_ckpt_gpr(child, gpr));
+	FAIL_IF(validate_gpr(gpr, GPR_1));
+	FAIL_IF(write_ckpt_gpr(child, GPR_3));
+	FAIL_IF(write_ckpt_fpr(child, FPR_3_REP));
+
+	pptr[0] = 1;
+	pptr[1] = 1;
+	FAIL_IF(stop_trace(child));
+	return TEST_PASS;
+}
+
+int ptrace_tm_spd_gpr(void)
+{
+	pid_t pid;
+	int ret, status;
+
+	SKIP_IF(!have_htm());
+	shm_id = shmget(IPC_PRIVATE, sizeof(int) * 3, 0777|IPC_CREAT);
+	pid = fork();
+	if (pid < 0) {
+		perror("fork() failed");
+		return TEST_FAIL;
+	}
+
+	if (pid == 0)
+		tm_spd_gpr();
+
+	if (pid) {
+		pptr = (int *)shmat(shm_id, NULL, 0);
+		pptr[0] = 0;
+		pptr[1] = 0;
+
+		while (!pptr[2])
+			asm volatile("" : : : "memory");
+		ret = trace_tm_spd_gpr(pid);
+		if (ret) {
+			kill(pid, SIGTERM);
+			shmdt((void *)pptr);
+			shmctl(shm_id, IPC_RMID, NULL);
+			return TEST_FAIL;
+		}
+
+		shmdt((void *)pptr);
+
+		ret = wait(&status);
+		shmctl(shm_id, IPC_RMID, NULL);
+		if (ret != pid) {
+			printf("Child's exit status not captured\n");
+			return TEST_FAIL;
+		}
+
+		return (WIFEXITED(status) && WEXITSTATUS(status)) ? TEST_FAIL :
+			TEST_PASS;
+	}
+	return TEST_PASS;
+}
+
+int main(int argc, char *argv[])
+{
+	return test_harness(ptrace_tm_spd_gpr, "ptrace_tm_spd_gpr");
+}
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-tar.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-tar.c
new file mode 100644
index 000000000..f47174746
--- /dev/null
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-tar.c
@@ -0,0 +1,174 @@
+/*
+ * Ptrace test for TAR, PPR, DSCR registers in the TM Suspend context
+ *
+ * Copyright (C) 2015 Anshuman Khandual, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include "ptrace.h"
+#include "tm.h"
+#include "ptrace-tar.h"
+
+int shm_id;
+int *cptr, *pptr;
+
+__attribute__((used)) void wait_parent(void)
+{
+	cptr[2] = 1;
+	while (!cptr[1])
+		asm volatile("" : : : "memory");
+}
+
+void tm_spd_tar(void)
+{
+	unsigned long result, texasr;
+	unsigned long regs[3];
+	int ret;
+
+	cptr = (int *)shmat(shm_id, NULL, 0);
+
+trans:
+	cptr[2] = 0;
+	asm __volatile__(
+		"li	4, %[tar_1];"
+		"mtspr %[sprn_tar],  4;"	/* TAR_1 */
+		"li	4, %[dscr_1];"
+		"mtspr %[sprn_dscr], 4;"	/* DSCR_1 */
+		"or     31,31,31;"		/* PPR_1*/
+
+		"1: ;"
+		"tbegin.;"
+		"beq 2f;"
+
+		"li	4, %[tar_2];"
+		"mtspr %[sprn_tar],  4;"	/* TAR_2 */
+		"li	4, %[dscr_2];"
+		"mtspr %[sprn_dscr], 4;"	/* DSCR_2 */
+		"or     1,1,1;"			/* PPR_2 */
+
+		"tsuspend.;"
+		"li	4, %[tar_3];"
+		"mtspr %[sprn_tar],  4;"	/* TAR_3 */
+		"li	4, %[dscr_3];"
+		"mtspr %[sprn_dscr], 4;"	/* DSCR_3 */
+		"or     6,6,6;"			/* PPR_3 */
+		"bl wait_parent;"
+		"tresume.;"
+
+		"tend.;"
+		"li 0, 0;"
+		"ori %[res], 0, 0;"
+		"b 3f;"
+
+		/* Transaction abort handler */
+		"2: ;"
+		"li 0, 1;"
+		"ori %[res], 0, 0;"
+		"mfspr %[texasr], %[sprn_texasr];"
+
+		"3: ;"
+
+		: [res] "=r" (result), [texasr] "=r" (texasr)
+		: [sprn_dscr]"i"(SPRN_DSCR),
+		[sprn_tar]"i"(SPRN_TAR), [sprn_ppr]"i"(SPRN_PPR),
+		[sprn_texasr]"i"(SPRN_TEXASR), [tar_1]"i"(TAR_1),
+		[dscr_1]"i"(DSCR_1), [tar_2]"i"(TAR_2), [dscr_2]"i"(DSCR_2),
+		[tar_3]"i"(TAR_3), [dscr_3]"i"(DSCR_3)
+		: "memory", "r0", "r1", "r3", "r4", "r5", "r6"
+		);
+
+	/* TM failed, analyse */
+	if (result) {
+		if (!cptr[0])
+			goto trans;
+
+		regs[0] = mfspr(SPRN_TAR);
+		regs[1] = mfspr(SPRN_PPR);
+		regs[2] = mfspr(SPRN_DSCR);
+
+		shmdt(&cptr);
+		printf("%-30s TAR: %lu PPR: %lx DSCR: %lu\n",
+				user_read, regs[0], regs[1], regs[2]);
+
+		ret = validate_tar_registers(regs, TAR_4, PPR_4, DSCR_4);
+		if (ret)
+			exit(1);
+		exit(0);
+	}
+	shmdt(&cptr);
+	exit(1);
+}
+
+int trace_tm_spd_tar(pid_t child)
+{
+	unsigned long regs[3];
+
+	FAIL_IF(start_trace(child));
+	FAIL_IF(show_tar_registers(child, regs));
+	printf("%-30s TAR: %lu PPR: %lx DSCR: %lu\n",
+			ptrace_read_running, regs[0], regs[1], regs[2]);
+
+	FAIL_IF(validate_tar_registers(regs, TAR_3, PPR_3, DSCR_3));
+	FAIL_IF(show_tm_checkpointed_state(child, regs));
+	printf("%-30s TAR: %lu PPR: %lx DSCR: %lu\n",
+			ptrace_read_ckpt, regs[0], regs[1], regs[2]);
+
+	FAIL_IF(validate_tar_registers(regs, TAR_1, PPR_1, DSCR_1));
+	FAIL_IF(write_ckpt_tar_registers(child, TAR_4, PPR_4, DSCR_4));
+	printf("%-30s TAR: %u PPR: %lx DSCR: %u\n",
+			ptrace_write_ckpt, TAR_4, PPR_4, DSCR_4);
+
+	pptr[0] = 1;
+	pptr[1] = 1;
+	FAIL_IF(stop_trace(child));
+	return TEST_PASS;
+}
+
+int ptrace_tm_spd_tar(void)
+{
+	pid_t pid;
+	int ret, status;
+
+	SKIP_IF(!have_htm());
+	shm_id = shmget(IPC_PRIVATE, sizeof(int) * 3, 0777|IPC_CREAT);
+	pid = fork();
+	if (pid == 0)
+		tm_spd_tar();
+
+	pptr = (int *)shmat(shm_id, NULL, 0);
+	pptr[0] = 0;
+	pptr[1] = 0;
+
+	if (pid) {
+		while (!pptr[2])
+			asm volatile("" : : : "memory");
+		ret = trace_tm_spd_tar(pid);
+		if (ret) {
+			kill(pid, SIGTERM);
+			shmdt(&pptr);
+			shmctl(shm_id, IPC_RMID, NULL);
+			return TEST_FAIL;
+		}
+
+		shmdt(&pptr);
+
+		ret = wait(&status);
+		shmctl(shm_id, IPC_RMID, NULL);
+		if (ret != pid) {
+			printf("Child's exit status not captured\n");
+			return TEST_FAIL;
+		}
+
+		return (WIFEXITED(status) && WEXITSTATUS(status)) ? TEST_FAIL :
+			TEST_PASS;
+	}
+	return TEST_PASS;
+}
+
+int main(int argc, char *argv[])
+{
+	return test_harness(ptrace_tm_spd_tar, "ptrace_tm_spd_tar");
+}
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-vsx.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-vsx.c
new file mode 100644
index 000000000..18a685bf6
--- /dev/null
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-vsx.c
@@ -0,0 +1,184 @@
+/*
+ * Ptrace test for VMX/VSX registers in the TM Suspend context
+ *
+ * Copyright (C) 2015 Anshuman Khandual, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include "ptrace.h"
+#include "tm.h"
+#include "ptrace-vsx.h"
+
+int shm_id;
+int *cptr, *pptr;
+
+unsigned long fp_load[VEC_MAX];
+unsigned long fp_load_new[VEC_MAX];
+unsigned long fp_store[VEC_MAX];
+unsigned long fp_load_ckpt[VEC_MAX];
+unsigned long fp_load_ckpt_new[VEC_MAX];
+
+__attribute__((used)) void load_vsx(void)
+{
+	loadvsx(fp_load, 0);
+}
+
+__attribute__((used)) void load_vsx_new(void)
+{
+	loadvsx(fp_load_new, 0);
+}
+
+__attribute__((used)) void load_vsx_ckpt(void)
+{
+	loadvsx(fp_load_ckpt, 0);
+}
+
+__attribute__((used)) void wait_parent(void)
+{
+	cptr[2] = 1;
+	while (!cptr[1])
+		asm volatile("" : : : "memory");
+}
+
+void tm_spd_vsx(void)
+{
+	unsigned long result, texasr;
+	int ret;
+
+	cptr = (int *)shmat(shm_id, NULL, 0);
+
+trans:
+	cptr[2] = 0;
+	asm __volatile__(
+		"bl load_vsx_ckpt;"
+
+		"1: ;"
+		"tbegin.;"
+		"beq 2f;"
+
+		"bl load_vsx_new;"
+		"tsuspend.;"
+		"bl load_vsx;"
+		"bl wait_parent;"
+		"tresume.;"
+
+		"tend.;"
+		"li 0, 0;"
+		"ori %[res], 0, 0;"
+		"b 3f;"
+
+		"2: ;"
+		"li 0, 1;"
+		"ori %[res], 0, 0;"
+		"mfspr %[texasr], %[sprn_texasr];"
+
+		"3: ;"
+		: [res] "=r" (result), [texasr] "=r" (texasr)
+		: [sprn_texasr] "i"  (SPRN_TEXASR)
+		: "memory", "r0", "r1", "r3", "r4",
+		"r7", "r8", "r9", "r10", "r11"
+		);
+
+	if (result) {
+		if (!cptr[0])
+			goto trans;
+		shmdt((void *)cptr);
+
+		storevsx(fp_store, 0);
+		ret = compare_vsx_vmx(fp_store, fp_load_ckpt_new);
+		if (ret)
+			exit(1);
+		exit(0);
+	}
+	shmdt((void *)cptr);
+	exit(1);
+}
+
+int trace_tm_spd_vsx(pid_t child)
+{
+	unsigned long vsx[VSX_MAX];
+	unsigned long vmx[VMX_MAX + 2][2];
+
+	FAIL_IF(start_trace(child));
+	FAIL_IF(show_vsx(child, vsx));
+	FAIL_IF(validate_vsx(vsx, fp_load));
+	FAIL_IF(show_vmx(child, vmx));
+	FAIL_IF(validate_vmx(vmx, fp_load));
+	FAIL_IF(show_vsx_ckpt(child, vsx));
+	FAIL_IF(validate_vsx(vsx, fp_load_ckpt));
+	FAIL_IF(show_vmx_ckpt(child, vmx));
+	FAIL_IF(validate_vmx(vmx, fp_load_ckpt));
+
+	memset(vsx, 0, sizeof(vsx));
+	memset(vmx, 0, sizeof(vmx));
+
+	load_vsx_vmx(fp_load_ckpt_new, vsx, vmx);
+
+	FAIL_IF(write_vsx_ckpt(child, vsx));
+	FAIL_IF(write_vmx_ckpt(child, vmx));
+
+	pptr[0] = 1;
+	pptr[1] = 1;
+	FAIL_IF(stop_trace(child));
+
+	return TEST_PASS;
+}
+
+int ptrace_tm_spd_vsx(void)
+{
+	pid_t pid;
+	int ret, status, i;
+
+	SKIP_IF(!have_htm());
+	shm_id = shmget(IPC_PRIVATE, sizeof(int) * 3, 0777|IPC_CREAT);
+
+	for (i = 0; i < 128; i++) {
+		fp_load[i] = 1 + rand();
+		fp_load_new[i] = 1 + 2 * rand();
+		fp_load_ckpt[i] = 1 + 3 * rand();
+		fp_load_ckpt_new[i] = 1 + 4 * rand();
+	}
+
+	pid = fork();
+	if (pid < 0) {
+		perror("fork() failed");
+		return TEST_FAIL;
+	}
+
+	if (pid == 0)
+		tm_spd_vsx();
+
+	if (pid) {
+		pptr = (int *)shmat(shm_id, NULL, 0);
+		while (!pptr[2])
+			asm volatile("" : : : "memory");
+
+		ret = trace_tm_spd_vsx(pid);
+		if (ret) {
+			kill(pid, SIGKILL);
+			shmdt((void *)pptr);
+			shmctl(shm_id, IPC_RMID, NULL);
+			return TEST_FAIL;
+		}
+
+		shmdt((void *)pptr);
+		ret = wait(&status);
+		shmctl(shm_id, IPC_RMID, NULL);
+		if (ret != pid) {
+			printf("Child's exit status not captured\n");
+			return TEST_FAIL;
+		}
+
+		return (WIFEXITED(status) && WEXITSTATUS(status)) ? TEST_FAIL :
+			TEST_PASS;
+	}
+	return TEST_PASS;
+}
+
+int main(int argc, char *argv[])
+{
+	return test_harness(ptrace_tm_spd_vsx, "ptrace_tm_spd_vsx");
+}
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spr.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spr.c
new file mode 100644
index 000000000..ba0499925
--- /dev/null
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spr.c
@@ -0,0 +1,167 @@
+/*
+ * Ptrace test TM SPR registers
+ *
+ * Copyright (C) 2015 Anshuman Khandual, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include "ptrace.h"
+#include "tm.h"
+
+/* Tracee and tracer shared data */
+struct shared {
+	int flag;
+	struct tm_spr_regs regs;
+};
+unsigned long tfhar;
+
+int shm_id;
+struct shared *cptr, *pptr;
+
+int shm_id1;
+int *cptr1, *pptr1;
+
+#define TM_KVM_SCHED   0xe0000001ac000001
+int validate_tm_spr(struct tm_spr_regs *regs)
+{
+	FAIL_IF(regs->tm_tfhar != tfhar);
+	FAIL_IF((regs->tm_texasr == TM_KVM_SCHED) && (regs->tm_tfiar != 0));
+
+	return TEST_PASS;
+}
+
+void tm_spr(void)
+{
+	unsigned long result, texasr;
+	int ret;
+
+	cptr = (struct shared *)shmat(shm_id, NULL, 0);
+	cptr1 = (int *)shmat(shm_id1, NULL, 0);
+
+trans:
+	cptr1[0] = 0;
+	asm __volatile__(
+		"1: ;"
+		/* TM failover handler should follow "tbegin.;" */
+		"mflr 31;"
+		"bl 4f;"	/* $ = TFHAR - 12 */
+		"4: ;"
+		"mflr %[tfhar];"
+		"mtlr 31;"
+
+		"tbegin.;"
+		"beq 2f;"
+
+		"tsuspend.;"
+		"li 8, 1;"
+		"sth 8, 0(%[cptr1]);"
+		"tresume.;"
+		"b .;"
+
+		"tend.;"
+		"li 0, 0;"
+		"ori %[res], 0, 0;"
+		"b 3f;"
+
+		"2: ;"
+
+		"li 0, 1;"
+		"ori %[res], 0, 0;"
+		"mfspr %[texasr], %[sprn_texasr];"
+
+		"3: ;"
+		: [tfhar] "=r" (tfhar), [res] "=r" (result),
+		[texasr] "=r" (texasr), [cptr1] "=b" (cptr1)
+		: [sprn_texasr] "i"  (SPRN_TEXASR)
+		: "memory", "r0", "r8", "r31"
+		);
+
+	/* There are 2 32bit instructions before tbegin. */
+	tfhar += 12;
+
+	if (result) {
+		if (!cptr->flag)
+			goto trans;
+
+		ret = validate_tm_spr((struct tm_spr_regs *)&cptr->regs);
+		shmdt((void *)cptr);
+		shmdt((void *)cptr1);
+		if (ret)
+			exit(1);
+		exit(0);
+	}
+	shmdt((void *)cptr);
+	shmdt((void *)cptr1);
+	exit(1);
+}
+
+int trace_tm_spr(pid_t child)
+{
+	FAIL_IF(start_trace(child));
+	FAIL_IF(show_tm_spr(child, (struct tm_spr_regs *)&pptr->regs));
+
+	printf("TFHAR: %lx TEXASR: %lx TFIAR: %lx\n", pptr->regs.tm_tfhar,
+				pptr->regs.tm_texasr, pptr->regs.tm_tfiar);
+
+	pptr->flag = 1;
+	FAIL_IF(stop_trace(child));
+
+	return TEST_PASS;
+}
+
+int ptrace_tm_spr(void)
+{
+	pid_t pid;
+	int ret, status;
+
+	SKIP_IF(!have_htm());
+	shm_id = shmget(IPC_PRIVATE, sizeof(struct shared), 0777|IPC_CREAT);
+	shm_id1 = shmget(IPC_PRIVATE, sizeof(int), 0777|IPC_CREAT);
+	pid = fork();
+	if (pid < 0) {
+		perror("fork() failed");
+		return TEST_FAIL;
+	}
+
+	if (pid == 0)
+		tm_spr();
+
+	if (pid) {
+		pptr = (struct shared *)shmat(shm_id, NULL, 0);
+		pptr1 = (int *)shmat(shm_id1, NULL, 0);
+
+		while (!pptr1[0])
+			asm volatile("" : : : "memory");
+		ret = trace_tm_spr(pid);
+		if (ret) {
+			kill(pid, SIGKILL);
+			shmdt((void *)pptr);
+			shmdt((void *)pptr1);
+			shmctl(shm_id, IPC_RMID, NULL);
+			shmctl(shm_id1, IPC_RMID, NULL);
+			return TEST_FAIL;
+		}
+
+		shmdt((void *)pptr);
+		shmdt((void *)pptr1);
+		ret = wait(&status);
+		shmctl(shm_id, IPC_RMID, NULL);
+		shmctl(shm_id1, IPC_RMID, NULL);
+		if (ret != pid) {
+			printf("Child's exit status not captured\n");
+			return TEST_FAIL;
+		}
+
+		return (WIFEXITED(status) && WEXITSTATUS(status)) ? TEST_FAIL :
+			TEST_PASS;
+	}
+	return TEST_PASS;
+}
+
+int main(int argc, char *argv[])
+{
+	return test_harness(ptrace_tm_spr, "ptrace_tm_spr");
+}
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-tar.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-tar.c
new file mode 100644
index 000000000..f70023b25
--- /dev/null
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-tar.c
@@ -0,0 +1,160 @@
+/*
+ * Ptrace test for TAR, PPR, DSCR registers in the TM context
+ *
+ * Copyright (C) 2015 Anshuman Khandual, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include "ptrace.h"
+#include "tm.h"
+#include "ptrace-tar.h"
+
+int shm_id;
+unsigned long *cptr, *pptr;
+
+
+void tm_tar(void)
+{
+	unsigned long result, texasr;
+	unsigned long regs[3];
+	int ret;
+
+	cptr = (unsigned long *)shmat(shm_id, NULL, 0);
+
+trans:
+	cptr[1] = 0;
+	asm __volatile__(
+		"li	4, %[tar_1];"
+		"mtspr %[sprn_tar],  4;"	/* TAR_1 */
+		"li	4, %[dscr_1];"
+		"mtspr %[sprn_dscr], 4;"	/* DSCR_1 */
+		"or     31,31,31;"		/* PPR_1*/
+
+		"1: ;"
+		"tbegin.;"
+		"beq 2f;"
+
+		"li	4, %[tar_2];"
+		"mtspr %[sprn_tar],  4;"	/* TAR_2 */
+		"li	4, %[dscr_2];"
+		"mtspr %[sprn_dscr], 4;"	/* DSCR_2 */
+		"or     1,1,1;"			/* PPR_2 */
+		"tsuspend.;"
+		"li 0, 1;"
+		"stw 0, 0(%[cptr1]);"
+		"tresume.;"
+		"b .;"
+
+		"tend.;"
+		"li 0, 0;"
+		"ori %[res], 0, 0;"
+		"b 3f;"
+
+		/* Transaction abort handler */
+		"2: ;"
+		"li 0, 1;"
+		"ori %[res], 0, 0;"
+		"mfspr %[texasr], %[sprn_texasr];"
+
+		"3: ;"
+
+		: [res] "=r" (result), [texasr] "=r" (texasr)
+		: [sprn_dscr]"i"(SPRN_DSCR), [sprn_tar]"i"(SPRN_TAR),
+		[sprn_ppr]"i"(SPRN_PPR), [sprn_texasr]"i"(SPRN_TEXASR),
+		[tar_1]"i"(TAR_1), [dscr_1]"i"(DSCR_1), [tar_2]"i"(TAR_2),
+		[dscr_2]"i"(DSCR_2), [cptr1] "b" (&cptr[1])
+		: "memory", "r0", "r1", "r3", "r4", "r5", "r6"
+		);
+
+	/* TM failed, analyse */
+	if (result) {
+		if (!cptr[0])
+			goto trans;
+
+		regs[0] = mfspr(SPRN_TAR);
+		regs[1] = mfspr(SPRN_PPR);
+		regs[2] = mfspr(SPRN_DSCR);
+
+		shmdt(&cptr);
+		printf("%-30s TAR: %lu PPR: %lx DSCR: %lu\n",
+				user_read, regs[0], regs[1], regs[2]);
+
+		ret = validate_tar_registers(regs, TAR_4, PPR_4, DSCR_4);
+		if (ret)
+			exit(1);
+		exit(0);
+	}
+	shmdt(&cptr);
+	exit(1);
+}
+
+int trace_tm_tar(pid_t child)
+{
+	unsigned long regs[3];
+
+	FAIL_IF(start_trace(child));
+	FAIL_IF(show_tar_registers(child, regs));
+	printf("%-30s TAR: %lu PPR: %lx DSCR: %lu\n",
+			ptrace_read_running, regs[0], regs[1], regs[2]);
+
+	FAIL_IF(validate_tar_registers(regs, TAR_2, PPR_2, DSCR_2));
+	FAIL_IF(show_tm_checkpointed_state(child, regs));
+	printf("%-30s TAR: %lu PPR: %lx DSCR: %lu\n",
+			ptrace_read_ckpt, regs[0], regs[1], regs[2]);
+
+	FAIL_IF(validate_tar_registers(regs, TAR_1, PPR_1, DSCR_1));
+	FAIL_IF(write_ckpt_tar_registers(child, TAR_4, PPR_4, DSCR_4));
+	printf("%-30s TAR: %u PPR: %lx DSCR: %u\n",
+			ptrace_write_ckpt, TAR_4, PPR_4, DSCR_4);
+
+	pptr[0] = 1;
+	FAIL_IF(stop_trace(child));
+	return TEST_PASS;
+}
+
+int ptrace_tm_tar(void)
+{
+	pid_t pid;
+	int ret, status;
+
+	SKIP_IF(!have_htm());
+	shm_id = shmget(IPC_PRIVATE, sizeof(int) * 2, 0777|IPC_CREAT);
+	pid = fork();
+	if (pid == 0)
+		tm_tar();
+
+	pptr = (unsigned long *)shmat(shm_id, NULL, 0);
+	pptr[0] = 0;
+
+	if (pid) {
+		while (!pptr[1])
+			asm volatile("" : : : "memory");
+		ret = trace_tm_tar(pid);
+		if (ret) {
+			kill(pid, SIGTERM);
+			shmdt(&pptr);
+			shmctl(shm_id, IPC_RMID, NULL);
+			return TEST_FAIL;
+		}
+		shmdt(&pptr);
+
+		ret = wait(&status);
+		shmctl(shm_id, IPC_RMID, NULL);
+		if (ret != pid) {
+			printf("Child's exit status not captured\n");
+			return TEST_FAIL;
+		}
+
+		return (WIFEXITED(status) && WEXITSTATUS(status)) ? TEST_FAIL :
+			TEST_PASS;
+	}
+	return TEST_PASS;
+}
+
+int main(int argc, char *argv[])
+{
+	return test_harness(ptrace_tm_tar, "ptrace_tm_tar");
+}
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-vsx.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-vsx.c
new file mode 100644
index 000000000..dfba80058
--- /dev/null
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-vsx.c
@@ -0,0 +1,167 @@
+/*
+ * Ptrace test for VMX/VSX registers in the TM context
+ *
+ * Copyright (C) 2015 Anshuman Khandual, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include "ptrace.h"
+#include "tm.h"
+#include "ptrace-vsx.h"
+
+int shm_id;
+unsigned long *cptr, *pptr;
+
+unsigned long fp_load[VEC_MAX];
+unsigned long fp_store[VEC_MAX];
+unsigned long fp_load_ckpt[VEC_MAX];
+unsigned long fp_load_ckpt_new[VEC_MAX];
+
+__attribute__((used)) void load_vsx(void)
+{
+	loadvsx(fp_load, 0);
+}
+
+__attribute__((used)) void load_vsx_ckpt(void)
+{
+	loadvsx(fp_load_ckpt, 0);
+}
+
+void tm_vsx(void)
+{
+	unsigned long result, texasr;
+	int ret;
+
+	cptr = (unsigned long *)shmat(shm_id, NULL, 0);
+
+trans:
+	cptr[1] = 0;
+	asm __volatile__(
+		"bl load_vsx_ckpt;"
+
+		"1: ;"
+		"tbegin.;"
+		"beq 2f;"
+
+		"bl load_vsx;"
+		"tsuspend.;"
+		"li 7, 1;"
+		"stw 7, 0(%[cptr1]);"
+		"tresume.;"
+		"b .;"
+
+		"tend.;"
+		"li 0, 0;"
+		"ori %[res], 0, 0;"
+		"b 3f;"
+
+		"2: ;"
+		"li 0, 1;"
+		"ori %[res], 0, 0;"
+		"mfspr %[texasr], %[sprn_texasr];"
+
+		"3: ;"
+		: [res] "=r" (result), [texasr] "=r" (texasr)
+		: [sprn_texasr] "i"  (SPRN_TEXASR), [cptr1] "b" (&cptr[1])
+		: "memory", "r0", "r1", "r3", "r4",
+		"r7", "r8", "r9", "r10", "r11"
+		);
+
+	if (result) {
+		if (!cptr[0])
+			goto trans;
+
+		shmdt((void *)cptr);
+		storevsx(fp_store, 0);
+		ret = compare_vsx_vmx(fp_store, fp_load_ckpt_new);
+		if (ret)
+			exit(1);
+		exit(0);
+	}
+	shmdt((void *)cptr);
+	exit(1);
+}
+
+int trace_tm_vsx(pid_t child)
+{
+	unsigned long vsx[VSX_MAX];
+	unsigned long vmx[VMX_MAX + 2][2];
+
+	FAIL_IF(start_trace(child));
+	FAIL_IF(show_vsx(child, vsx));
+	FAIL_IF(validate_vsx(vsx, fp_load));
+	FAIL_IF(show_vmx(child, vmx));
+	FAIL_IF(validate_vmx(vmx, fp_load));
+	FAIL_IF(show_vsx_ckpt(child, vsx));
+	FAIL_IF(validate_vsx(vsx, fp_load_ckpt));
+	FAIL_IF(show_vmx_ckpt(child, vmx));
+	FAIL_IF(validate_vmx(vmx, fp_load_ckpt));
+	memset(vsx, 0, sizeof(vsx));
+	memset(vmx, 0, sizeof(vmx));
+
+	load_vsx_vmx(fp_load_ckpt_new, vsx, vmx);
+
+	FAIL_IF(write_vsx_ckpt(child, vsx));
+	FAIL_IF(write_vmx_ckpt(child, vmx));
+	pptr[0] = 1;
+	FAIL_IF(stop_trace(child));
+	return TEST_PASS;
+}
+
+int ptrace_tm_vsx(void)
+{
+	pid_t pid;
+	int ret, status, i;
+
+	SKIP_IF(!have_htm());
+	shm_id = shmget(IPC_PRIVATE, sizeof(int) * 2, 0777|IPC_CREAT);
+
+	for (i = 0; i < 128; i++) {
+		fp_load[i] = 1 + rand();
+		fp_load_ckpt[i] = 1 + 2 * rand();
+		fp_load_ckpt_new[i] = 1 + 3 * rand();
+	}
+
+	pid = fork();
+	if (pid < 0) {
+		perror("fork() failed");
+		return TEST_FAIL;
+	}
+
+	if (pid == 0)
+		tm_vsx();
+
+	if (pid) {
+		pptr = (unsigned long *)shmat(shm_id, NULL, 0);
+		while (!pptr[1])
+			asm volatile("" : : : "memory");
+
+		ret = trace_tm_vsx(pid);
+		if (ret) {
+			kill(pid, SIGKILL);
+			shmdt((void *)pptr);
+			shmctl(shm_id, IPC_RMID, NULL);
+			return TEST_FAIL;
+		}
+
+		shmdt((void *)pptr);
+		ret = wait(&status);
+		shmctl(shm_id, IPC_RMID, NULL);
+		if (ret != pid) {
+			printf("Child's exit status not captured\n");
+			return TEST_FAIL;
+		}
+
+		return (WIFEXITED(status) && WEXITSTATUS(status)) ? TEST_FAIL :
+			TEST_PASS;
+	}
+	return TEST_PASS;
+}
+
+int main(int argc, char *argv[])
+{
+	return test_harness(ptrace_tm_vsx, "ptrace_tm_vsx");
+}
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-vsx.c b/tools/testing/selftests/powerpc/ptrace/ptrace-vsx.c
new file mode 100644
index 000000000..04084ee7d
--- /dev/null
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-vsx.c
@@ -0,0 +1,117 @@
+/*
+ * Ptrace test for VMX/VSX registers
+ *
+ * Copyright (C) 2015 Anshuman Khandual, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include "ptrace.h"
+#include "ptrace-vsx.h"
+
+/* Tracer and Tracee Shared Data */
+int shm_id;
+int *cptr, *pptr;
+
+unsigned long fp_load[VEC_MAX];
+unsigned long fp_load_new[VEC_MAX];
+unsigned long fp_store[VEC_MAX];
+
+void vsx(void)
+{
+	int ret;
+
+	cptr = (int *)shmat(shm_id, NULL, 0);
+	loadvsx(fp_load, 0);
+	cptr[1] = 1;
+
+	while (!cptr[0])
+		asm volatile("" : : : "memory");
+	shmdt((void *) cptr);
+
+	storevsx(fp_store, 0);
+	ret = compare_vsx_vmx(fp_store, fp_load_new);
+	if (ret)
+		exit(1);
+	exit(0);
+}
+
+int trace_vsx(pid_t child)
+{
+	unsigned long vsx[VSX_MAX];
+	unsigned long vmx[VMX_MAX + 2][2];
+
+	FAIL_IF(start_trace(child));
+	FAIL_IF(show_vsx(child, vsx));
+	FAIL_IF(validate_vsx(vsx, fp_load));
+	FAIL_IF(show_vmx(child, vmx));
+	FAIL_IF(validate_vmx(vmx, fp_load));
+
+	memset(vsx, 0, sizeof(vsx));
+	memset(vmx, 0, sizeof(vmx));
+	load_vsx_vmx(fp_load_new, vsx, vmx);
+
+	FAIL_IF(write_vsx(child, vsx));
+	FAIL_IF(write_vmx(child, vmx));
+	FAIL_IF(stop_trace(child));
+
+	return TEST_PASS;
+}
+
+int ptrace_vsx(void)
+{
+	pid_t pid;
+	int ret, status, i;
+
+	shm_id = shmget(IPC_PRIVATE, sizeof(int) * 2, 0777|IPC_CREAT);
+
+	for (i = 0; i < VEC_MAX; i++)
+		fp_load[i] = i + rand();
+
+	for (i = 0; i < VEC_MAX; i++)
+		fp_load_new[i] = i + 2 * rand();
+
+	pid = fork();
+	if (pid < 0) {
+		perror("fork() failed");
+		return TEST_FAIL;
+	}
+
+	if (pid == 0)
+		vsx();
+
+	if (pid) {
+		pptr = (int *)shmat(shm_id, NULL, 0);
+		while (!pptr[1])
+			asm volatile("" : : : "memory");
+
+		ret = trace_vsx(pid);
+		if (ret) {
+			kill(pid, SIGTERM);
+			shmdt((void *)pptr);
+			shmctl(shm_id, IPC_RMID, NULL);
+			return TEST_FAIL;
+		}
+
+		pptr[0] = 1;
+		shmdt((void *)pptr);
+
+		ret = wait(&status);
+		shmctl(shm_id, IPC_RMID, NULL);
+		if (ret != pid) {
+			printf("Child's exit status not captured\n");
+			return TEST_FAIL;
+		}
+
+		return (WIFEXITED(status) && WEXITSTATUS(status)) ? TEST_FAIL :
+			TEST_PASS;
+	}
+	return TEST_PASS;
+}
+
+int main(int argc, char *argv[])
+{
+	return test_harness(ptrace_vsx, "ptrace_vsx");
+}
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-vsx.h b/tools/testing/selftests/powerpc/ptrace/ptrace-vsx.h
new file mode 100644
index 000000000..f4e4b427c
--- /dev/null
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-vsx.h
@@ -0,0 +1,127 @@
+/*
+ * Copyright (C) 2015 Anshuman Khandual, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#define VEC_MAX 128
+#define VSX_MAX 32
+#define VMX_MAX 32
+
+/*
+ * unsigned long vsx[32]
+ * unsigned long load[128]
+ */
+int validate_vsx(unsigned long *vsx, unsigned long *load)
+{
+	int i;
+
+	for (i = 0; i < VSX_MAX; i++) {
+		if (vsx[i] != load[2 * i + 1]) {
+			printf("vsx[%d]: %lx load[%d] %lx\n",
+					i, vsx[i], 2 * i + 1, load[2 * i + 1]);
+			return TEST_FAIL;
+		}
+	}
+	return TEST_PASS;
+}
+
+/*
+ * unsigned long vmx[32][2]
+ * unsigned long load[128]
+ */
+int validate_vmx(unsigned long vmx[][2], unsigned long *load)
+{
+	int i;
+
+	for (i = 0; i < VMX_MAX; i++) {
+		#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+		if ((vmx[i][0] != load[64 + 2 * i]) ||
+				(vmx[i][1] != load[65 + 2 * i])) {
+			printf("vmx[%d][0]: %lx load[%d] %lx\n",
+					i, vmx[i][0], 64 + 2 * i,
+					load[64 + 2 * i]);
+			printf("vmx[%d][1]: %lx load[%d] %lx\n",
+					i, vmx[i][1], 65 + 2 * i,
+					load[65 + 2 * i]);
+			return TEST_FAIL;
+		}
+		#else  /*
+			* In LE each value pair is stored in an
+			* alternate manner.
+			*/
+		if ((vmx[i][0] != load[65 + 2 * i]) ||
+				(vmx[i][1] != load[64 + 2 * i])) {
+			printf("vmx[%d][0]: %lx load[%d] %lx\n",
+					i, vmx[i][0], 65 + 2 * i,
+					load[65 + 2 * i]);
+			printf("vmx[%d][1]: %lx load[%d] %lx\n",
+					i, vmx[i][1], 64 + 2 * i,
+					load[64 + 2 * i]);
+			return TEST_FAIL;
+		}
+		#endif
+	}
+	return TEST_PASS;
+}
+
+/*
+ * unsigned long store[128]
+ * unsigned long load[128]
+ */
+int compare_vsx_vmx(unsigned long *store, unsigned long *load)
+{
+	int i;
+
+	for (i = 0; i < VSX_MAX; i++) {
+		if (store[1 + 2 * i] != load[1 + 2 * i]) {
+			printf("store[%d]: %lx load[%d] %lx\n",
+					1 + 2 * i, store[i],
+					1 + 2 * i, load[i]);
+			return TEST_FAIL;
+		}
+	}
+
+	#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+	for (i = 64; i < VEC_MAX; i++) {
+		if (store[i] != load[i]) {
+			printf("store[%d]: %lx load[%d] %lx\n",
+					i, store[i], i, load[i]);
+			return TEST_FAIL;
+		}
+	}
+	#else	/* In LE each value pair is stored in an alternate manner */
+	for (i = 64; i < VEC_MAX; i++) {
+		if (!(i % 2) && (store[i] != load[i+1])) {
+			printf("store[%d]: %lx load[%d] %lx\n",
+					i, store[i], i+1, load[i+1]);
+			return TEST_FAIL;
+		}
+		if ((i % 2) && (store[i] != load[i-1])) {
+			printf("here store[%d]: %lx load[%d] %lx\n",
+					i, store[i], i-1, load[i-1]);
+			return TEST_FAIL;
+		}
+	}
+	#endif
+	return TEST_PASS;
+}
+
+void load_vsx_vmx(unsigned long *load, unsigned long *vsx,
+		unsigned long vmx[][2])
+{
+	int i;
+
+	for (i = 0; i < VSX_MAX; i++)
+		vsx[i] = load[1 + 2 * i];
+
+	for (i = 0; i < VMX_MAX; i++) {
+		vmx[i][0] = load[64 + 2 * i];
+		vmx[i][1] = load[65 + 2 * i];
+	}
+}
+
+void loadvsx(void *p, int tmp);
+void storevsx(void *p, int tmp);
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace.h b/tools/testing/selftests/powerpc/ptrace/ptrace.h
new file mode 100644
index 000000000..34201cfa8
--- /dev/null
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace.h
@@ -0,0 +1,749 @@
+/*
+ * Ptrace interface test helper functions
+ *
+ * Copyright (C) 2015 Anshuman Khandual, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <inttypes.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <malloc.h>
+#include <errno.h>
+#include <time.h>
+#include <sys/ptrace.h>
+#include <sys/ioctl.h>
+#include <sys/uio.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <sys/signal.h>
+#include <sys/ipc.h>
+#include <sys/shm.h>
+#include <sys/user.h>
+#include <linux/elf.h>
+#include <linux/types.h>
+#include <linux/auxvec.h>
+#include "reg.h"
+#include "utils.h"
+
+#define TEST_PASS 0
+#define TEST_FAIL 1
+
+struct fpr_regs {
+	unsigned long fpr[32];
+	unsigned long fpscr;
+};
+
+struct tm_spr_regs {
+	unsigned long tm_tfhar;
+	unsigned long tm_texasr;
+	unsigned long tm_tfiar;
+};
+
+#ifndef NT_PPC_TAR
+#define NT_PPC_TAR	0x103
+#define NT_PPC_PPR	0x104
+#define NT_PPC_DSCR	0x105
+#define NT_PPC_EBB	0x106
+#define NT_PPC_PMU	0x107
+#define NT_PPC_TM_CGPR	0x108
+#define NT_PPC_TM_CFPR	0x109
+#define NT_PPC_TM_CVMX	0x10a
+#define NT_PPC_TM_CVSX	0x10b
+#define NT_PPC_TM_SPR	0x10c
+#define NT_PPC_TM_CTAR	0x10d
+#define NT_PPC_TM_CPPR	0x10e
+#define NT_PPC_TM_CDSCR	0x10f
+#endif
+
+/* Basic ptrace operations */
+int start_trace(pid_t child)
+{
+	int ret;
+
+	ret = ptrace(PTRACE_ATTACH, child, NULL, NULL);
+	if (ret) {
+		perror("ptrace(PTRACE_ATTACH) failed");
+		return TEST_FAIL;
+	}
+	ret = waitpid(child, NULL, 0);
+	if (ret != child) {
+		perror("waitpid() failed");
+		return TEST_FAIL;
+	}
+	return TEST_PASS;
+}
+
+int stop_trace(pid_t child)
+{
+	int ret;
+
+	ret = ptrace(PTRACE_DETACH, child, NULL, NULL);
+	if (ret) {
+		perror("ptrace(PTRACE_DETACH) failed");
+		return TEST_FAIL;
+	}
+	return TEST_PASS;
+}
+
+int cont_trace(pid_t child)
+{
+	int ret;
+
+	ret = ptrace(PTRACE_CONT, child, NULL, NULL);
+	if (ret) {
+		perror("ptrace(PTRACE_CONT) failed");
+		return TEST_FAIL;
+	}
+	return TEST_PASS;
+}
+
+int ptrace_read_regs(pid_t child, unsigned long type, unsigned long regs[],
+		     int n)
+{
+	struct iovec iov;
+	long ret;
+
+	FAIL_IF(start_trace(child));
+
+	iov.iov_base = regs;
+	iov.iov_len = n * sizeof(unsigned long);
+
+	ret = ptrace(PTRACE_GETREGSET, child, type, &iov);
+	if (ret)
+		return ret;
+
+	FAIL_IF(stop_trace(child));
+
+	return TEST_PASS;
+}
+
+long ptrace_write_regs(pid_t child, unsigned long type, unsigned long regs[],
+		       int n)
+{
+	struct iovec iov;
+	long ret;
+
+	FAIL_IF(start_trace(child));
+
+	iov.iov_base = regs;
+	iov.iov_len = n * sizeof(unsigned long);
+
+	ret = ptrace(PTRACE_SETREGSET, child, type, &iov);
+
+	FAIL_IF(stop_trace(child));
+
+	return ret;
+}
+
+/* TAR, PPR, DSCR */
+int show_tar_registers(pid_t child, unsigned long *out)
+{
+	struct iovec iov;
+	unsigned long *reg;
+	int ret;
+
+	reg = malloc(sizeof(unsigned long));
+	if (!reg) {
+		perror("malloc() failed");
+		return TEST_FAIL;
+	}
+	iov.iov_base = (u64 *) reg;
+	iov.iov_len = sizeof(unsigned long);
+
+	ret = ptrace(PTRACE_GETREGSET, child, NT_PPC_TAR, &iov);
+	if (ret) {
+		perror("ptrace(PTRACE_GETREGSET) failed");
+		goto fail;
+	}
+	if (out)
+		out[0] = *reg;
+
+	ret = ptrace(PTRACE_GETREGSET, child, NT_PPC_PPR, &iov);
+	if (ret) {
+		perror("ptrace(PTRACE_GETREGSET) failed");
+		goto fail;
+	}
+	if (out)
+		out[1] = *reg;
+
+	ret = ptrace(PTRACE_GETREGSET, child, NT_PPC_DSCR, &iov);
+	if (ret) {
+		perror("ptrace(PTRACE_GETREGSET) failed");
+		goto fail;
+	}
+	if (out)
+		out[2] = *reg;
+
+	free(reg);
+	return TEST_PASS;
+fail:
+	free(reg);
+	return TEST_FAIL;
+}
+
+int write_tar_registers(pid_t child, unsigned long tar,
+		unsigned long ppr, unsigned long dscr)
+{
+	struct iovec iov;
+	unsigned long *reg;
+	int ret;
+
+	reg = malloc(sizeof(unsigned long));
+	if (!reg) {
+		perror("malloc() failed");
+		return TEST_FAIL;
+	}
+
+	iov.iov_base = (u64 *) reg;
+	iov.iov_len = sizeof(unsigned long);
+
+	*reg = tar;
+	ret = ptrace(PTRACE_SETREGSET, child, NT_PPC_TAR, &iov);
+	if (ret) {
+		perror("ptrace(PTRACE_SETREGSET) failed");
+		goto fail;
+	}
+
+	*reg = ppr;
+	ret = ptrace(PTRACE_SETREGSET, child, NT_PPC_PPR, &iov);
+	if (ret) {
+		perror("ptrace(PTRACE_SETREGSET) failed");
+		goto fail;
+	}
+
+	*reg = dscr;
+	ret = ptrace(PTRACE_SETREGSET, child, NT_PPC_DSCR, &iov);
+	if (ret) {
+		perror("ptrace(PTRACE_SETREGSET) failed");
+		goto fail;
+	}
+
+	free(reg);
+	return TEST_PASS;
+fail:
+	free(reg);
+	return TEST_FAIL;
+}
+
+int show_tm_checkpointed_state(pid_t child, unsigned long *out)
+{
+	struct iovec iov;
+	unsigned long *reg;
+	int ret;
+
+	reg = malloc(sizeof(unsigned long));
+	if (!reg) {
+		perror("malloc() failed");
+		return TEST_FAIL;
+	}
+
+	iov.iov_base = (u64 *) reg;
+	iov.iov_len = sizeof(unsigned long);
+
+	ret = ptrace(PTRACE_GETREGSET, child, NT_PPC_TM_CTAR, &iov);
+	if (ret) {
+		perror("ptrace(PTRACE_GETREGSET) failed");
+		goto fail;
+	}
+	if (out)
+		out[0] = *reg;
+
+	ret = ptrace(PTRACE_GETREGSET, child, NT_PPC_TM_CPPR, &iov);
+	if (ret) {
+		perror("ptrace(PTRACE_GETREGSET) failed");
+		goto fail;
+	}
+	if (out)
+		out[1] = *reg;
+
+	ret = ptrace(PTRACE_GETREGSET, child, NT_PPC_TM_CDSCR, &iov);
+	if (ret) {
+		perror("ptrace(PTRACE_GETREGSET) failed");
+		goto fail;
+	}
+	if (out)
+		out[2] = *reg;
+
+	free(reg);
+	return TEST_PASS;
+
+fail:
+	free(reg);
+	return TEST_FAIL;
+}
+
+int write_ckpt_tar_registers(pid_t child, unsigned long tar,
+		unsigned long ppr, unsigned long dscr)
+{
+	struct iovec iov;
+	unsigned long *reg;
+	int ret;
+
+	reg = malloc(sizeof(unsigned long));
+	if (!reg) {
+		perror("malloc() failed");
+		return TEST_FAIL;
+	}
+
+	iov.iov_base = (u64 *) reg;
+	iov.iov_len = sizeof(unsigned long);
+
+	*reg = tar;
+	ret = ptrace(PTRACE_SETREGSET, child, NT_PPC_TM_CTAR, &iov);
+	if (ret) {
+		perror("ptrace(PTRACE_GETREGSET) failed");
+		goto fail;
+	}
+
+	*reg = ppr;
+	ret = ptrace(PTRACE_SETREGSET, child, NT_PPC_TM_CPPR, &iov);
+	if (ret) {
+		perror("ptrace(PTRACE_GETREGSET) failed");
+		goto fail;
+	}
+
+	*reg = dscr;
+	ret = ptrace(PTRACE_SETREGSET, child, NT_PPC_TM_CDSCR, &iov);
+	if (ret) {
+		perror("ptrace(PTRACE_GETREGSET) failed");
+		goto fail;
+	}
+
+	free(reg);
+	return TEST_PASS;
+fail:
+	free(reg);
+	return TEST_FAIL;
+}
+
+/* FPR */
+int show_fpr(pid_t child, unsigned long *fpr)
+{
+	struct fpr_regs *regs;
+	int ret, i;
+
+	regs = (struct fpr_regs *) malloc(sizeof(struct fpr_regs));
+	ret = ptrace(PTRACE_GETFPREGS, child, NULL, regs);
+	if (ret) {
+		perror("ptrace(PTRACE_GETREGSET) failed");
+		return TEST_FAIL;
+	}
+
+	if (fpr) {
+		for (i = 0; i < 32; i++)
+			fpr[i] = regs->fpr[i];
+	}
+	return TEST_PASS;
+}
+
+int write_fpr(pid_t child, unsigned long val)
+{
+	struct fpr_regs *regs;
+	int ret, i;
+
+	regs = (struct fpr_regs *) malloc(sizeof(struct fpr_regs));
+	ret = ptrace(PTRACE_GETFPREGS, child, NULL, regs);
+	if (ret) {
+		perror("ptrace(PTRACE_GETREGSET) failed");
+		return TEST_FAIL;
+	}
+
+	for (i = 0; i < 32; i++)
+		regs->fpr[i] = val;
+
+	ret = ptrace(PTRACE_SETFPREGS, child, NULL, regs);
+	if (ret) {
+		perror("ptrace(PTRACE_GETREGSET) failed");
+		return TEST_FAIL;
+	}
+	return TEST_PASS;
+}
+
+int show_ckpt_fpr(pid_t child, unsigned long *fpr)
+{
+	struct fpr_regs *regs;
+	struct iovec iov;
+	int ret, i;
+
+	regs = (struct fpr_regs *) malloc(sizeof(struct fpr_regs));
+	iov.iov_base = regs;
+	iov.iov_len = sizeof(struct fpr_regs);
+
+	ret = ptrace(PTRACE_GETREGSET, child, NT_PPC_TM_CFPR, &iov);
+	if (ret) {
+		perror("ptrace(PTRACE_GETREGSET) failed");
+		return TEST_FAIL;
+	}
+
+	if (fpr) {
+		for (i = 0; i < 32; i++)
+			fpr[i] = regs->fpr[i];
+	}
+
+	return TEST_PASS;
+}
+
+int write_ckpt_fpr(pid_t child, unsigned long val)
+{
+	struct fpr_regs *regs;
+	struct iovec iov;
+	int ret, i;
+
+	regs = (struct fpr_regs *) malloc(sizeof(struct fpr_regs));
+	iov.iov_base = regs;
+	iov.iov_len = sizeof(struct fpr_regs);
+
+	ret = ptrace(PTRACE_GETREGSET, child, NT_PPC_TM_CFPR, &iov);
+	if (ret) {
+		perror("ptrace(PTRACE_GETREGSET) failed");
+		return TEST_FAIL;
+	}
+
+	for (i = 0; i < 32; i++)
+		regs->fpr[i] = val;
+
+	ret = ptrace(PTRACE_SETREGSET, child, NT_PPC_TM_CFPR, &iov);
+	if (ret) {
+		perror("ptrace(PTRACE_GETREGSET) failed");
+		return TEST_FAIL;
+	}
+	return TEST_PASS;
+}
+
+/* GPR */
+int show_gpr(pid_t child, unsigned long *gpr)
+{
+	struct pt_regs *regs;
+	int ret, i;
+
+	regs = (struct pt_regs *) malloc(sizeof(struct pt_regs));
+	if (!regs) {
+		perror("malloc() failed");
+		return TEST_FAIL;
+	}
+
+	ret = ptrace(PTRACE_GETREGS, child, NULL, regs);
+	if (ret) {
+		perror("ptrace(PTRACE_GETREGSET) failed");
+		return TEST_FAIL;
+	}
+
+	if (gpr) {
+		for (i = 14; i < 32; i++)
+			gpr[i-14] = regs->gpr[i];
+	}
+
+	return TEST_PASS;
+}
+
+int write_gpr(pid_t child, unsigned long val)
+{
+	struct pt_regs *regs;
+	int i, ret;
+
+	regs = (struct pt_regs *) malloc(sizeof(struct pt_regs));
+	if (!regs) {
+		perror("malloc() failed");
+		return TEST_FAIL;
+	}
+
+	ret = ptrace(PTRACE_GETREGS, child, NULL, regs);
+	if (ret) {
+		perror("ptrace(PTRACE_GETREGSET) failed");
+		return TEST_FAIL;
+	}
+
+	for (i = 14; i < 32; i++)
+		regs->gpr[i] = val;
+
+	ret = ptrace(PTRACE_SETREGS, child, NULL, regs);
+	if (ret) {
+		perror("ptrace(PTRACE_GETREGSET) failed");
+		return TEST_FAIL;
+	}
+	return TEST_PASS;
+}
+
+int show_ckpt_gpr(pid_t child, unsigned long *gpr)
+{
+	struct pt_regs *regs;
+	struct iovec iov;
+	int ret, i;
+
+	regs = (struct pt_regs *) malloc(sizeof(struct pt_regs));
+	if (!regs) {
+		perror("malloc() failed");
+		return TEST_FAIL;
+	}
+
+	iov.iov_base = (u64 *) regs;
+	iov.iov_len = sizeof(struct pt_regs);
+
+	ret = ptrace(PTRACE_GETREGSET, child, NT_PPC_TM_CGPR, &iov);
+	if (ret) {
+		perror("ptrace(PTRACE_GETREGSET) failed");
+		return TEST_FAIL;
+	}
+
+	if (gpr) {
+		for (i = 14; i < 32; i++)
+			gpr[i-14] = regs->gpr[i];
+	}
+
+	return TEST_PASS;
+}
+
+int write_ckpt_gpr(pid_t child, unsigned long val)
+{
+	struct pt_regs *regs;
+	struct iovec iov;
+	int ret, i;
+
+	regs = (struct pt_regs *) malloc(sizeof(struct pt_regs));
+	if (!regs) {
+		perror("malloc() failed\n");
+		return TEST_FAIL;
+	}
+	iov.iov_base = (u64 *) regs;
+	iov.iov_len = sizeof(struct pt_regs);
+
+	ret = ptrace(PTRACE_GETREGSET, child, NT_PPC_TM_CGPR, &iov);
+	if (ret) {
+		perror("ptrace(PTRACE_GETREGSET) failed");
+		return TEST_FAIL;
+	}
+
+	for (i = 14; i < 32; i++)
+		regs->gpr[i] = val;
+
+	ret = ptrace(PTRACE_SETREGSET, child, NT_PPC_TM_CGPR, &iov);
+	if (ret) {
+		perror("ptrace(PTRACE_GETREGSET) failed");
+		return TEST_FAIL;
+	}
+	return TEST_PASS;
+}
+
+/* VMX */
+int show_vmx(pid_t child, unsigned long vmx[][2])
+{
+	int ret;
+
+	ret = ptrace(PTRACE_GETVRREGS, child, 0, vmx);
+	if (ret) {
+		perror("ptrace(PTRACE_GETVRREGS) failed");
+		return TEST_FAIL;
+	}
+	return TEST_PASS;
+}
+
+int show_vmx_ckpt(pid_t child, unsigned long vmx[][2])
+{
+	unsigned long regs[34][2];
+	struct iovec iov;
+	int ret;
+
+	iov.iov_base = (u64 *) regs;
+	iov.iov_len = sizeof(regs);
+	ret = ptrace(PTRACE_GETREGSET, child, NT_PPC_TM_CVMX, &iov);
+	if (ret) {
+		perror("ptrace(PTRACE_GETREGSET, NT_PPC_TM_CVMX) failed");
+		return TEST_FAIL;
+	}
+	memcpy(vmx, regs, sizeof(regs));
+	return TEST_PASS;
+}
+
+
+int write_vmx(pid_t child, unsigned long vmx[][2])
+{
+	int ret;
+
+	ret = ptrace(PTRACE_SETVRREGS, child, 0, vmx);
+	if (ret) {
+		perror("ptrace(PTRACE_SETVRREGS) failed");
+		return TEST_FAIL;
+	}
+	return TEST_PASS;
+}
+
+int write_vmx_ckpt(pid_t child, unsigned long vmx[][2])
+{
+	unsigned long regs[34][2];
+	struct iovec iov;
+	int ret;
+
+	memcpy(regs, vmx, sizeof(regs));
+	iov.iov_base = (u64 *) regs;
+	iov.iov_len = sizeof(regs);
+	ret = ptrace(PTRACE_SETREGSET, child, NT_PPC_TM_CVMX, &iov);
+	if (ret) {
+		perror("ptrace(PTRACE_SETREGSET, NT_PPC_TM_CVMX) failed");
+		return TEST_FAIL;
+	}
+	return TEST_PASS;
+}
+
+/* VSX */
+int show_vsx(pid_t child, unsigned long *vsx)
+{
+	int ret;
+
+	ret = ptrace(PTRACE_GETVSRREGS, child, 0, vsx);
+	if (ret) {
+		perror("ptrace(PTRACE_GETVSRREGS) failed");
+		return TEST_FAIL;
+	}
+	return TEST_PASS;
+}
+
+int show_vsx_ckpt(pid_t child, unsigned long *vsx)
+{
+	unsigned long regs[32];
+	struct iovec iov;
+	int ret;
+
+	iov.iov_base = (u64 *) regs;
+	iov.iov_len = sizeof(regs);
+	ret = ptrace(PTRACE_GETREGSET, child, NT_PPC_TM_CVSX, &iov);
+	if (ret) {
+		perror("ptrace(PTRACE_GETREGSET, NT_PPC_TM_CVSX) failed");
+		return TEST_FAIL;
+	}
+	memcpy(vsx, regs, sizeof(regs));
+	return TEST_PASS;
+}
+
+int write_vsx(pid_t child, unsigned long *vsx)
+{
+	int ret;
+
+	ret = ptrace(PTRACE_SETVSRREGS, child, 0, vsx);
+	if (ret) {
+		perror("ptrace(PTRACE_SETVSRREGS) failed");
+		return TEST_FAIL;
+	}
+	return TEST_PASS;
+}
+
+int write_vsx_ckpt(pid_t child, unsigned long *vsx)
+{
+	unsigned long regs[32];
+	struct iovec iov;
+	int ret;
+
+	memcpy(regs, vsx, sizeof(regs));
+	iov.iov_base = (u64 *) regs;
+	iov.iov_len = sizeof(regs);
+	ret = ptrace(PTRACE_SETREGSET, child, NT_PPC_TM_CVSX, &iov);
+	if (ret) {
+		perror("ptrace(PTRACE_SETREGSET, NT_PPC_TM_CVSX) failed");
+		return TEST_FAIL;
+	}
+	return TEST_PASS;
+}
+
+/* TM SPR */
+int show_tm_spr(pid_t child, struct tm_spr_regs *out)
+{
+	struct tm_spr_regs *regs;
+	struct iovec iov;
+	int ret;
+
+	regs = (struct tm_spr_regs *) malloc(sizeof(struct tm_spr_regs));
+	if (!regs) {
+		perror("malloc() failed");
+		return TEST_FAIL;
+	}
+
+	iov.iov_base = (u64 *) regs;
+	iov.iov_len = sizeof(struct tm_spr_regs);
+
+	ret = ptrace(PTRACE_GETREGSET, child, NT_PPC_TM_SPR, &iov);
+	if (ret) {
+		perror("ptrace(PTRACE_GETREGSET) failed");
+		return TEST_FAIL;
+	}
+
+	if (out)
+		memcpy(out, regs, sizeof(struct tm_spr_regs));
+
+	return TEST_PASS;
+}
+
+
+
+/* Analyse TEXASR after TM failure */
+inline unsigned long get_tfiar(void)
+{
+	unsigned long ret;
+
+	asm volatile("mfspr %0,%1" : "=r" (ret) : "i" (SPRN_TFIAR));
+	return ret;
+}
+
+void analyse_texasr(unsigned long texasr)
+{
+	printf("TEXASR: %16lx\t", texasr);
+
+	if (texasr & TEXASR_FP)
+		printf("TEXASR_FP  ");
+
+	if (texasr & TEXASR_DA)
+		printf("TEXASR_DA  ");
+
+	if (texasr & TEXASR_NO)
+		printf("TEXASR_NO  ");
+
+	if (texasr & TEXASR_FO)
+		printf("TEXASR_FO  ");
+
+	if (texasr & TEXASR_SIC)
+		printf("TEXASR_SIC  ");
+
+	if (texasr & TEXASR_NTC)
+		printf("TEXASR_NTC  ");
+
+	if (texasr & TEXASR_TC)
+		printf("TEXASR_TC  ");
+
+	if (texasr & TEXASR_TIC)
+		printf("TEXASR_TIC  ");
+
+	if (texasr & TEXASR_IC)
+		printf("TEXASR_IC  ");
+
+	if (texasr & TEXASR_IFC)
+		printf("TEXASR_IFC  ");
+
+	if (texasr & TEXASR_ABT)
+		printf("TEXASR_ABT  ");
+
+	if (texasr & TEXASR_SPD)
+		printf("TEXASR_SPD  ");
+
+	if (texasr & TEXASR_HV)
+		printf("TEXASR_HV  ");
+
+	if (texasr & TEXASR_PR)
+		printf("TEXASR_PR  ");
+
+	if (texasr & TEXASR_FS)
+		printf("TEXASR_FS  ");
+
+	if (texasr & TEXASR_TE)
+		printf("TEXASR_TE  ");
+
+	if (texasr & TEXASR_ROT)
+		printf("TEXASR_ROT  ");
+
+	printf("TFIAR :%lx\n", get_tfiar());
+}
+
+void store_gpr(unsigned long *addr);
+void store_fpr(float *addr);
diff --git a/tools/testing/selftests/powerpc/scripts/hmi.sh b/tools/testing/selftests/powerpc/scripts/hmi.sh
new file mode 100755
index 000000000..83fb253ae
--- /dev/null
+++ b/tools/testing/selftests/powerpc/scripts/hmi.sh
@@ -0,0 +1,89 @@
+#!/bin/sh
+#
+# Copyright 2015, Daniel Axtens, IBM Corporation
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+#  the Free Software Foundation; version 2 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+
+# do we have ./getscom, ./putscom?
+if [ -x ./getscom ] && [ -x ./putscom ]; then
+	GETSCOM=./getscom
+	PUTSCOM=./putscom
+elif which getscom > /dev/null; then
+	GETSCOM=$(which getscom)
+	PUTSCOM=$(which putscom)
+else
+	cat <<EOF
+Can't find getscom/putscom in . or \$PATH.
+See https://github.com/open-power/skiboot.
+The tool is in external/xscom-utils
+EOF
+	exit 1
+fi
+
+# We will get 8 HMI events per injection
+# todo: deal with things being offline
+expected_hmis=8
+COUNT_HMIS() {
+    dmesg | grep -c 'Harmless Hypervisor Maintenance interrupt'
+}
+
+# massively expand snooze delay, allowing injection on all cores
+ppc64_cpu --smt-snooze-delay=1000000000
+
+# when we exit, restore it
+trap "ppc64_cpu --smt-snooze-delay=100" 0 1
+
+# for each chip+core combination
+# todo - less fragile parsing
+egrep -o 'OCC: Chip [0-9a-f]+ Core [0-9a-f]' < /sys/firmware/opal/msglog |
+while read chipcore; do
+	chip=$(echo "$chipcore"|awk '{print $3}')
+	core=$(echo "$chipcore"|awk '{print $5}')
+	fir="0x1${core}013100"
+
+	# verify that Core FIR is zero as expected
+	if [ "$($GETSCOM -c 0x${chip} $fir)" != 0 ]; then
+		echo "FIR was not zero before injection for chip $chip, core $core. Aborting!"
+		echo "Result of $GETSCOM -c 0x${chip} $fir:"
+		$GETSCOM -c 0x${chip} $fir
+		echo "If you get a -5 error, the core may be in idle state. Try stress-ng."
+		echo "Otherwise, try $PUTSCOM -c 0x${chip} $fir 0"
+		exit 1
+	fi
+
+	# keep track of the number of HMIs handled
+	old_hmis=$(COUNT_HMIS)
+
+	# do injection, adding a marker to dmesg for clarity
+	echo "Injecting HMI on core $core, chip $chip" | tee /dev/kmsg
+	# inject a RegFile recoverable error
+	if ! $PUTSCOM -c 0x${chip} $fir 2000000000000000 > /dev/null; then
+		echo "Error injecting. Aborting!"
+		exit 1
+	fi
+
+	# now we want to wait for all the HMIs to be processed
+	# we expect one per thread on the core
+	i=0;
+	new_hmis=$(COUNT_HMIS)
+	while [ $new_hmis -lt $((old_hmis + expected_hmis)) ] && [ $i -lt 12 ]; do
+	    echo "Seen $((new_hmis - old_hmis)) HMI(s) out of $expected_hmis expected, sleeping"
+	    sleep 5;
+	    i=$((i + 1))
+	    new_hmis=$(COUNT_HMIS)
+	done
+	if [ $i = 12 ]; then
+	    echo "Haven't seen expected $expected_hmis recoveries after 1 min. Aborting."
+	    exit 1
+	fi
+	echo "Processed $expected_hmis events; presumed success. Check dmesg."
+	echo ""
+done
diff --git a/tools/testing/selftests/powerpc/signal/.gitignore b/tools/testing/selftests/powerpc/signal/.gitignore
new file mode 100644
index 000000000..1b89224a8
--- /dev/null
+++ b/tools/testing/selftests/powerpc/signal/.gitignore
@@ -0,0 +1,2 @@
+signal
+signal_tm
diff --git a/tools/testing/selftests/powerpc/signal/Makefile b/tools/testing/selftests/powerpc/signal/Makefile
new file mode 100644
index 000000000..209a958dc
--- /dev/null
+++ b/tools/testing/selftests/powerpc/signal/Makefile
@@ -0,0 +1,10 @@
+# SPDX-License-Identifier: GPL-2.0
+TEST_GEN_PROGS := signal signal_tm
+
+CFLAGS += -maltivec
+$(OUTPUT)/signal_tm: CFLAGS += -mhtm
+
+top_srcdir = ../../../../..
+include ../../lib.mk
+
+$(TEST_GEN_PROGS): ../harness.c ../utils.c signal.S
diff --git a/tools/testing/selftests/powerpc/signal/signal.S b/tools/testing/selftests/powerpc/signal/signal.S
new file mode 100644
index 000000000..322f2f1fc
--- /dev/null
+++ b/tools/testing/selftests/powerpc/signal/signal.S
@@ -0,0 +1,50 @@
+/*
+ * Copyright 2015, Cyril Bur, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include "basic_asm.h"
+
+/* long signal_self(pid_t pid, int sig); */
+FUNC_START(signal_self)
+	li	r0,37 /* sys_kill */
+	/* r3 already has our pid in it */
+	/* r4 already has signal type in it */
+	sc
+	bc	4,3,1f
+	subfze	r3,r3
+1:	blr
+FUNC_END(signal_self)
+
+/* long tm_signal_self(pid_t pid, int sig, int *ret); */
+FUNC_START(tm_signal_self)
+	PUSH_BASIC_STACK(8)
+	std	r5,STACK_FRAME_PARAM(0)(sp) /* ret */
+	tbegin.
+	beq	1f
+	tsuspend.
+	li	r0,37 /* sys_kill */
+	/* r3 already has our pid in it */
+	/* r4 already has signal type in it */
+	sc
+	ld	r5,STACK_FRAME_PARAM(0)(sp) /* ret */
+	bc	4,3,2f
+	subfze	r3,r3
+2:	std	r3,0(r5)
+	tabort. 0
+	tresume. /* Be nice to some cleanup, jumps back to tbegin then to 1: */
+	/*
+	 * Transaction should be proper doomed and we should never get
+	 * here
+	 */
+	li	r3,1
+	POP_BASIC_STACK(8)
+	blr
+1:	li	r3,0
+	POP_BASIC_STACK(8)
+	blr
+FUNC_END(tm_signal_self)
diff --git a/tools/testing/selftests/powerpc/signal/signal.c b/tools/testing/selftests/powerpc/signal/signal.c
new file mode 100644
index 000000000..e7dedd28b
--- /dev/null
+++ b/tools/testing/selftests/powerpc/signal/signal.c
@@ -0,0 +1,111 @@
+/*
+ * Copyright 2016, Cyril Bur, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Sending one self a signal should always get delivered.
+ */
+
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include <altivec.h>
+
+#include "utils.h"
+
+#define MAX_ATTEMPT 500000
+#define TIMEOUT 5
+
+extern long signal_self(pid_t pid, int sig);
+
+static sig_atomic_t signaled;
+static sig_atomic_t fail;
+
+static void signal_handler(int sig)
+{
+	if (sig == SIGUSR1)
+		signaled = 1;
+	else
+		fail = 1;
+}
+
+static int test_signal()
+{
+	int i;
+	struct sigaction act;
+	pid_t ppid = getpid();
+	pid_t pid;
+
+	act.sa_handler = signal_handler;
+	act.sa_flags = 0;
+	sigemptyset(&act.sa_mask);
+	if (sigaction(SIGUSR1, &act, NULL) < 0) {
+		perror("sigaction SIGUSR1");
+		exit(1);
+	}
+	if (sigaction(SIGALRM, &act, NULL) < 0) {
+		perror("sigaction SIGALRM");
+		exit(1);
+	}
+
+	/* Don't do this for MAX_ATTEMPT, its simply too long */
+	for(i  = 0; i < 1000; i++) {
+		pid = fork();
+		if (pid == -1) {
+			perror("fork");
+			exit(1);
+		}
+		if (pid == 0) {
+			signal_self(ppid, SIGUSR1);
+			exit(1);
+		} else {
+			alarm(0); /* Disable any pending */
+			alarm(2);
+			while (!signaled && !fail)
+				asm volatile("": : :"memory");
+			if (!signaled) {
+				fprintf(stderr, "Didn't get signal from child\n");
+				FAIL_IF(1); /* For the line number */
+			}
+			/* Otherwise we'll loop too fast and fork() will eventually fail */
+			waitpid(pid, NULL, 0);
+		}
+	}
+
+	for (i = 0; i < MAX_ATTEMPT; i++) {
+		long rc;
+
+		alarm(0); /* Disable any pending */
+		signaled = 0;
+		alarm(TIMEOUT);
+		rc = signal_self(ppid, SIGUSR1);
+		if (rc) {
+			fprintf(stderr, "(%d) Fail reason: %d rc=0x%lx",
+					i, fail, rc);
+			FAIL_IF(1); /* For the line number */
+		}
+		while (!signaled && !fail)
+			asm volatile("": : :"memory");
+		if (!signaled) {
+			fprintf(stderr, "(%d) Fail reason: %d rc=0x%lx",
+					i, fail, rc);
+			FAIL_IF(1); /* For the line number */
+		}
+	}
+
+	return 0;
+}
+
+int main(void)
+{
+	test_harness_set_timeout(300);
+	return test_harness(test_signal, "signal");
+}
diff --git a/tools/testing/selftests/powerpc/signal/signal_tm.c b/tools/testing/selftests/powerpc/signal/signal_tm.c
new file mode 100644
index 000000000..2e7451a37
--- /dev/null
+++ b/tools/testing/selftests/powerpc/signal/signal_tm.c
@@ -0,0 +1,110 @@
+/*
+ * Copyright 2016, Cyril Bur, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Sending one self a signal should always get delivered.
+ */
+
+#include <errno.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <signal.h>
+#include <unistd.h>
+
+#include <altivec.h>
+
+#include "utils.h"
+#include "../tm/tm.h"
+
+#define MAX_ATTEMPT 500000
+#define TIMEOUT 10
+
+extern long tm_signal_self(pid_t pid, int sig, long *ret);
+
+static sig_atomic_t signaled;
+static sig_atomic_t fail;
+
+static void signal_handler(int sig)
+{
+	if (tcheck_active()) {
+		fail = 2;
+		return;
+	}
+
+	if (sig == SIGUSR1)
+		signaled = 1;
+	else
+		fail = 1;
+}
+
+static int test_signal_tm()
+{
+	int i;
+	struct sigaction act;
+
+	act.sa_handler = signal_handler;
+	act.sa_flags = 0;
+	sigemptyset(&act.sa_mask);
+	if (sigaction(SIGUSR1, &act, NULL) < 0) {
+		perror("sigaction SIGUSR1");
+		exit(1);
+	}
+	if (sigaction(SIGALRM, &act, NULL) < 0) {
+		perror("sigaction SIGALRM");
+		exit(1);
+	}
+
+	SKIP_IF(!have_htm());
+
+	for (i = 0; i < MAX_ATTEMPT; i++) {
+		/*
+		 * If anything bad happens in ASM and we fail to set ret
+		 * because *handwave* TM this will cause failure
+		 */
+		long ret = 0xdead;
+		long rc = 0xbeef;
+
+		alarm(0); /* Disable any pending */
+		signaled = 0;
+		alarm(TIMEOUT);
+		FAIL_IF(tcheck_transactional());
+		rc = tm_signal_self(getpid(), SIGUSR1, &ret);
+		if (ret == 0xdead)
+			/*
+			 * This basically means the transaction aborted before we
+			 * even got to the suspend... this is crazy but it
+			 * happens.
+			 * Yes this also means we might never make forward
+			 * progress... the alarm() will trip eventually...
+			 */
+			continue;
+
+		if (rc || ret) {
+			/* Ret is actually an errno */
+			printf("TEXASR 0x%016lx, TFIAR 0x%016lx\n",
+					__builtin_get_texasr(), __builtin_get_tfiar());
+			fprintf(stderr, "(%d) Fail reason: %d rc=0x%lx ret=0x%lx\n",
+					i, fail, rc, ret);
+			FAIL_IF(ret);
+		}
+		while(!signaled && !fail)
+			asm volatile("": : :"memory");
+		if (!signaled) {
+			fprintf(stderr, "(%d) Fail reason: %d rc=0x%lx ret=0x%lx\n",
+					i, fail, rc, ret);
+			FAIL_IF(fail); /* For the line number */
+		}
+	}
+
+	return 0;
+}
+
+int main(void)
+{
+	return test_harness(test_signal_tm, "signal_tm");
+}
diff --git a/tools/testing/selftests/powerpc/stringloops/.gitignore b/tools/testing/selftests/powerpc/stringloops/.gitignore
new file mode 100644
index 000000000..0b43da74e
--- /dev/null
+++ b/tools/testing/selftests/powerpc/stringloops/.gitignore
@@ -0,0 +1 @@
+memcmp
diff --git a/tools/testing/selftests/powerpc/stringloops/Makefile b/tools/testing/selftests/powerpc/stringloops/Makefile
new file mode 100644
index 000000000..7fc0623d8
--- /dev/null
+++ b/tools/testing/selftests/powerpc/stringloops/Makefile
@@ -0,0 +1,35 @@
+# SPDX-License-Identifier: GPL-2.0
+# The loops are all 64-bit code
+CFLAGS += -I$(CURDIR)
+
+EXTRA_SOURCES := ../harness.c
+
+build_32bit = $(shell if ($(CC) $(CFLAGS) -m32 -o /dev/null memcmp.c >/dev/null 2>&1) then echo "1"; fi)
+
+TEST_GEN_PROGS := memcmp_64 strlen
+
+$(OUTPUT)/memcmp_64: memcmp.c
+$(OUTPUT)/memcmp_64: CFLAGS += -m64 -maltivec
+
+ifeq ($(build_32bit),1)
+$(OUTPUT)/memcmp_32: memcmp.c
+$(OUTPUT)/memcmp_32: CFLAGS += -m32
+
+TEST_GEN_PROGS += memcmp_32
+endif
+
+$(OUTPUT)/strlen: strlen.c string.c
+
+ifeq ($(build_32bit),1)
+$(OUTPUT)/strlen_32: strlen.c
+$(OUTPUT)/strlen_32: CFLAGS += -m32
+
+TEST_GEN_PROGS += strlen_32
+endif
+
+ASFLAGS = $(CFLAGS)
+
+top_srcdir = ../../../../..
+include ../../lib.mk
+
+$(TEST_GEN_PROGS): $(EXTRA_SOURCES)
diff --git a/tools/testing/selftests/powerpc/stringloops/asm/cache.h b/tools/testing/selftests/powerpc/stringloops/asm/cache.h
new file mode 100644
index 000000000..8a2840831
--- /dev/null
+++ b/tools/testing/selftests/powerpc/stringloops/asm/cache.h
@@ -0,0 +1 @@
+#define	IFETCH_ALIGN_BYTES 4
diff --git a/tools/testing/selftests/powerpc/stringloops/asm/export.h b/tools/testing/selftests/powerpc/stringloops/asm/export.h
new file mode 100644
index 000000000..2d14a9b42
--- /dev/null
+++ b/tools/testing/selftests/powerpc/stringloops/asm/export.h
@@ -0,0 +1 @@
+#define EXPORT_SYMBOL(x)
diff --git a/tools/testing/selftests/powerpc/stringloops/asm/ppc-opcode.h b/tools/testing/selftests/powerpc/stringloops/asm/ppc-opcode.h
new file mode 100644
index 000000000..9de413c0c
--- /dev/null
+++ b/tools/testing/selftests/powerpc/stringloops/asm/ppc-opcode.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright 2009 Freescale Semiconductor, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * provides masks and opcode images for use by code generation, emulation
+ * and for instructions that older assemblers might not know about
+ */
+#ifndef _ASM_POWERPC_PPC_OPCODE_H
+#define _ASM_POWERPC_PPC_OPCODE_H
+
+
+#  define stringify_in_c(...)	__VA_ARGS__
+#  define ASM_CONST(x)		x
+
+
+#define PPC_INST_VCMPEQUD_RC		0x100000c7
+#define PPC_INST_VCMPEQUB_RC		0x10000006
+
+#define __PPC_RC21     (0x1 << 10)
+
+/* macros to insert fields into opcodes */
+#define ___PPC_RA(a)	(((a) & 0x1f) << 16)
+#define ___PPC_RB(b)	(((b) & 0x1f) << 11)
+#define ___PPC_RS(s)	(((s) & 0x1f) << 21)
+#define ___PPC_RT(t)	___PPC_RS(t)
+
+#define VCMPEQUD_RC(vrt, vra, vrb)	stringify_in_c(.long PPC_INST_VCMPEQUD_RC | \
+			      ___PPC_RT(vrt) | ___PPC_RA(vra) | \
+			      ___PPC_RB(vrb) | __PPC_RC21)
+
+#define VCMPEQUB_RC(vrt, vra, vrb)	stringify_in_c(.long PPC_INST_VCMPEQUB_RC | \
+			      ___PPC_RT(vrt) | ___PPC_RA(vra) | \
+			      ___PPC_RB(vrb) | __PPC_RC21)
+
+#endif /* _ASM_POWERPC_PPC_OPCODE_H */
diff --git a/tools/testing/selftests/powerpc/stringloops/asm/ppc_asm.h b/tools/testing/selftests/powerpc/stringloops/asm/ppc_asm.h
new file mode 100644
index 000000000..d2c0a911f
--- /dev/null
+++ b/tools/testing/selftests/powerpc/stringloops/asm/ppc_asm.h
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _PPC_ASM_H
+#define __PPC_ASM_H
+#include <ppc-asm.h>
+
+#ifndef r1
+#define r1 sp
+#endif
+
+#define _GLOBAL(A) FUNC_START(test_ ## A)
+#define _GLOBAL_TOC(A) FUNC_START(test_ ## A)
+
+#define CONFIG_ALTIVEC
+
+#define R14 r14
+#define R15 r15
+#define R16 r16
+#define R17 r17
+#define R18 r18
+#define R19 r19
+#define R20 r20
+#define R21 r21
+#define R22 r22
+#define R29 r29
+#define R30 r30
+#define R31 r31
+
+#define STACKFRAMESIZE	256
+#define STK_REG(i)	(112 + ((i)-14)*8)
+
+#define BEGIN_FTR_SECTION
+#define END_FTR_SECTION_IFSET(val)
+#endif
diff --git a/tools/testing/selftests/powerpc/stringloops/memcmp.c b/tools/testing/selftests/powerpc/stringloops/memcmp.c
new file mode 100644
index 000000000..b1fa75469
--- /dev/null
+++ b/tools/testing/selftests/powerpc/stringloops/memcmp.c
@@ -0,0 +1,159 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <malloc.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include "utils.h"
+
+#define SIZE 256
+#define ITERATIONS 10000
+
+#define LARGE_SIZE (5 * 1024)
+#define LARGE_ITERATIONS 1000
+#define LARGE_MAX_OFFSET 32
+#define LARGE_SIZE_START 4096
+
+#define MAX_OFFSET_DIFF_S1_S2 48
+
+int vmx_count;
+int enter_vmx_ops(void)
+{
+	vmx_count++;
+	return 1;
+}
+
+void exit_vmx_ops(void)
+{
+	vmx_count--;
+}
+int test_memcmp(const void *s1, const void *s2, size_t n);
+
+/* test all offsets and lengths */
+static void test_one(char *s1, char *s2, unsigned long max_offset,
+		unsigned long size_start, unsigned long max_size)
+{
+	unsigned long offset, size;
+
+	for (offset = 0; offset < max_offset; offset++) {
+		for (size = size_start; size < (max_size - offset); size++) {
+			int x, y;
+			unsigned long i;
+
+			y = memcmp(s1+offset, s2+offset, size);
+			x = test_memcmp(s1+offset, s2+offset, size);
+
+			if (((x ^ y) < 0) &&	/* Trick to compare sign */
+				((x | y) != 0)) { /* check for zero */
+				printf("memcmp returned %d, should have returned %d (offset %ld size %ld)\n", x, y, offset, size);
+
+				for (i = offset; i < offset+size; i++)
+					printf("%02x ", s1[i]);
+				printf("\n");
+
+				for (i = offset; i < offset+size; i++)
+					printf("%02x ", s2[i]);
+				printf("\n");
+				abort();
+			}
+
+			if (vmx_count != 0) {
+				printf("vmx enter/exit not paired.(offset:%ld size:%ld s1:%p s2:%p vc:%d\n",
+					offset, size, s1, s2, vmx_count);
+				printf("\n");
+				abort();
+			}
+		}
+	}
+}
+
+static int testcase(bool islarge)
+{
+	char *s1;
+	char *s2;
+	unsigned long i;
+
+	unsigned long comp_size = (islarge ? LARGE_SIZE : SIZE);
+	unsigned long alloc_size = comp_size + MAX_OFFSET_DIFF_S1_S2;
+	int iterations = islarge ? LARGE_ITERATIONS : ITERATIONS;
+
+	s1 = memalign(128, alloc_size);
+	if (!s1) {
+		perror("memalign");
+		exit(1);
+	}
+
+	s2 = memalign(128, alloc_size);
+	if (!s2) {
+		perror("memalign");
+		exit(1);
+	}
+
+	srandom(time(0));
+
+	for (i = 0; i < iterations; i++) {
+		unsigned long j;
+		unsigned long change;
+		char *rand_s1 = s1;
+		char *rand_s2 = s2;
+
+		for (j = 0; j < alloc_size; j++)
+			s1[j] = random();
+
+		rand_s1 += random() % MAX_OFFSET_DIFF_S1_S2;
+		rand_s2 += random() % MAX_OFFSET_DIFF_S1_S2;
+		memcpy(rand_s2, rand_s1, comp_size);
+
+		/* change one byte */
+		change = random() % comp_size;
+		rand_s2[change] = random() & 0xff;
+
+		if (islarge)
+			test_one(rand_s1, rand_s2, LARGE_MAX_OFFSET,
+					LARGE_SIZE_START, comp_size);
+		else
+			test_one(rand_s1, rand_s2, SIZE, 0, comp_size);
+	}
+
+	srandom(time(0));
+
+	for (i = 0; i < iterations; i++) {
+		unsigned long j;
+		unsigned long change;
+		char *rand_s1 = s1;
+		char *rand_s2 = s2;
+
+		for (j = 0; j < alloc_size; j++)
+			s1[j] = random();
+
+		rand_s1 += random() % MAX_OFFSET_DIFF_S1_S2;
+		rand_s2 += random() % MAX_OFFSET_DIFF_S1_S2;
+		memcpy(rand_s2, rand_s1, comp_size);
+
+		/* change multiple bytes, 1/8 of total */
+		for (j = 0; j < comp_size / 8; j++) {
+			change = random() % comp_size;
+			s2[change] = random() & 0xff;
+		}
+
+		if (islarge)
+			test_one(rand_s1, rand_s2, LARGE_MAX_OFFSET,
+					LARGE_SIZE_START, comp_size);
+		else
+			test_one(rand_s1, rand_s2, SIZE, 0, comp_size);
+	}
+
+	return 0;
+}
+
+static int testcases(void)
+{
+	testcase(0);
+	testcase(1);
+	return 0;
+}
+
+int main(void)
+{
+	test_harness_set_timeout(300);
+	return test_harness(testcases, "memcmp");
+}
diff --git a/tools/testing/selftests/powerpc/stringloops/memcmp_32.S b/tools/testing/selftests/powerpc/stringloops/memcmp_32.S
new file mode 120000
index 000000000..056f2b3af
--- /dev/null
+++ b/tools/testing/selftests/powerpc/stringloops/memcmp_32.S
@@ -0,0 +1 @@
+../../../../../arch/powerpc/lib/memcmp_32.S
+\ No newline at end of file
diff --git a/tools/testing/selftests/powerpc/stringloops/memcmp_64.S b/tools/testing/selftests/powerpc/stringloops/memcmp_64.S
new file mode 120000
index 000000000..9bc87e438
--- /dev/null
+++ b/tools/testing/selftests/powerpc/stringloops/memcmp_64.S
@@ -0,0 +1 @@
+../../../../../arch/powerpc/lib/memcmp_64.S
+\ No newline at end of file
diff --git a/tools/testing/selftests/powerpc/stringloops/string.c b/tools/testing/selftests/powerpc/stringloops/string.c
new file mode 100644
index 000000000..45e777541
--- /dev/null
+++ b/tools/testing/selftests/powerpc/stringloops/string.c
@@ -0,0 +1,21 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copied from linux/lib/string.c
+ *
+ *  Copyright (C) 1991, 1992  Linus Torvalds
+ */
+
+#include <stddef.h>
+
+/**
+ * strlen - Find the length of a string
+ * @s: The string to be sized
+ */
+size_t test_strlen(const char *s)
+{
+	const char *sc;
+
+	for (sc = s; *sc != '\0'; ++sc)
+		/* nothing */;
+	return sc - s;
+}
diff --git a/tools/testing/selftests/powerpc/stringloops/strlen.c b/tools/testing/selftests/powerpc/stringloops/strlen.c
new file mode 100644
index 000000000..9055ebc48
--- /dev/null
+++ b/tools/testing/selftests/powerpc/stringloops/strlen.c
@@ -0,0 +1,127 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <malloc.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include "utils.h"
+
+#define SIZE 256
+#define ITERATIONS 1000
+#define ITERATIONS_BENCH 100000
+
+int test_strlen(const void *s);
+
+/* test all offsets and lengths */
+static void test_one(char *s)
+{
+	unsigned long offset;
+
+	for (offset = 0; offset < SIZE; offset++) {
+		int x, y;
+		unsigned long i;
+
+		y = strlen(s + offset);
+		x = test_strlen(s + offset);
+
+		if (x != y) {
+			printf("strlen() returned %d, should have returned %d (%p offset %ld)\n", x, y, s, offset);
+
+			for (i = offset; i < SIZE; i++)
+				printf("%02x ", s[i]);
+			printf("\n");
+		}
+	}
+}
+
+static void bench_test(char *s)
+{
+	struct timespec ts_start, ts_end;
+	int i;
+
+	clock_gettime(CLOCK_MONOTONIC, &ts_start);
+
+	for (i = 0; i < ITERATIONS_BENCH; i++)
+		test_strlen(s);
+
+	clock_gettime(CLOCK_MONOTONIC, &ts_end);
+
+	printf("len %3.3d : time = %.6f\n", test_strlen(s), ts_end.tv_sec - ts_start.tv_sec + (ts_end.tv_nsec - ts_start.tv_nsec) / 1e9);
+}
+
+static int testcase(void)
+{
+	char *s;
+	unsigned long i;
+
+	s = memalign(128, SIZE);
+	if (!s) {
+		perror("memalign");
+		exit(1);
+	}
+
+	srandom(1);
+
+	memset(s, 0, SIZE);
+	for (i = 0; i < SIZE; i++) {
+		char c;
+
+		do {
+			c = random() & 0x7f;
+		} while (!c);
+		s[i] = c;
+		test_one(s);
+	}
+
+	for (i = 0; i < ITERATIONS; i++) {
+		unsigned long j;
+
+		for (j = 0; j < SIZE; j++) {
+			char c;
+
+			do {
+				c = random() & 0x7f;
+			} while (!c);
+			s[j] = c;
+		}
+		for (j = 0; j < sizeof(long); j++) {
+			s[SIZE - 1 - j] = 0;
+			test_one(s);
+		}
+	}
+
+	for (i = 0; i < SIZE; i++) {
+		char c;
+
+		do {
+			c = random() & 0x7f;
+		} while (!c);
+		s[i] = c;
+	}
+
+	bench_test(s);
+
+	s[16] = 0;
+	bench_test(s);
+
+	s[8] = 0;
+	bench_test(s);
+
+	s[4] = 0;
+	bench_test(s);
+
+	s[3] = 0;
+	bench_test(s);
+
+	s[2] = 0;
+	bench_test(s);
+
+	s[1] = 0;
+	bench_test(s);
+
+	return 0;
+}
+
+int main(void)
+{
+	return test_harness(testcase, "strlen");
+}
diff --git a/tools/testing/selftests/powerpc/stringloops/strlen_32.S b/tools/testing/selftests/powerpc/stringloops/strlen_32.S
new file mode 120000
index 000000000..72b13731b
--- /dev/null
+++ b/tools/testing/selftests/powerpc/stringloops/strlen_32.S
@@ -0,0 +1 @@
+../../../../../arch/powerpc/lib/strlen_32.S
+\ No newline at end of file
diff --git a/tools/testing/selftests/powerpc/switch_endian/.gitignore b/tools/testing/selftests/powerpc/switch_endian/.gitignore
new file mode 100644
index 000000000..89e762eab
--- /dev/null
+++ b/tools/testing/selftests/powerpc/switch_endian/.gitignore
@@ -0,0 +1,2 @@
+switch_endian_test
+check-reversed.S
diff --git a/tools/testing/selftests/powerpc/switch_endian/Makefile b/tools/testing/selftests/powerpc/switch_endian/Makefile
new file mode 100644
index 000000000..bdc081afe
--- /dev/null
+++ b/tools/testing/selftests/powerpc/switch_endian/Makefile
@@ -0,0 +1,18 @@
+# SPDX-License-Identifier: GPL-2.0
+TEST_GEN_PROGS := switch_endian_test
+
+ASFLAGS += -O2 -Wall -g -nostdlib -m64
+
+EXTRA_CLEAN = $(OUTPUT)/*.o $(OUTPUT)/check-reversed.S
+
+top_srcdir = ../../../../..
+include ../../lib.mk
+
+$(OUTPUT)/switch_endian_test: ASFLAGS += -I $(OUTPUT)
+$(OUTPUT)/switch_endian_test: $(OUTPUT)/check-reversed.S
+
+$(OUTPUT)/check-reversed.o: $(OUTPUT)/check.o
+	$(CROSS_COMPILE)objcopy -j .text --reverse-bytes=4 -O binary $< $@
+
+$(OUTPUT)/check-reversed.S: $(OUTPUT)/check-reversed.o
+	hexdump -v -e '/1 ".byte 0x%02X\n"' $< > $@
diff --git a/tools/testing/selftests/powerpc/switch_endian/check.S b/tools/testing/selftests/powerpc/switch_endian/check.S
new file mode 100644
index 000000000..927a5c675
--- /dev/null
+++ b/tools/testing/selftests/powerpc/switch_endian/check.S
@@ -0,0 +1,101 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include "common.h"
+
+/*
+ * Checks that registers contain what we expect, ie. they were not clobbered by
+ * the syscall.
+ *
+ * r15: pattern to check registers against.
+ *
+ * At the end r3 == 0 if everything's OK.
+ */
+	nop			# guaranteed to be illegal in reverse-endian
+	mr	r9,r15
+	cmpd    r9,r3		# check r3
+	bne     1f
+	addi    r9,r15,4	# check r4
+	cmpd    r9,r4
+	bne     1f
+	lis     r9,0x00FF	# check CR
+	ori     r9,r9,0xF000
+	mfcr    r10
+	and     r10,r10,r9
+	cmpw    r9,r10
+	addi    r9,r15,34
+	bne     1f
+	addi    r9,r15,32	# check LR
+	mflr    r10
+	cmpd    r9,r10
+	bne     1f
+	addi    r9,r15,5	# check r5
+	cmpd    r9,r5
+	bne     1f
+	addi    r9,r15,6	# check r6
+	cmpd    r9,r6
+	bne     1f
+	addi    r9,r15,7	# check r7
+	cmpd    r9,r7
+	bne     1f
+	addi    r9,r15,8	# check r8
+	cmpd    r9,r8
+	bne     1f
+	addi    r9,r15,13	# check r13
+	cmpd    r9,r13
+	bne     1f
+	addi    r9,r15,14	# check r14
+	cmpd    r9,r14
+	bne     1f
+	addi    r9,r15,16	# check r16
+	cmpd    r9,r16
+	bne     1f
+	addi    r9,r15,17	# check r17
+	cmpd    r9,r17
+	bne     1f
+	addi    r9,r15,18	# check r18
+	cmpd    r9,r18
+	bne     1f
+	addi    r9,r15,19	# check r19
+	cmpd    r9,r19
+	bne     1f
+	addi    r9,r15,20	# check r20
+	cmpd    r9,r20
+	bne     1f
+	addi    r9,r15,21	# check r21
+	cmpd    r9,r21
+	bne     1f
+	addi    r9,r15,22	# check r22
+	cmpd    r9,r22
+	bne     1f
+	addi    r9,r15,23	# check r23
+	cmpd    r9,r23
+	bne     1f
+	addi    r9,r15,24	# check r24
+	cmpd    r9,r24
+	bne     1f
+	addi    r9,r15,25	# check r25
+	cmpd    r9,r25
+	bne     1f
+	addi    r9,r15,26	# check r26
+	cmpd    r9,r26
+	bne     1f
+	addi    r9,r15,27	# check r27
+	cmpd    r9,r27
+	bne     1f
+	addi    r9,r15,28	# check r28
+	cmpd    r9,r28
+	bne     1f
+	addi    r9,r15,29	# check r29
+	cmpd    r9,r29
+	bne     1f
+	addi    r9,r15,30	# check r30
+	cmpd    r9,r30
+	bne     1f
+	addi    r9,r15,31	# check r31
+	cmpd    r9,r31
+	bne     1f
+	b	2f
+1:	mr	r3, r9
+	li	r0, __NR_exit
+	sc
+2:	li	r0, __NR_switch_endian
+	nop
diff --git a/tools/testing/selftests/powerpc/switch_endian/common.h b/tools/testing/selftests/powerpc/switch_endian/common.h
new file mode 100644
index 000000000..1434cbc2a
--- /dev/null
+++ b/tools/testing/selftests/powerpc/switch_endian/common.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <ppc-asm.h>
+#include <asm/unistd.h>
+
+#ifndef __NR_switch_endian
+#define __NR_switch_endian 363
+#endif
diff --git a/tools/testing/selftests/powerpc/switch_endian/switch_endian_test.S b/tools/testing/selftests/powerpc/switch_endian/switch_endian_test.S
new file mode 100644
index 000000000..cc4930467
--- /dev/null
+++ b/tools/testing/selftests/powerpc/switch_endian/switch_endian_test.S
@@ -0,0 +1,82 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include "common.h"
+
+	.data
+	.balign 8
+message:
+	.ascii "success: switch_endian_test\n\0"
+
+	.section ".toc"
+	.balign 8
+pattern:
+	.8byte 0x5555AAAA5555AAAA
+
+	.text
+FUNC_START(_start)
+	/* Load the pattern */
+	ld	r15, pattern@TOC(%r2)
+
+	/* Setup CR, only CR2-CR4 are maintained */
+	lis	r3, 0x00FF
+	ori	r3, r3, 0xF000
+	mtcr	r3
+
+	/* Load the pattern slightly modified into the registers */
+	mr	r3, r15
+	addi	r4, r15, 4
+
+	addi	r5, r15, 32
+	mtlr	r5
+
+	addi	r5, r15, 5
+	addi	r6, r15, 6
+	addi	r7, r15, 7
+	addi	r8, r15, 8
+
+	/* r9 - r12 are clobbered */
+
+	addi	r13, r15, 13
+	addi	r14, r15, 14
+
+	/* Skip r15 we're using it */
+
+	addi	r16, r15, 16
+	addi	r17, r15, 17
+	addi	r18, r15, 18
+	addi	r19, r15, 19
+	addi	r20, r15, 20
+	addi	r21, r15, 21
+	addi	r22, r15, 22
+	addi	r23, r15, 23
+	addi	r24, r15, 24
+	addi	r25, r15, 25
+	addi	r26, r15, 26
+	addi	r27, r15, 27
+	addi	r28, r15, 28
+	addi	r29, r15, 29
+	addi	r30, r15, 30
+	addi	r31, r15, 31
+
+	/*
+	 * Call the syscall to switch endian.
+	 * It clobbers r9-r12, XER, CTR and CR0-1,5-7.
+	 */
+	li r0, __NR_switch_endian
+	sc
+
+#include "check-reversed.S"
+
+	/* Flip back, r0 already has the switch syscall number */
+	.long	0x02000044	/* sc */
+
+#include "check.S"
+
+	li	r0, __NR_write
+	li	r3, 1	/* stdout */
+	ld	r4, message@got(%r2)
+	li	r5, 28	/* strlen(message3) */
+	sc
+	li      r0, __NR_exit
+	li	r3, 0
+	sc
+	b       .
diff --git a/tools/testing/selftests/powerpc/syscalls/.gitignore b/tools/testing/selftests/powerpc/syscalls/.gitignore
new file mode 100644
index 000000000..f0f3fcc9d
--- /dev/null
+++ b/tools/testing/selftests/powerpc/syscalls/.gitignore
@@ -0,0 +1 @@
+ipc_unmuxed
diff --git a/tools/testing/selftests/powerpc/syscalls/Makefile b/tools/testing/selftests/powerpc/syscalls/Makefile
new file mode 100644
index 000000000..161b88463
--- /dev/null
+++ b/tools/testing/selftests/powerpc/syscalls/Makefile
@@ -0,0 +1,8 @@
+TEST_GEN_PROGS := ipc_unmuxed
+
+CFLAGS += -I../../../../../usr/include
+
+top_srcdir = ../../../../..
+include ../../lib.mk
+
+$(TEST_GEN_PROGS): ../harness.c
diff --git a/tools/testing/selftests/powerpc/syscalls/ipc.h b/tools/testing/selftests/powerpc/syscalls/ipc.h
new file mode 100644
index 000000000..26a20682c
--- /dev/null
+++ b/tools/testing/selftests/powerpc/syscalls/ipc.h
@@ -0,0 +1,48 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifdef __NR_semop
+DO_TEST(semop, __NR_semop)
+#endif
+
+#ifdef __NR_semget
+DO_TEST(semget, __NR_semget)
+#endif
+
+#ifdef __NR_semctl
+DO_TEST(semctl, __NR_semctl)
+#endif
+
+#ifdef __NR_semtimedop
+DO_TEST(semtimedop, __NR_semtimedop)
+#endif
+
+#ifdef __NR_msgsnd
+DO_TEST(msgsnd, __NR_msgsnd)
+#endif
+
+#ifdef __NR_msgrcv
+DO_TEST(msgrcv, __NR_msgrcv)
+#endif
+
+#ifdef __NR_msgget
+DO_TEST(msgget, __NR_msgget)
+#endif
+
+#ifdef __NR_msgctl
+DO_TEST(msgctl, __NR_msgctl)
+#endif
+
+#ifdef __NR_shmat
+DO_TEST(shmat, __NR_shmat)
+#endif
+
+#ifdef __NR_shmdt
+DO_TEST(shmdt, __NR_shmdt)
+#endif
+
+#ifdef __NR_shmget
+DO_TEST(shmget, __NR_shmget)
+#endif
+
+#ifdef __NR_shmctl
+DO_TEST(shmctl, __NR_shmctl)
+#endif
diff --git a/tools/testing/selftests/powerpc/syscalls/ipc_unmuxed.c b/tools/testing/selftests/powerpc/syscalls/ipc_unmuxed.c
new file mode 100644
index 000000000..2ac02706f
--- /dev/null
+++ b/tools/testing/selftests/powerpc/syscalls/ipc_unmuxed.c
@@ -0,0 +1,61 @@
+/*
+ * Copyright 2015, Michael Ellerman, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * This test simply tests that certain syscalls are implemented. It doesn't
+ * actually exercise their logic in any way.
+ */
+
+#define _GNU_SOURCE
+#include <errno.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+
+#include "utils.h"
+
+
+#define DO_TEST(_name, _num)	\
+static int test_##_name(void)			\
+{						\
+	int rc;					\
+	printf("Testing " #_name);		\
+	errno = 0;				\
+	rc = syscall(_num, -1, 0, 0, 0, 0, 0);	\
+	printf("\treturned %d, errno %d\n", rc, errno); \
+	return errno == ENOSYS;			\
+}
+
+#include "ipc.h"
+#undef DO_TEST
+
+static int ipc_unmuxed(void)
+{
+	int tests_done = 0;
+
+#define DO_TEST(_name, _num)		\
+	FAIL_IF(test_##_name());	\
+	tests_done++;
+
+#include "ipc.h"
+#undef DO_TEST
+
+	/*
+	 * If we ran no tests then it means none of the syscall numbers were
+	 * defined, possibly because we were built against old headers. But it
+	 * means we didn't really test anything, so instead of passing mark it
+	 * as a skip to give the user a clue.
+	 */
+	SKIP_IF(tests_done == 0);
+
+	return 0;
+}
+
+int main(void)
+{
+	return test_harness(ipc_unmuxed, "ipc_unmuxed");
+}
diff --git a/tools/testing/selftests/powerpc/tm/.gitignore b/tools/testing/selftests/powerpc/tm/.gitignore
new file mode 100644
index 000000000..c3ee8393d
--- /dev/null
+++ b/tools/testing/selftests/powerpc/tm/.gitignore
@@ -0,0 +1,17 @@
+tm-resched-dscr
+tm-syscall
+tm-signal-msr-resv
+tm-signal-stack
+tm-vmxcopy
+tm-fork
+tm-tar
+tm-tmspr
+tm-exec
+tm-signal-context-chk-fpu
+tm-signal-context-chk-gpr
+tm-signal-context-chk-vmx
+tm-signal-context-chk-vsx
+tm-vmx-unavail
+tm-unavailable
+tm-trap
+tm-sigreturn
diff --git a/tools/testing/selftests/powerpc/tm/Makefile b/tools/testing/selftests/powerpc/tm/Makefile
new file mode 100644
index 000000000..9fc2cf6fb
--- /dev/null
+++ b/tools/testing/selftests/powerpc/tm/Makefile
@@ -0,0 +1,26 @@
+# SPDX-License-Identifier: GPL-2.0
+SIGNAL_CONTEXT_CHK_TESTS := tm-signal-context-chk-gpr tm-signal-context-chk-fpu \
+	tm-signal-context-chk-vmx tm-signal-context-chk-vsx
+
+TEST_GEN_PROGS := tm-resched-dscr tm-syscall tm-signal-msr-resv tm-signal-stack \
+	tm-vmxcopy tm-fork tm-tar tm-tmspr tm-vmx-unavail tm-unavailable tm-trap \
+	$(SIGNAL_CONTEXT_CHK_TESTS) tm-sigreturn
+
+top_srcdir = ../../../../..
+include ../../lib.mk
+
+$(TEST_GEN_PROGS): ../harness.c ../utils.c
+
+CFLAGS += -mhtm
+
+$(OUTPUT)/tm-syscall: tm-syscall-asm.S
+$(OUTPUT)/tm-syscall: CFLAGS += -I../../../../../usr/include
+$(OUTPUT)/tm-tmspr: CFLAGS += -pthread
+$(OUTPUT)/tm-vmx-unavail: CFLAGS += -pthread -m64
+$(OUTPUT)/tm-resched-dscr: ../pmu/lib.c
+$(OUTPUT)/tm-unavailable: CFLAGS += -O0 -pthread -m64 -Wno-error=uninitialized -mvsx
+$(OUTPUT)/tm-trap: CFLAGS += -O0 -pthread -m64
+
+SIGNAL_CONTEXT_CHK_TESTS := $(patsubst %,$(OUTPUT)/%,$(SIGNAL_CONTEXT_CHK_TESTS))
+$(SIGNAL_CONTEXT_CHK_TESTS): tm-signal.S
+$(SIGNAL_CONTEXT_CHK_TESTS): CFLAGS += -mhtm -m64 -mvsx
diff --git a/tools/testing/selftests/powerpc/tm/tm-exec.c b/tools/testing/selftests/powerpc/tm/tm-exec.c
new file mode 100644
index 000000000..3d27fa0ec
--- /dev/null
+++ b/tools/testing/selftests/powerpc/tm/tm-exec.c
@@ -0,0 +1,70 @@
+/*
+ * Copyright 2016, Cyril Bur, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Syscalls can be performed provided the transactions are suspended.
+ * The exec() class of syscall is unique as a new process is loaded.
+ *
+ * It makes little sense for after an exec() call for the previously
+ * suspended transaction to still exist.
+ */
+
+#define _GNU_SOURCE
+#include <errno.h>
+#include <inttypes.h>
+#include <libgen.h>
+#include <pthread.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "utils.h"
+#include "tm.h"
+
+static char *path;
+
+static int test_exec(void)
+{
+	SKIP_IF(!have_htm());
+
+	asm __volatile__(
+		"tbegin.;"
+		"blt    1f; "
+		"tsuspend.;"
+		"1: ;"
+		: : : "memory");
+
+	execl(path, "tm-exec", "--child", NULL);
+
+	/* Shouldn't get here */
+	perror("execl() failed");
+	return 1;
+}
+
+static int after_exec(void)
+{
+	asm __volatile__(
+		"tbegin.;"
+		"blt    1f;"
+		"tsuspend.;"
+		"1: ;"
+		: : : "memory");
+
+	FAIL_IF(failure_is_nesting());
+	return 0;
+}
+
+int main(int argc, char *argv[])
+{
+	path = argv[0];
+
+	if (argc > 1 && strcmp(argv[1], "--child") == 0)
+		return after_exec();
+
+	return test_harness(test_exec, "tm_exec");
+}
diff --git a/tools/testing/selftests/powerpc/tm/tm-fork.c b/tools/testing/selftests/powerpc/tm/tm-fork.c
new file mode 100644
index 000000000..8d48579b7
--- /dev/null
+++ b/tools/testing/selftests/powerpc/tm/tm-fork.c
@@ -0,0 +1,42 @@
+/*
+ * Copyright 2015, Michael Neuling, IBM Corp.
+ * Licensed under GPLv2.
+ *
+ * Edited: Rashmica Gupta, Nov 2015
+ *
+ * This test does a fork syscall inside a transaction. Basic sniff test
+ * to see if we can enter the kernel during a transaction.
+ */
+
+#include <errno.h>
+#include <inttypes.h>
+#include <pthread.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include "utils.h"
+#include "tm.h"
+
+int test_fork(void)
+{
+	SKIP_IF(!have_htm());
+
+	asm __volatile__(
+		"tbegin.;"
+		"blt    1f; "
+		"li     0, 2;"  /* fork syscall */
+		"sc  ;"
+		"tend.;"
+		"1: ;"
+		: : : "memory", "r0");
+	/* If we reach here, we've passed.  Otherwise we've probably crashed
+	 * the kernel */
+
+	return 0;
+}
+
+int main(int argc, char *argv[])
+{
+	return test_harness(test_fork, "tm_fork");
+}
diff --git a/tools/testing/selftests/powerpc/tm/tm-resched-dscr.c b/tools/testing/selftests/powerpc/tm/tm-resched-dscr.c
new file mode 100644
index 000000000..4cdb83964
--- /dev/null
+++ b/tools/testing/selftests/powerpc/tm/tm-resched-dscr.c
@@ -0,0 +1,100 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Test context switching to see if the DSCR SPR is correctly preserved
+ * when within a transaction.
+ *
+ * Note: We assume that the DSCR has been left at the default value (0)
+ * for all CPUs.
+ *
+ * Method:
+ *
+ * Set a value into the DSCR.
+ *
+ * Start a transaction, and suspend it (*).
+ *
+ * Hard loop checking to see if the transaction has become doomed.
+ *
+ * Now that we *may* have been preempted, record the DSCR and TEXASR SPRS.
+ *
+ * If the abort was because of a context switch, check the DSCR value.
+ * Otherwise, try again.
+ *
+ * (*) If the transaction is not suspended we can't see the problem because
+ * the transaction abort handler will restore the DSCR to it's checkpointed
+ * value before we regain control.
+ */
+
+#include <inttypes.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <asm/tm.h>
+
+#include "utils.h"
+#include "tm.h"
+#include "../pmu/lib.h"
+
+#define SPRN_DSCR       0x03
+
+int test_body(void)
+{
+	uint64_t rv, dscr1 = 1, dscr2, texasr;
+
+	SKIP_IF(!have_htm());
+
+	printf("Check DSCR TM context switch: ");
+	fflush(stdout);
+	for (;;) {
+		asm __volatile__ (
+			/* set a known value into the DSCR */
+			"ld      3, %[dscr1];"
+			"mtspr   %[sprn_dscr], 3;"
+
+			"li      %[rv], 1;"
+			/* start and suspend a transaction */
+			"tbegin.;"
+			"beq     1f;"
+			"tsuspend.;"
+
+			/* hard loop until the transaction becomes doomed */
+			"2: ;"
+			"tcheck 0;"
+			"bc      4, 0, 2b;"
+
+			/* record DSCR and TEXASR */
+			"mfspr   3, %[sprn_dscr];"
+			"std     3, %[dscr2];"
+			"mfspr   3, %[sprn_texasr];"
+			"std     3, %[texasr];"
+
+			"tresume.;"
+			"tend.;"
+			"li      %[rv], 0;"
+			"1: ;"
+			: [rv]"=r"(rv), [dscr2]"=m"(dscr2), [texasr]"=m"(texasr)
+			: [dscr1]"m"(dscr1)
+			, [sprn_dscr]"i"(SPRN_DSCR), [sprn_texasr]"i"(SPRN_TEXASR)
+			: "memory", "r3"
+		);
+		assert(rv); /* make sure the transaction aborted */
+		if ((texasr >> 56) != TM_CAUSE_RESCHED) {
+			continue;
+		}
+		if (dscr2 != dscr1) {
+			printf(" FAIL\n");
+			return 1;
+		} else {
+			printf(" OK\n");
+			return 0;
+		}
+	}
+}
+
+static int tm_resched_dscr(void)
+{
+	return eat_cpu(test_body);
+}
+
+int main(int argc, const char *argv[])
+{
+	return test_harness(tm_resched_dscr, "tm_resched_dscr");
+}
diff --git a/tools/testing/selftests/powerpc/tm/tm-signal-context-chk-fpu.c b/tools/testing/selftests/powerpc/tm/tm-signal-context-chk-fpu.c
new file mode 100644
index 000000000..c760debbd
--- /dev/null
+++ b/tools/testing/selftests/powerpc/tm/tm-signal-context-chk-fpu.c
@@ -0,0 +1,92 @@
+/*
+ * Copyright 2016, Cyril Bur, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ *
+ * Test the kernel's signal frame code.
+ *
+ * The kernel sets up two sets of ucontexts if the signal was to be
+ * delivered while the thread was in a transaction.
+ * Expected behaviour is that the checkpointed state is in the user
+ * context passed to the signal handler. The speculated state can be
+ * accessed with the uc_link pointer.
+ *
+ * The rationale for this is that if TM unaware code (which linked
+ * against TM libs) installs a signal handler it will not know of the
+ * speculative nature of the 'live' registers and may infer the wrong
+ * thing.
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <signal.h>
+#include <unistd.h>
+
+#include <altivec.h>
+
+#include "utils.h"
+#include "tm.h"
+
+#define MAX_ATTEMPT 500000
+
+#define NV_FPU_REGS 18
+
+long tm_signal_self_context_load(pid_t pid, long *gprs, double *fps, vector int *vms, vector int *vss);
+
+/* Be sure there are 2x as many as there are NV FPU regs (2x18) */
+static double fps[] = {
+	 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
+	-1,-2,-3,-4,-5,-6,-7,-8,-9,-10,-11,-12,-13,-14,-15,-16,-17,-18
+};
+
+static sig_atomic_t fail;
+
+static void signal_usr1(int signum, siginfo_t *info, void *uc)
+{
+	int i;
+	ucontext_t *ucp = uc;
+	ucontext_t *tm_ucp = ucp->uc_link;
+
+	for (i = 0; i < NV_FPU_REGS && !fail; i++) {
+		fail = (ucp->uc_mcontext.fp_regs[i + 14] != fps[i]);
+		fail |= (tm_ucp->uc_mcontext.fp_regs[i + 14] != fps[i + NV_FPU_REGS]);
+		if (fail)
+			printf("Failed on %d FP %g or %g\n", i, ucp->uc_mcontext.fp_regs[i + 14], tm_ucp->uc_mcontext.fp_regs[i + 14]);
+	}
+}
+
+static int tm_signal_context_chk_fpu()
+{
+	struct sigaction act;
+	int i;
+	long rc;
+	pid_t pid = getpid();
+
+	SKIP_IF(!have_htm());
+
+	act.sa_sigaction = signal_usr1;
+	sigemptyset(&act.sa_mask);
+	act.sa_flags = SA_SIGINFO;
+	if (sigaction(SIGUSR1, &act, NULL) < 0) {
+		perror("sigaction sigusr1");
+		exit(1);
+	}
+
+	i = 0;
+	while (i < MAX_ATTEMPT && !fail) {
+		rc = tm_signal_self_context_load(pid, NULL, fps, NULL, NULL);
+		FAIL_IF(rc != pid);
+		i++;
+	}
+
+	return fail;
+}
+
+int main(void)
+{
+	return test_harness(tm_signal_context_chk_fpu, "tm_signal_context_chk_fpu");
+}
diff --git a/tools/testing/selftests/powerpc/tm/tm-signal-context-chk-gpr.c b/tools/testing/selftests/powerpc/tm/tm-signal-context-chk-gpr.c
new file mode 100644
index 000000000..df91330a0
--- /dev/null
+++ b/tools/testing/selftests/powerpc/tm/tm-signal-context-chk-gpr.c
@@ -0,0 +1,90 @@
+/*
+ * Copyright 2016, Cyril Bur, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ *
+ * Test the kernel's signal frame code.
+ *
+ * The kernel sets up two sets of ucontexts if the signal was to be
+ * delivered while the thread was in a transaction.
+ * Expected behaviour is that the checkpointed state is in the user
+ * context passed to the signal handler. The speculated state can be
+ * accessed with the uc_link pointer.
+ *
+ * The rationale for this is that if TM unaware code (which linked
+ * against TM libs) installs a signal handler it will not know of the
+ * speculative nature of the 'live' registers and may infer the wrong
+ * thing.
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <signal.h>
+#include <unistd.h>
+
+#include <altivec.h>
+
+#include "utils.h"
+#include "tm.h"
+
+#define MAX_ATTEMPT 500000
+
+#define NV_GPR_REGS 18
+
+long tm_signal_self_context_load(pid_t pid, long *gprs, double *fps, vector int *vms, vector int *vss);
+
+static sig_atomic_t fail;
+
+static long gps[] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
+					 -1,-2,-3,-4,-5,-6,-7,-8,-9,-10,-11,-12,-13,-14,-15,-16,-17,-18};
+
+static void signal_usr1(int signum, siginfo_t *info, void *uc)
+{
+	int i;
+	ucontext_t *ucp = uc;
+	ucontext_t *tm_ucp = ucp->uc_link;
+
+	for (i = 0; i < NV_GPR_REGS && !fail; i++) {
+		fail = (ucp->uc_mcontext.gp_regs[i + 14] != gps[i]);
+		fail |= (tm_ucp->uc_mcontext.gp_regs[i + 14] != gps[i + NV_GPR_REGS]);
+		if (fail)
+			printf("Failed on %d GPR %lu or %lu\n", i,
+					ucp->uc_mcontext.gp_regs[i + 14], tm_ucp->uc_mcontext.gp_regs[i + 14]);
+	}
+}
+
+static int tm_signal_context_chk_gpr()
+{
+	struct sigaction act;
+	int i;
+	long rc;
+	pid_t pid = getpid();
+
+	SKIP_IF(!have_htm());
+
+	act.sa_sigaction = signal_usr1;
+	sigemptyset(&act.sa_mask);
+	act.sa_flags = SA_SIGINFO;
+	if (sigaction(SIGUSR1, &act, NULL) < 0) {
+		perror("sigaction sigusr1");
+		exit(1);
+	}
+
+	i = 0;
+	while (i < MAX_ATTEMPT && !fail) {
+		rc = tm_signal_self_context_load(pid, gps, NULL, NULL, NULL);
+		FAIL_IF(rc != pid);
+		i++;
+	}
+
+	return fail;
+}
+
+int main(void)
+{
+	return test_harness(tm_signal_context_chk_gpr, "tm_signal_context_chk_gpr");
+}
diff --git a/tools/testing/selftests/powerpc/tm/tm-signal-context-chk-vmx.c b/tools/testing/selftests/powerpc/tm/tm-signal-context-chk-vmx.c
new file mode 100644
index 000000000..f0ee55fd5
--- /dev/null
+++ b/tools/testing/selftests/powerpc/tm/tm-signal-context-chk-vmx.c
@@ -0,0 +1,110 @@
+/*
+ * Copyright 2016, Cyril Bur, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ *
+ * Test the kernel's signal frame code.
+ *
+ * The kernel sets up two sets of ucontexts if the signal was to be
+ * delivered while the thread was in a transaction.
+ * Expected behaviour is that the checkpointed state is in the user
+ * context passed to the signal handler. The speculated state can be
+ * accessed with the uc_link pointer.
+ *
+ * The rationale for this is that if TM unaware code (which linked
+ * against TM libs) installs a signal handler it will not know of the
+ * speculative nature of the 'live' registers and may infer the wrong
+ * thing.
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <signal.h>
+#include <unistd.h>
+
+#include <altivec.h>
+
+#include "utils.h"
+#include "tm.h"
+
+#define MAX_ATTEMPT 500000
+
+#define NV_VMX_REGS 12
+
+long tm_signal_self_context_load(pid_t pid, long *gprs, double *fps, vector int *vms, vector int *vss);
+
+static sig_atomic_t fail;
+
+vector int vms[] = {
+	{1, 2, 3, 4 },{5, 6, 7, 8 },{9, 10,11,12},
+	{13,14,15,16},{17,18,19,20},{21,22,23,24},
+	{25,26,27,28},{29,30,31,32},{33,34,35,36},
+	{37,38,39,40},{41,42,43,44},{45,46,47,48},
+	{-1, -2, -3, -4}, {-5, -6, -7, -8}, {-9, -10,-11,-12},
+	{-13,-14,-15,-16},{-17,-18,-19,-20},{-21,-22,-23,-24},
+	{-25,-26,-27,-28},{-29,-30,-31,-32},{-33,-34,-35,-36},
+	{-37,-38,-39,-40},{-41,-42,-43,-44},{-45,-46,-47,-48}
+};
+
+static void signal_usr1(int signum, siginfo_t *info, void *uc)
+{
+	int i;
+	ucontext_t *ucp = uc;
+	ucontext_t *tm_ucp = ucp->uc_link;
+
+	for (i = 0; i < NV_VMX_REGS && !fail; i++) {
+		fail = memcmp(ucp->uc_mcontext.v_regs->vrregs[i + 20],
+				&vms[i], sizeof(vector int));
+		fail |= memcmp(tm_ucp->uc_mcontext.v_regs->vrregs[i + 20],
+				&vms[i + NV_VMX_REGS], sizeof (vector int));
+
+		if (fail) {
+			int j;
+
+			fprintf(stderr, "Failed on %d vmx 0x", i);
+			for (j = 0; j < 4; j++)
+				fprintf(stderr, "%04x", ucp->uc_mcontext.v_regs->vrregs[i + 20][j]);
+			fprintf(stderr, " vs 0x");
+			for (j = 0 ; j < 4; j++)
+				fprintf(stderr, "%04x", tm_ucp->uc_mcontext.v_regs->vrregs[i + 20][j]);
+			fprintf(stderr, "\n");
+		}
+	}
+}
+
+static int tm_signal_context_chk()
+{
+	struct sigaction act;
+	int i;
+	long rc;
+	pid_t pid = getpid();
+
+	SKIP_IF(!have_htm());
+
+	act.sa_sigaction = signal_usr1;
+	sigemptyset(&act.sa_mask);
+	act.sa_flags = SA_SIGINFO;
+	if (sigaction(SIGUSR1, &act, NULL) < 0) {
+		perror("sigaction sigusr1");
+		exit(1);
+	}
+
+	i = 0;
+	while (i < MAX_ATTEMPT && !fail) {
+		rc = tm_signal_self_context_load(pid, NULL, NULL, vms, NULL);
+		FAIL_IF(rc != pid);
+		i++;
+	}
+
+	return fail;
+}
+
+int main(void)
+{
+	return test_harness(tm_signal_context_chk, "tm_signal_context_chk_vmx");
+}
diff --git a/tools/testing/selftests/powerpc/tm/tm-signal-context-chk-vsx.c b/tools/testing/selftests/powerpc/tm/tm-signal-context-chk-vsx.c
new file mode 100644
index 000000000..b99c3d835
--- /dev/null
+++ b/tools/testing/selftests/powerpc/tm/tm-signal-context-chk-vsx.c
@@ -0,0 +1,125 @@
+/*
+ * Copyright 2016, Cyril Bur, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ *
+ * Test the kernel's signal frame code.
+ *
+ * The kernel sets up two sets of ucontexts if the signal was to be
+ * delivered while the thread was in a transaction.
+ * Expected behaviour is that the checkpointed state is in the user
+ * context passed to the signal handler. The speculated state can be
+ * accessed with the uc_link pointer.
+ *
+ * The rationale for this is that if TM unaware code (which linked
+ * against TM libs) installs a signal handler it will not know of the
+ * speculative nature of the 'live' registers and may infer the wrong
+ * thing.
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <signal.h>
+#include <unistd.h>
+
+#include <altivec.h>
+
+#include "utils.h"
+#include "tm.h"
+
+#define MAX_ATTEMPT 500000
+
+#define NV_VSX_REGS 12
+
+long tm_signal_self_context_load(pid_t pid, long *gprs, double *fps, vector int *vms, vector int *vss);
+
+static sig_atomic_t fail;
+
+vector int vss[] = {
+	{1, 2, 3, 4 },{5, 6, 7, 8 },{9, 10,11,12},
+	{13,14,15,16},{17,18,19,20},{21,22,23,24},
+	{25,26,27,28},{29,30,31,32},{33,34,35,36},
+	{37,38,39,40},{41,42,43,44},{45,46,47,48},
+	{-1, -2, -3, -4 },{-5, -6, -7, -8 },{-9, -10,-11,-12},
+	{-13,-14,-15,-16},{-17,-18,-19,-20},{-21,-22,-23,-24},
+	{-25,-26,-27,-28},{-29,-30,-31,-32},{-33,-34,-35,-36},
+	{-37,-38,-39,-40},{-41,-42,-43,-44},{-45,-46,-47,-48}
+};
+
+static void signal_usr1(int signum, siginfo_t *info, void *uc)
+{
+	int i;
+	uint8_t vsc[sizeof(vector int)];
+	uint8_t vst[sizeof(vector int)];
+	ucontext_t *ucp = uc;
+	ucontext_t *tm_ucp = ucp->uc_link;
+
+	/*
+	 * The other half of the VSX regs will be after v_regs.
+	 *
+	 * In short, vmx_reserve array holds everything. v_regs is a 16
+	 * byte aligned pointer at the start of vmx_reserve (vmx_reserve
+	 * may or may not be 16 aligned) where the v_regs structure exists.
+	 * (half of) The VSX regsters are directly after v_regs so the
+	 * easiest way to find them below.
+	 */
+	long *vsx_ptr = (long *)(ucp->uc_mcontext.v_regs + 1);
+	long *tm_vsx_ptr = (long *)(tm_ucp->uc_mcontext.v_regs + 1);
+	for (i = 0; i < NV_VSX_REGS && !fail; i++) {
+		memcpy(vsc, &ucp->uc_mcontext.fp_regs[i + 20], 8);
+		memcpy(vsc + 8, &vsx_ptr[20 + i], 8);
+		fail = memcmp(vsc, &vss[i], sizeof(vector int));
+		memcpy(vst, &tm_ucp->uc_mcontext.fp_regs[i + 20], 8);
+		memcpy(vst + 8, &tm_vsx_ptr[20 + i], 8);
+		fail |= memcmp(vst, &vss[i + NV_VSX_REGS], sizeof(vector int));
+
+		if (fail) {
+			int j;
+
+			fprintf(stderr, "Failed on %d vsx 0x", i);
+			for (j = 0; j < 16; j++)
+				fprintf(stderr, "%02x", vsc[j]);
+			fprintf(stderr, " vs 0x");
+			for (j = 0; j < 16; j++)
+				fprintf(stderr, "%02x", vst[j]);
+			fprintf(stderr, "\n");
+		}
+	}
+}
+
+static int tm_signal_context_chk()
+{
+	struct sigaction act;
+	int i;
+	long rc;
+	pid_t pid = getpid();
+
+	SKIP_IF(!have_htm());
+
+	act.sa_sigaction = signal_usr1;
+	sigemptyset(&act.sa_mask);
+	act.sa_flags = SA_SIGINFO;
+	if (sigaction(SIGUSR1, &act, NULL) < 0) {
+		perror("sigaction sigusr1");
+		exit(1);
+	}
+
+	i = 0;
+	while (i < MAX_ATTEMPT && !fail) {
+		rc = tm_signal_self_context_load(pid, NULL, NULL, NULL, vss);
+		FAIL_IF(rc != pid);
+		i++;
+	}
+
+	return fail;
+}
+
+int main(void)
+{
+	return test_harness(tm_signal_context_chk, "tm_signal_context_chk_vsx");
+}
diff --git a/tools/testing/selftests/powerpc/tm/tm-signal-msr-resv.c b/tools/testing/selftests/powerpc/tm/tm-signal-msr-resv.c
new file mode 100644
index 000000000..8c54d18b3
--- /dev/null
+++ b/tools/testing/selftests/powerpc/tm/tm-signal-msr-resv.c
@@ -0,0 +1,74 @@
+/*
+ * Copyright 2015, Michael Neuling, IBM Corp.
+ * Licensed under GPLv2.
+ *
+ * Test the kernel's signal return code to ensure that it doesn't
+ * crash when both the transactional and suspend MSR bits are set in
+ * the signal context.
+ *
+ * For this test, we send ourselves a SIGUSR1.  In the SIGUSR1 handler
+ * we modify the signal context to set both MSR TM S and T bits (which
+ * is "reserved" by the PowerISA). When we return from the signal
+ * handler (implicit sigreturn), the kernel should detect reserved MSR
+ * value and send us with a SIGSEGV.
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <signal.h>
+#include <unistd.h>
+
+#include "utils.h"
+#include "tm.h"
+
+int segv_expected = 0;
+
+void signal_segv(int signum)
+{
+	if (segv_expected && (signum == SIGSEGV))
+		_exit(0);
+	_exit(1);
+}
+
+void signal_usr1(int signum, siginfo_t *info, void *uc)
+{
+	ucontext_t *ucp = uc;
+
+	/* Link tm checkpointed context to normal context */
+	ucp->uc_link = ucp;
+	/* Set all TM bits so that the context is now invalid */
+#ifdef __powerpc64__
+	ucp->uc_mcontext.gp_regs[PT_MSR] |= (7ULL << 32);
+#else
+	ucp->uc_mcontext.uc_regs->gregs[PT_MSR] |= (7ULL);
+#endif
+	/* Should segv on return becuase of invalid context */
+	segv_expected = 1;
+}
+
+int tm_signal_msr_resv()
+{
+	struct sigaction act;
+
+	SKIP_IF(!have_htm());
+
+	act.sa_sigaction = signal_usr1;
+	sigemptyset(&act.sa_mask);
+	act.sa_flags = SA_SIGINFO;
+	if (sigaction(SIGUSR1, &act, NULL) < 0) {
+		perror("sigaction sigusr1");
+		exit(1);
+	}
+	if (signal(SIGSEGV, signal_segv) == SIG_ERR)
+		exit(1);
+
+	raise(SIGUSR1);
+
+	/* We shouldn't get here as we exit in the segv handler */
+	return 1;
+}
+
+int main(void)
+{
+	return test_harness(tm_signal_msr_resv, "tm_signal_msr_resv");
+}
diff --git a/tools/testing/selftests/powerpc/tm/tm-signal-stack.c b/tools/testing/selftests/powerpc/tm/tm-signal-stack.c
new file mode 100644
index 000000000..1f0eb5674
--- /dev/null
+++ b/tools/testing/selftests/powerpc/tm/tm-signal-stack.c
@@ -0,0 +1,76 @@
+/*
+ * Copyright 2015, Michael Neuling, IBM Corp.
+ * Licensed under GPLv2.
+ *
+ * Test the kernel's signal delievery code to ensure that we don't
+ * trelaim twice in the kernel signal delivery code.  This can happen
+ * if we trigger a signal when in a transaction and the stack pointer
+ * is bogus.
+ *
+ * This test case registers a SEGV handler, sets the stack pointer
+ * (r1) to NULL, starts a transaction and then generates a SEGV.  The
+ * SEGV should be handled but we exit here as the stack pointer is
+ * invalid and hance we can't sigreturn.  We only need to check that
+ * this flow doesn't crash the kernel.
+ */
+
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <signal.h>
+
+#include "utils.h"
+#include "tm.h"
+
+void signal_segv(int signum)
+{
+	/* This should never actually run since stack is foobar */
+	exit(1);
+}
+
+int tm_signal_stack()
+{
+	int pid;
+
+	SKIP_IF(!have_htm());
+
+	pid = fork();
+	if (pid < 0)
+		exit(1);
+
+	if (pid) { /* Parent */
+		/*
+		 * It's likely the whole machine will crash here so if
+		 * the child ever exits, we are good.
+		 */
+		wait(NULL);
+		return 0;
+	}
+
+	/*
+	 * The flow here is:
+	 * 1) register a signal handler (so signal delievery occurs)
+	 * 2) make stack pointer (r1) = NULL
+	 * 3) start transaction
+	 * 4) cause segv
+	 */
+	if (signal(SIGSEGV, signal_segv) == SIG_ERR)
+		exit(1);
+	asm volatile("li 1, 0 ;"		/* stack ptr == NULL */
+		     "1:"
+		     "tbegin.;"
+		     "beq 1b ;"			/* retry forever */
+		     "tsuspend.;"
+		     "ld 2, 0(1) ;"		/* trigger segv" */
+		     : : : "memory");
+
+	/* This should never get here due to above segv */
+	return 1;
+}
+
+int main(void)
+{
+	return test_harness(tm_signal_stack, "tm_signal_stack");
+}
diff --git a/tools/testing/selftests/powerpc/tm/tm-signal.S b/tools/testing/selftests/powerpc/tm/tm-signal.S
new file mode 100644
index 000000000..506a4ebaf
--- /dev/null
+++ b/tools/testing/selftests/powerpc/tm/tm-signal.S
@@ -0,0 +1,114 @@
+/*
+ * Copyright 2015, Cyril Bur, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include "basic_asm.h"
+#include "gpr_asm.h"
+#include "fpu_asm.h"
+#include "vmx_asm.h"
+#include "vsx_asm.h"
+
+/*
+ * Large caveat here being that the caller cannot expect the
+ * signal to always be sent! The hardware can (AND WILL!) abort
+ * the transaction between the tbegin and the tsuspend (however
+ * unlikely it seems or infrequently it actually happens).
+ * You have been warned.
+ */
+/* long tm_signal_self(pid_t pid, long *gprs, double *fps, vector *vms, vector *vss); */
+FUNC_START(tm_signal_self_context_load)
+	PUSH_BASIC_STACK(512)
+	/*
+	 * Don't strictly need to save and restore as it depends on if
+	 * we're going to use them, however this reduces messy logic
+	 */
+	PUSH_VMX(STACK_FRAME_LOCAL(5,0),r8)
+	PUSH_FPU(512)
+	PUSH_NVREGS_BELOW_FPU(512)
+	std r3, STACK_FRAME_PARAM(0)(sp) /* pid */
+	std r4, STACK_FRAME_PARAM(1)(sp) /* gps */
+	std r5, STACK_FRAME_PARAM(2)(sp) /* fps */
+	std r6, STACK_FRAME_PARAM(3)(sp) /* vms */
+	std r7, STACK_FRAME_PARAM(4)(sp) /* vss */
+
+	ld r3, STACK_FRAME_PARAM(1)(sp)
+	cmpdi r3, 0
+	beq skip_gpr_lc
+	bl load_gpr
+skip_gpr_lc:
+	ld r3, STACK_FRAME_PARAM(2)(sp)
+	cmpdi	r3, 0
+	beq	skip_fpu_lc
+	bl load_fpu
+skip_fpu_lc:
+	ld r3, STACK_FRAME_PARAM(3)(sp)
+	cmpdi r3, 0
+	beq	skip_vmx_lc
+	bl load_vmx
+skip_vmx_lc:
+	ld r3, STACK_FRAME_PARAM(4)(sp)
+	cmpdi	r3, 0
+	beq	skip_vsx_lc
+	bl load_vsx
+skip_vsx_lc:
+	/*
+	 * Set r3 (return value) before tbegin. Use the pid as a known
+	 * 'all good' return value, zero is used to indicate a non-doomed
+	 * transaction.
+	 */
+	ld	r3, STACK_FRAME_PARAM(0)(sp)
+	tbegin.
+	beq	1f
+	tsuspend. /* Can't enter a syscall transactionally */
+	ld	r3, STACK_FRAME_PARAM(1)(sp)
+	cmpdi	r3, 0
+	beq skip_gpr_lt
+	/* Get the second half of the array */
+	addi	r3, r3, 8 * 18
+	bl load_gpr
+skip_gpr_lt:
+	ld r3, STACK_FRAME_PARAM(2)(sp)
+	cmpdi	r3, 0
+	beq	skip_fpu_lt
+	/* Get the second half of the array */
+	addi	r3, r3, 8 * 18
+	bl load_fpu
+skip_fpu_lt:
+	ld r3, STACK_FRAME_PARAM(3)(sp)
+	cmpdi r3, 0
+	beq	skip_vmx_lt
+	/* Get the second half of the array */
+	addi	r3, r3, 16 * 12
+	bl load_vmx
+skip_vmx_lt:
+	ld r3, STACK_FRAME_PARAM(4)(sp)
+	cmpdi	r3, 0
+	beq	skip_vsx_lt
+	/* Get the second half of the array */
+	addi	r3, r3, 16 * 12
+	bl load_vsx
+skip_vsx_lt:
+	li	r0, 37 /* sys_kill */
+	ld r3, STACK_FRAME_PARAM(0)(sp) /* pid */
+	li r4, 10 /* SIGUSR1 */
+	sc /* Taking the signal will doom the transaction */
+	tabort. 0
+	tresume. /* Be super sure we abort */
+	/*
+	 * This will cause us to resume doomed transaction and cause
+	 * hardware to cleanup, we'll end up at 1: anything between
+	 * tresume. and 1: shouldn't ever run.
+	 */
+	li r3, 0
+	1:
+	POP_VMX(STACK_FRAME_LOCAL(5,0),r4)
+	POP_FPU(512)
+	POP_NVREGS_BELOW_FPU(512)
+	POP_BASIC_STACK(512)
+	blr
+FUNC_END(tm_signal_self_context_load)
diff --git a/tools/testing/selftests/powerpc/tm/tm-sigreturn.c b/tools/testing/selftests/powerpc/tm/tm-sigreturn.c
new file mode 100644
index 000000000..9a6017a1d
--- /dev/null
+++ b/tools/testing/selftests/powerpc/tm/tm-sigreturn.c
@@ -0,0 +1,93 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright 2015, Laurent Dufour, IBM Corp.
+ *
+ * Test the kernel's signal returning code to check reclaim is done if the
+ * sigreturn() is called while in a transaction (suspended since active is
+ * already dropped trough the system call path).
+ *
+ * The kernel must discard the transaction when entering sigreturn, since
+ * restoring the potential TM SPRS from the signal frame is requiring to not be
+ * in a transaction.
+ */
+
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "tm.h"
+#include "utils.h"
+
+
+void handler(int sig)
+{
+	uint64_t ret;
+
+	asm __volatile__(
+		"li             3,1             ;"
+		"tbegin.                        ;"
+		"beq            1f              ;"
+		"li             3,0             ;"
+		"tsuspend.                      ;"
+		"1:                             ;"
+		"std%X[ret]     3, %[ret]       ;"
+		: [ret] "=m"(ret)
+		:
+		: "memory", "3", "cr0");
+
+	if (ret)
+		exit(1);
+
+	/*
+	 * We return from the signal handle while in a suspended transaction
+	 */
+}
+
+
+int tm_sigreturn(void)
+{
+	struct sigaction sa;
+	uint64_t ret = 0;
+
+	SKIP_IF(!have_htm());
+	SKIP_IF(!is_ppc64le());
+
+	memset(&sa, 0, sizeof(sa));
+	sa.sa_handler = handler;
+	sigemptyset(&sa.sa_mask);
+
+	if (sigaction(SIGSEGV, &sa, NULL))
+		exit(1);
+
+	asm __volatile__(
+		"tbegin.                        ;"
+		"beq            1f              ;"
+		"li             3,0             ;"
+		"std            3,0(3)          ;" /* trigger SEGV */
+		"li             3,1             ;"
+		"std%X[ret]     3,%[ret]        ;"
+		"tend.                          ;"
+		"b              2f              ;"
+		"1:                             ;"
+		"li             3,2             ;"
+		"std%X[ret]     3,%[ret]        ;"
+		"2:                             ;"
+		: [ret] "=m"(ret)
+		:
+		: "memory", "3", "cr0");
+
+	if (ret != 2)
+		exit(1);
+
+	exit(0);
+}
+
+int main(void)
+{
+	return test_harness(tm_sigreturn, "tm_sigreturn");
+}
diff --git a/tools/testing/selftests/powerpc/tm/tm-syscall-asm.S b/tools/testing/selftests/powerpc/tm/tm-syscall-asm.S
new file mode 100644
index 000000000..bd1ca25fe
--- /dev/null
+++ b/tools/testing/selftests/powerpc/tm/tm-syscall-asm.S
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <ppc-asm.h>
+#include <asm/unistd.h>
+
+	.text
+FUNC_START(getppid_tm_active)
+	tbegin.
+	beq 1f
+	li	r0, __NR_getppid
+	sc
+	tend.
+	blr
+1:
+	li	r3, -1
+	blr
+
+FUNC_START(getppid_tm_suspended)
+	tbegin.
+	beq 1f
+	li	r0, __NR_getppid
+	tsuspend.
+	sc
+	tresume.
+	tend.
+	blr
+1:
+	li	r3, -1
+	blr
diff --git a/tools/testing/selftests/powerpc/tm/tm-syscall.c b/tools/testing/selftests/powerpc/tm/tm-syscall.c
new file mode 100644
index 000000000..454b965a2
--- /dev/null
+++ b/tools/testing/selftests/powerpc/tm/tm-syscall.c
@@ -0,0 +1,106 @@
+/*
+ * Copyright 2015, Sam Bobroff, IBM Corp.
+ * Licensed under GPLv2.
+ *
+ * Test the kernel's system call code to ensure that a system call
+ * made from within an active HTM transaction is aborted with the
+ * correct failure code.
+ * Conversely, ensure that a system call made from within a
+ * suspended transaction can succeed.
+ */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <asm/tm.h>
+#include <sys/time.h>
+#include <stdlib.h>
+
+#include "utils.h"
+#include "tm.h"
+
+extern int getppid_tm_active(void);
+extern int getppid_tm_suspended(void);
+
+unsigned retries = 0;
+
+#define TEST_DURATION 10 /* seconds */
+#define TM_RETRIES 100
+
+pid_t getppid_tm(bool suspend)
+{
+	int i;
+	pid_t pid;
+
+	for (i = 0; i < TM_RETRIES; i++) {
+		if (suspend)
+			pid = getppid_tm_suspended();
+		else
+			pid = getppid_tm_active();
+
+		if (pid >= 0)
+			return pid;
+
+		if (failure_is_persistent()) {
+			if (failure_is_syscall())
+				return -1;
+
+			printf("Unexpected persistent transaction failure.\n");
+			printf("TEXASR 0x%016lx, TFIAR 0x%016lx.\n",
+			       __builtin_get_texasr(), __builtin_get_tfiar());
+			exit(-1);
+		}
+
+		retries++;
+	}
+
+	printf("Exceeded limit of %d temporary transaction failures.\n", TM_RETRIES);
+	printf("TEXASR 0x%016lx, TFIAR 0x%016lx.\n",
+	       __builtin_get_texasr(), __builtin_get_tfiar());
+
+	exit(-1);
+}
+
+int tm_syscall(void)
+{
+	unsigned count = 0;
+	struct timeval end, now;
+
+	SKIP_IF(!have_htm_nosc());
+
+	setbuf(stdout, NULL);
+
+	printf("Testing transactional syscalls for %d seconds...\n", TEST_DURATION);
+
+	gettimeofday(&end, NULL);
+	now.tv_sec = TEST_DURATION;
+	now.tv_usec = 0;
+	timeradd(&end, &now, &end);
+
+	for (count = 0; timercmp(&now, &end, <); count++) {
+		/*
+		 * Test a syscall within a suspended transaction and verify
+		 * that it succeeds.
+		 */
+		FAIL_IF(getppid_tm(true) == -1); /* Should succeed. */
+
+		/*
+		 * Test a syscall within an active transaction and verify that
+		 * it fails with the correct failure code.
+		 */
+		FAIL_IF(getppid_tm(false) != -1);  /* Should fail... */
+		FAIL_IF(!failure_is_persistent()); /* ...persistently... */
+		FAIL_IF(!failure_is_syscall());    /* ...with code syscall. */
+		gettimeofday(&now, 0);
+	}
+
+	printf("%d active and suspended transactions behaved correctly.\n", count);
+	printf("(There were %d transaction retries.)\n", retries);
+
+	return 0;
+}
+
+int main(void)
+{
+	return test_harness(tm_syscall, "tm_syscall");
+}
diff --git a/tools/testing/selftests/powerpc/tm/tm-tar.c b/tools/testing/selftests/powerpc/tm/tm-tar.c
new file mode 100644
index 000000000..f31fe5a28
--- /dev/null
+++ b/tools/testing/selftests/powerpc/tm/tm-tar.c
@@ -0,0 +1,91 @@
+/*
+ * Copyright 2015, Michael Neuling, IBM Corp.
+ * Licensed under GPLv2.
+ * Original: Michael Neuling 19/7/2013
+ * Edited: Rashmica Gupta 01/12/2015
+ *
+ * Do some transactions, see if the tar is corrupted.
+ * If the transaction is aborted, the TAR should be rolled back to the
+ * checkpointed value before the transaction began. The value written to
+ * TAR in suspended mode should only remain in TAR if the transaction
+ * completes.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+
+#include "tm.h"
+#include "utils.h"
+
+int	num_loops	= 10000;
+
+int test_tar(void)
+{
+	int i;
+
+	SKIP_IF(!have_htm());
+	SKIP_IF(!is_ppc64le());
+
+	for (i = 0; i < num_loops; i++)
+	{
+		uint64_t result = 0;
+		asm __volatile__(
+			"li	7, 1;"
+			"mtspr	%[tar], 7;"	/* tar = 1 */
+			"tbegin.;"
+			"beq	3f;"
+			"li	4, 0x7000;"	/* Loop lots, to use time */
+			"2:;"			/* Start loop */
+			"li	7, 2;"
+			"mtspr	%[tar], 7;"	/* tar = 2 */
+			"tsuspend.;"
+			"li	7, 3;"
+			"mtspr	%[tar], 7;"	/* tar = 3 */
+			"tresume.;"
+			"subi	4, 4, 1;"
+			"cmpdi	4, 0;"
+			"bne	2b;"
+			"tend.;"
+
+			/* Transaction sucess! TAR should be 3 */
+			"mfspr  7, %[tar];"
+			"ori	%[res], 7, 4;"  // res = 3|4 = 7
+			"b	4f;"
+
+			/* Abort handler. TAR should be rolled back to 1 */
+			"3:;"
+			"mfspr  7, %[tar];"
+			"ori	%[res], 7, 8;"	// res = 1|8 = 9
+			"4:;"
+
+			: [res]"=r"(result)
+			: [tar]"i"(SPRN_TAR)
+			   : "memory", "r0", "r4", "r7");
+
+		/* If result is anything else other than 7 or 9, the tar
+		 * value must have been corrupted. */
+		if ((result != 7) && (result != 9))
+			return 1;
+	}
+	return 0;
+}
+
+int main(int argc, char *argv[])
+{
+	/* A low number of iterations (eg 100) can cause a false pass */
+	if (argc > 1) {
+		if (strcmp(argv[1], "-h") == 0) {
+			printf("Syntax:\n\t%s [<num loops>]\n",
+			       argv[0]);
+			return 1;
+		} else {
+			num_loops = atoi(argv[1]);
+		}
+	}
+
+	printf("Starting, %d loops\n", num_loops);
+
+	return test_harness(test_tar, "tm_tar");
+}
diff --git a/tools/testing/selftests/powerpc/tm/tm-tmspr.c b/tools/testing/selftests/powerpc/tm/tm-tmspr.c
new file mode 100644
index 000000000..df1d7d4b1
--- /dev/null
+++ b/tools/testing/selftests/powerpc/tm/tm-tmspr.c
@@ -0,0 +1,150 @@
+/*
+ * Copyright 2015, Michael Neuling, IBM Corp.
+ * Licensed under GPLv2.
+ *
+ * Original: Michael Neuling 3/4/2014
+ * Modified: Rashmica Gupta 8/12/2015
+ *
+ * Check if any of the Transaction Memory SPRs get corrupted.
+ * - TFIAR  - stores address of location of transaction failure
+ * - TFHAR  - stores address of software failure handler (if transaction
+ *   fails)
+ * - TEXASR - lots of info about the transacion(s)
+ *
+ * (1) create more threads than cpus
+ * (2) in each thread:
+ * 	(a) set TFIAR and TFHAR a unique value
+ * 	(b) loop for awhile, continually checking to see if
+ * 	either register has been corrupted.
+ *
+ * (3) Loop:
+ * 	(a) begin transaction
+ *    	(b) abort transaction
+ *	(c) check TEXASR to see if FS has been corrupted
+ *
+ */
+
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <pthread.h>
+#include <string.h>
+
+#include "utils.h"
+#include "tm.h"
+
+int	num_loops	= 10000;
+int	passed = 1;
+
+void tfiar_tfhar(void *in)
+{
+	int i, cpu;
+	unsigned long tfhar, tfhar_rd, tfiar, tfiar_rd;
+	cpu_set_t cpuset;
+
+	CPU_ZERO(&cpuset);
+	cpu = (unsigned long)in >> 1;
+	CPU_SET(cpu, &cpuset);
+	sched_setaffinity(0, sizeof(cpuset), &cpuset);
+
+	/* TFIAR: Last bit has to be high so userspace can read register */
+	tfiar = ((unsigned long)in) + 1;
+	tfiar += 2;
+	mtspr(SPRN_TFIAR, tfiar);
+
+	/* TFHAR: Last two bits are reserved */
+	tfhar = ((unsigned long)in);
+	tfhar &= ~0x3UL;
+	tfhar += 4;
+	mtspr(SPRN_TFHAR, tfhar);
+
+	for (i = 0; i < num_loops; i++)	{
+		tfhar_rd = mfspr(SPRN_TFHAR);
+		tfiar_rd = mfspr(SPRN_TFIAR);
+		if ( (tfhar != tfhar_rd) || (tfiar != tfiar_rd) ) {
+			passed = 0;
+			return;
+		}
+	}
+	return;
+}
+
+void texasr(void *in)
+{
+	unsigned long i;
+	uint64_t result = 0;
+
+	for (i = 0; i < num_loops; i++) {
+		asm __volatile__(
+			"tbegin.;"
+			"beq    3f ;"
+			"tabort. 0 ;"
+			"tend.;"
+
+			/* Abort handler */
+			"3: ;"
+			::: "memory");
+
+                /* Check the TEXASR */
+                result = mfspr(SPRN_TEXASR);
+		if ((result & TEXASR_FS) == 0) {
+			passed = 0;
+			return;
+		}
+	}
+	return;
+}
+
+int test_tmspr()
+{
+	pthread_t	*thread;
+	int	   	thread_num;
+	unsigned long	i;
+
+	SKIP_IF(!have_htm());
+
+	/* To cause some context switching */
+	thread_num = 10 * sysconf(_SC_NPROCESSORS_ONLN);
+
+	thread = malloc(thread_num * sizeof(pthread_t));
+	if (thread == NULL)
+		return EXIT_FAILURE;
+
+	/* Test TFIAR and TFHAR */
+	for (i = 0; i < thread_num; i += 2) {
+		if (pthread_create(&thread[i], NULL, (void *)tfiar_tfhar,
+				   (void *)i))
+			return EXIT_FAILURE;
+	}
+	/* Test TEXASR */
+	for (i = 1; i < thread_num; i += 2) {
+		if (pthread_create(&thread[i], NULL, (void *)texasr, (void *)i))
+			return EXIT_FAILURE;
+	}
+
+	for (i = 0; i < thread_num; i++) {
+		if (pthread_join(thread[i], NULL) != 0)
+			return EXIT_FAILURE;
+	}
+
+	free(thread);
+
+	if (passed)
+		return 0;
+	else
+		return 1;
+}
+
+int main(int argc, char *argv[])
+{
+	if (argc > 1) {
+		if (strcmp(argv[1], "-h") == 0) {
+			printf("Syntax:\t [<num loops>]\n");
+			return 0;
+		} else {
+			num_loops = atoi(argv[1]);
+		}
+	}
+	return test_harness(test_tmspr, "tm_tmspr");
+}
diff --git a/tools/testing/selftests/powerpc/tm/tm-trap.c b/tools/testing/selftests/powerpc/tm/tm-trap.c
new file mode 100644
index 000000000..179d592f0
--- /dev/null
+++ b/tools/testing/selftests/powerpc/tm/tm-trap.c
@@ -0,0 +1,331 @@
+/*
+ * Copyright 2017, Gustavo Romero, IBM Corp.
+ * Licensed under GPLv2.
+ *
+ * Check if thread endianness is flipped inadvertently to BE on trap
+ * caught in TM whilst MSR.FP and MSR.VEC are zero (i.e. just after
+ * load_fp and load_vec overflowed).
+ *
+ * The issue can be checked on LE machines simply by zeroing load_fp
+ * and load_vec and then causing a trap in TM. Since the endianness
+ * changes to BE on return from the signal handler, 'nop' is
+ * thread as an illegal instruction in following sequence:
+ *	tbegin.
+ *	beq 1f
+ *	trap
+ *	tend.
+ * 1:	nop
+ *
+ * However, although the issue is also present on BE machines, it's a
+ * bit trickier to check it on BE machines because MSR.LE bit is set
+ * to zero which determines a BE endianness that is the native
+ * endianness on BE machines, so nothing notably critical happens,
+ * i.e. no illegal instruction is observed immediately after returning
+ * from the signal handler (as it happens on LE machines). Thus to test
+ * it on BE machines LE endianness is forced after a first trap and then
+ * the endianness is verified on subsequent traps to determine if the
+ * endianness "flipped back" to the native endianness (BE).
+ */
+
+#define _GNU_SOURCE
+#include <error.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <htmintrin.h>
+#include <inttypes.h>
+#include <pthread.h>
+#include <sched.h>
+#include <signal.h>
+#include <stdbool.h>
+
+#include "tm.h"
+#include "utils.h"
+
+#define pr_error(error_code, format, ...) \
+	error_at_line(1, error_code, __FILE__, __LINE__, format, ##__VA_ARGS__)
+
+#define MSR_LE 1UL
+#define LE     1UL
+
+pthread_t t0_ping;
+pthread_t t1_pong;
+
+int exit_from_pong;
+
+int trap_event;
+int le;
+
+bool success;
+
+void trap_signal_handler(int signo, siginfo_t *si, void *uc)
+{
+	ucontext_t *ucp = uc;
+	uint64_t thread_endianness;
+
+	/* Get thread endianness: extract bit LE from MSR */
+	thread_endianness = MSR_LE & ucp->uc_mcontext.gp_regs[PT_MSR];
+
+	/***
+	 * Little-Endian Machine
+	 */
+
+	if (le) {
+		/* First trap event */
+		if (trap_event == 0) {
+			/* Do nothing. Since it is returning from this trap
+			 * event that endianness is flipped by the bug, so just
+			 * let the process return from the signal handler and
+			 * check on the second trap event if endianness is
+			 * flipped or not.
+			 */
+		}
+		/* Second trap event */
+		else if (trap_event == 1) {
+			/*
+			 * Since trap was caught in TM on first trap event, if
+			 * endianness was still LE (not flipped inadvertently)
+			 * after returning from the signal handler instruction
+			 * (1) is executed (basically a 'nop'), as it's located
+			 * at address of tbegin. +4 (rollback addr). As (1) on
+			 * LE endianness does in effect nothing, instruction (2)
+			 * is then executed again as 'trap', generating a second
+			 * trap event (note that in that case 'trap' is caught
+			 * not in transacional mode). On te other hand, if after
+			 * the return from the signal handler the endianness in-
+			 * advertently flipped, instruction (1) is tread as a
+			 * branch instruction, i.e. b .+8, hence instruction (3)
+			 * and (4) are executed (tbegin.; trap;) and we get sim-
+			 * ilaly on the trap signal handler, but now in TM mode.
+			 * Either way, it's now possible to check the MSR LE bit
+			 * once in the trap handler to verify if endianness was
+			 * flipped or not after the return from the second trap
+			 * event. If endianness is flipped, the bug is present.
+			 * Finally, getting a trap in TM mode or not is just
+			 * worth noting because it affects the math to determine
+			 * the offset added to the NIP on return: the NIP for a
+			 * trap caught in TM is the rollback address, i.e. the
+			 * next instruction after 'tbegin.', whilst the NIP for
+			 * a trap caught in non-transactional mode is the very
+			 * same address of the 'trap' instruction that generated
+			 * the trap event.
+			 */
+
+			if (thread_endianness == LE) {
+				/* Go to 'success', i.e. instruction (6) */
+				ucp->uc_mcontext.gp_regs[PT_NIP] += 16;
+			} else {
+				/*
+				 * Thread endianness is BE, so it flipped
+				 * inadvertently. Thus we flip back to LE and
+				 * set NIP to go to 'failure', instruction (5).
+				 */
+				ucp->uc_mcontext.gp_regs[PT_MSR] |= 1UL;
+				ucp->uc_mcontext.gp_regs[PT_NIP] += 4;
+			}
+		}
+	}
+
+	/***
+	 * Big-Endian Machine
+	 */
+
+	else {
+		/* First trap event */
+		if (trap_event == 0) {
+			/*
+			 * Force thread endianness to be LE. Instructions (1),
+			 * (3), and (4) will be executed, generating a second
+			 * trap in TM mode.
+			 */
+			ucp->uc_mcontext.gp_regs[PT_MSR] |= 1UL;
+		}
+		/* Second trap event */
+		else if (trap_event == 1) {
+			/*
+			 * Do nothing. If bug is present on return from this
+			 * second trap event endianness will flip back "automat-
+			 * ically" to BE, otherwise thread endianness will
+			 * continue to be LE, just as it was set above.
+			 */
+		}
+		/* A third trap event */
+		else {
+			/*
+			 * Once here it means that after returning from the sec-
+			 * ond trap event instruction (4) (trap) was executed
+			 * as LE, generating a third trap event. In that case
+			 * endianness is still LE as set on return from the
+			 * first trap event, hence no bug. Otherwise, bug
+			 * flipped back to BE on return from the second trap
+			 * event and instruction (4) was executed as 'tdi' (so
+			 * basically a 'nop') and branch to 'failure' in
+			 * instruction (5) was taken to indicate failure and we
+			 * never get here.
+			 */
+
+			/*
+			 * Flip back to BE and go to instruction (6), i.e. go to
+			 * 'success'.
+			 */
+			ucp->uc_mcontext.gp_regs[PT_MSR] &= ~1UL;
+			ucp->uc_mcontext.gp_regs[PT_NIP] += 8;
+		}
+	}
+
+	trap_event++;
+}
+
+void usr1_signal_handler(int signo, siginfo_t *si, void *not_used)
+{
+	/* Got a USR1 signal from ping(), so just tell pong() to exit */
+	exit_from_pong = 1;
+}
+
+void *ping(void *not_used)
+{
+	uint64_t i;
+
+	trap_event = 0;
+
+	/*
+	 * Wait an amount of context switches so load_fp and load_vec overflows
+	 * and MSR_[FP|VEC|V] is 0.
+	 */
+	for (i = 0; i < 1024*1024*512; i++)
+		;
+
+	asm goto(
+		/*
+		 * [NA] means "Native Endianness", i.e. it tells how a
+		 * instruction is executed on machine's native endianness (in
+		 * other words, native endianness matches kernel endianness).
+		 * [OP] means "Opposite Endianness", i.e. on a BE machine, it
+		 * tells how a instruction is executed as a LE instruction; con-
+		 * versely, on a LE machine, it tells how a instruction is
+		 * executed as a BE instruction. When [NA] is omitted, it means
+		 * that the native interpretation of a given instruction is not
+		 * relevant for the test. Likewise when [OP] is omitted.
+		 */
+
+		" tbegin.        ;" /* (0) tbegin. [NA]                    */
+		" tdi  0, 0, 0x48;" /* (1) nop     [NA]; b (3) [OP]        */
+		" trap           ;" /* (2) trap    [NA]                    */
+		".long 0x1D05007C;" /* (3) tbegin. [OP]                    */
+		".long 0x0800E07F;" /* (4) trap    [OP]; nop   [NA]        */
+		" b %l[failure]  ;" /* (5) b [NA]; MSR.LE flipped (bug)    */
+		" b %l[success]  ;" /* (6) b [NA]; MSR.LE did not flip (ok)*/
+
+		: : : : failure, success);
+
+failure:
+	success = false;
+	goto exit_from_ping;
+
+success:
+	success = true;
+
+exit_from_ping:
+	/* Tell pong() to exit before leaving */
+	pthread_kill(t1_pong, SIGUSR1);
+	return NULL;
+}
+
+void *pong(void *not_used)
+{
+	while (!exit_from_pong)
+		/*
+		 * Induce context switches on ping() thread
+		 * until ping() finishes its job and signs
+		 * to exit from this loop.
+		 */
+		sched_yield();
+
+	return NULL;
+}
+
+int tm_trap_test(void)
+{
+	uint16_t k = 1;
+
+	int rc;
+
+	pthread_attr_t attr;
+	cpu_set_t cpuset;
+
+	struct sigaction trap_sa;
+
+	SKIP_IF(!have_htm());
+
+	trap_sa.sa_flags = SA_SIGINFO;
+	trap_sa.sa_sigaction = trap_signal_handler;
+	sigaction(SIGTRAP, &trap_sa, NULL);
+
+	struct sigaction usr1_sa;
+
+	usr1_sa.sa_flags = SA_SIGINFO;
+	usr1_sa.sa_sigaction = usr1_signal_handler;
+	sigaction(SIGUSR1, &usr1_sa, NULL);
+
+	/* Set only CPU 0 in the mask. Both threads will be bound to cpu 0. */
+	CPU_ZERO(&cpuset);
+	CPU_SET(0, &cpuset);
+
+	/* Init pthread attribute */
+	rc = pthread_attr_init(&attr);
+	if (rc)
+		pr_error(rc, "pthread_attr_init()");
+
+	/*
+	 * Bind thread ping() and pong() both to CPU 0 so they ping-pong and
+	 * speed up context switches on ping() thread, speeding up the load_fp
+	 * and load_vec overflow.
+	 */
+	rc = pthread_attr_setaffinity_np(&attr, sizeof(cpu_set_t), &cpuset);
+	if (rc)
+		pr_error(rc, "pthread_attr_setaffinity()");
+
+	/* Figure out the machine endianness */
+	le = (int) *(uint8_t *)&k;
+
+	printf("%s machine detected. Checking if endianness flips %s",
+		le ? "Little-Endian" : "Big-Endian",
+		"inadvertently on trap in TM... ");
+
+	rc = fflush(0);
+	if (rc)
+		pr_error(rc, "fflush()");
+
+	/* Launch ping() */
+	rc = pthread_create(&t0_ping, &attr, ping, NULL);
+	if (rc)
+		pr_error(rc, "pthread_create()");
+
+	exit_from_pong = 0;
+
+	/* Launch pong() */
+	rc = pthread_create(&t1_pong, &attr, pong, NULL);
+	if (rc)
+		pr_error(rc, "pthread_create()");
+
+	rc = pthread_join(t0_ping, NULL);
+	if (rc)
+		pr_error(rc, "pthread_join()");
+
+	rc = pthread_join(t1_pong, NULL);
+	if (rc)
+		pr_error(rc, "pthread_join()");
+
+	if (success) {
+		printf("no.\n"); /* no, endianness did not flip inadvertently */
+		return EXIT_SUCCESS;
+	}
+
+	printf("yes!\n"); /* yes, endianness did flip inadvertently */
+	return EXIT_FAILURE;
+}
+
+int main(int argc, char **argv)
+{
+	return test_harness(tm_trap_test, "tm_trap_test");
+}
diff --git a/tools/testing/selftests/powerpc/tm/tm-unavailable.c b/tools/testing/selftests/powerpc/tm/tm-unavailable.c
new file mode 100644
index 000000000..09894f4ff
--- /dev/null
+++ b/tools/testing/selftests/powerpc/tm/tm-unavailable.c
@@ -0,0 +1,407 @@
+/*
+ * Copyright 2017, Gustavo Romero, Breno Leitao, Cyril Bur, IBM Corp.
+ * Licensed under GPLv2.
+ *
+ * Force FP, VEC and VSX unavailable exception during transaction in all
+ * possible scenarios regarding the MSR.FP and MSR.VEC state, e.g. when FP
+ * is enable and VEC is disable, when FP is disable and VEC is enable, and
+ * so on. Then we check if the restored state is correctly set for the
+ * FP and VEC registers to the previous state we set just before we entered
+ * in TM, i.e. we check if it corrupts somehow the recheckpointed FP and
+ * VEC/Altivec registers on abortion due to an unavailable exception in TM.
+ * N.B. In this test we do not test all the FP/Altivec/VSX registers for
+ * corruption, but only for registers vs0 and vs32, which are respectively
+ * representatives of FP and VEC/Altivec reg sets.
+ */
+
+#define _GNU_SOURCE
+#include <error.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <inttypes.h>
+#include <stdbool.h>
+#include <pthread.h>
+#include <sched.h>
+
+#include "tm.h"
+
+#define DEBUG 0
+
+/* Unavailable exceptions to test in HTM */
+#define FP_UNA_EXCEPTION	0
+#define VEC_UNA_EXCEPTION	1
+#define VSX_UNA_EXCEPTION	2
+
+#define NUM_EXCEPTIONS		3
+#define err_at_line(status, errnum, format, ...) \
+	error_at_line(status, errnum,  __FILE__, __LINE__, format ##__VA_ARGS__)
+
+#define pr_warn(code, format, ...) err_at_line(0, code, format, ##__VA_ARGS__)
+#define pr_err(code, format, ...) err_at_line(1, code, format, ##__VA_ARGS__)
+
+struct Flags {
+	int touch_fp;
+	int touch_vec;
+	int result;
+	int exception;
+} flags;
+
+bool expecting_failure(void)
+{
+	if (flags.touch_fp && flags.exception == FP_UNA_EXCEPTION)
+		return false;
+
+	if (flags.touch_vec && flags.exception == VEC_UNA_EXCEPTION)
+		return false;
+
+	/*
+	 * If both FP and VEC are touched it does not mean that touching VSX
+	 * won't raise an exception. However since FP and VEC state are already
+	 * correctly loaded, the transaction is not aborted (i.e.
+	 * treclaimed/trecheckpointed) and MSR.VSX is just set as 1, so a TM
+	 * failure is not expected also in this case.
+	 */
+	if ((flags.touch_fp && flags.touch_vec) &&
+	     flags.exception == VSX_UNA_EXCEPTION)
+		return false;
+
+	return true;
+}
+
+/* Check if failure occurred whilst in transaction. */
+bool is_failure(uint64_t condition_reg)
+{
+	/*
+	 * When failure handling occurs, CR0 is set to 0b1010 (0xa). Otherwise
+	 * transaction completes without failure and hence reaches out 'tend.'
+	 * that sets CR0 to 0b0100 (0x4).
+	 */
+	return ((condition_reg >> 28) & 0xa) == 0xa;
+}
+
+void *tm_una_ping(void *input)
+{
+
+	/*
+	 * Expected values for vs0 and vs32 after a TM failure. They must never
+	 * change, otherwise they got corrupted.
+	 */
+	uint64_t high_vs0 = 0x5555555555555555;
+	uint64_t low_vs0 = 0xffffffffffffffff;
+	uint64_t high_vs32 = 0x5555555555555555;
+	uint64_t low_vs32 = 0xffffffffffffffff;
+
+	/* Counter for busy wait */
+	uint64_t counter = 0x1ff000000;
+
+	/*
+	 * Variable to keep a copy of CR register content taken just after we
+	 * leave the transactional state.
+	 */
+	uint64_t cr_ = 0;
+
+	/*
+	 * Wait a bit so thread can get its name "ping". This is not important
+	 * to reproduce the issue but it's nice to have for systemtap debugging.
+	 */
+	if (DEBUG)
+		sleep(1);
+
+	printf("If MSR.FP=%d MSR.VEC=%d: ", flags.touch_fp, flags.touch_vec);
+
+	if (flags.exception != FP_UNA_EXCEPTION &&
+	    flags.exception != VEC_UNA_EXCEPTION &&
+	    flags.exception != VSX_UNA_EXCEPTION) {
+		printf("No valid exception specified to test.\n");
+		return NULL;
+	}
+
+	asm (
+		/* Prepare to merge low and high. */
+		"	mtvsrd		33, %[high_vs0]		;"
+		"	mtvsrd		34, %[low_vs0]		;"
+
+		/*
+		 * Adjust VS0 expected value after an TM failure,
+		 * i.e. vs0 = 0x5555555555555555555FFFFFFFFFFFFFFFF
+		 */
+		"	xxmrghd		0, 33, 34		;"
+
+		/*
+		 * Adjust VS32 expected value after an TM failure,
+		 * i.e. vs32 = 0x5555555555555555555FFFFFFFFFFFFFFFF
+		 */
+		"	xxmrghd		32, 33, 34		;"
+
+		/*
+		 * Wait an amount of context switches so load_fp and load_vec
+		 * overflow and MSR.FP, MSR.VEC, and MSR.VSX become zero (off).
+		 */
+		"	mtctr		%[counter]		;"
+
+		/* Decrement CTR branch if CTR non zero. */
+		"1:	bdnz 1b					;"
+
+		/*
+		 * Check if we want to touch FP prior to the test in order
+		 * to set MSR.FP = 1 before provoking an unavailable
+		 * exception in TM.
+		 */
+		"	cmpldi		%[touch_fp], 0		;"
+		"	beq		no_fp			;"
+		"	fadd		10, 10, 10		;"
+		"no_fp:						;"
+
+		/*
+		 * Check if we want to touch VEC prior to the test in order
+		 * to set MSR.VEC = 1 before provoking an unavailable
+		 * exception in TM.
+		 */
+		"	cmpldi		%[touch_vec], 0		;"
+		"	beq		no_vec			;"
+		"	vaddcuw		10, 10, 10		;"
+		"no_vec:					;"
+
+		/*
+		 * Perhaps it would be a better idea to do the
+		 * compares outside transactional context and simply
+		 * duplicate code.
+		 */
+		"	tbegin.					;"
+		"	beq		trans_fail		;"
+
+		/* Do we do FP Unavailable? */
+		"	cmpldi		%[exception], %[ex_fp]	;"
+		"	bne		1f			;"
+		"	fadd		10, 10, 10		;"
+		"	b		done			;"
+
+		/* Do we do VEC Unavailable? */
+		"1:	cmpldi		%[exception], %[ex_vec]	;"
+		"	bne		2f			;"
+		"	vaddcuw		10, 10, 10		;"
+		"	b		done			;"
+
+		/*
+		 * Not FP or VEC, therefore VSX. Ensure this
+		 * instruction always generates a VSX Unavailable.
+		 * ISA 3.0 is tricky here.
+		 * (xxmrghd will on ISA 2.07 and ISA 3.0)
+		 */
+		"2:	xxmrghd		10, 10, 10		;"
+
+		"done:	tend. ;"
+
+		"trans_fail: ;"
+
+		/* Give values back to C. */
+		"	mfvsrd		%[high_vs0], 0		;"
+		"	xxsldwi		3, 0, 0, 2		;"
+		"	mfvsrd		%[low_vs0], 3		;"
+		"	mfvsrd		%[high_vs32], 32	;"
+		"	xxsldwi		3, 32, 32, 2		;"
+		"	mfvsrd		%[low_vs32], 3		;"
+
+		/* Give CR back to C so that it can check what happened. */
+		"	mfcr		%[cr_]		;"
+
+		: [high_vs0]  "+r" (high_vs0),
+		  [low_vs0]   "+r" (low_vs0),
+		  [high_vs32] "=r" (high_vs32),
+		  [low_vs32]  "=r" (low_vs32),
+		  [cr_]       "+r" (cr_)
+		: [touch_fp]  "r"  (flags.touch_fp),
+		  [touch_vec] "r"  (flags.touch_vec),
+		  [exception] "r"  (flags.exception),
+		  [ex_fp]     "i"  (FP_UNA_EXCEPTION),
+		  [ex_vec]    "i"  (VEC_UNA_EXCEPTION),
+		  [ex_vsx]    "i"  (VSX_UNA_EXCEPTION),
+		  [counter]   "r"  (counter)
+
+		: "cr0", "ctr", "v10", "vs0", "vs10", "vs3", "vs32", "vs33",
+		  "vs34", "fr10"
+
+		);
+
+	/*
+	 * Check if we were expecting a failure and it did not occur by checking
+	 * CR0 state just after we leave the transaction. Either way we check if
+	 * vs0 or vs32 got corrupted.
+	 */
+	if (expecting_failure() && !is_failure(cr_)) {
+		printf("\n\tExpecting the transaction to fail, %s",
+			"but it didn't\n\t");
+		flags.result++;
+	}
+
+	/* Check if we were not expecting a failure and a it occurred. */
+	if (!expecting_failure() && is_failure(cr_) &&
+	    !failure_is_reschedule()) {
+		printf("\n\tUnexpected transaction failure 0x%02lx\n\t",
+			failure_code());
+		return (void *) -1;
+	}
+
+	/*
+	 * Check if TM failed due to the cause we were expecting. 0xda is a
+	 * TM_CAUSE_FAC_UNAV cause, otherwise it's an unexpected cause, unless
+	 * it was caused by a reschedule.
+	 */
+	if (is_failure(cr_) && !failure_is_unavailable() &&
+	    !failure_is_reschedule()) {
+		printf("\n\tUnexpected failure cause 0x%02lx\n\t",
+			failure_code());
+		return (void *) -1;
+	}
+
+	/* 0x4 is a success and 0xa is a fail. See comment in is_failure(). */
+	if (DEBUG)
+		printf("CR0: 0x%1lx ", cr_ >> 28);
+
+	/* Check FP (vs0) for the expected value. */
+	if (high_vs0 != 0x5555555555555555 || low_vs0 != 0xFFFFFFFFFFFFFFFF) {
+		printf("FP corrupted!");
+			printf("  high = %#16" PRIx64 "  low = %#16" PRIx64 " ",
+				high_vs0, low_vs0);
+		flags.result++;
+	} else
+		printf("FP ok ");
+
+	/* Check VEC (vs32) for the expected value. */
+	if (high_vs32 != 0x5555555555555555 || low_vs32 != 0xFFFFFFFFFFFFFFFF) {
+		printf("VEC corrupted!");
+			printf("  high = %#16" PRIx64 "  low = %#16" PRIx64,
+				high_vs32, low_vs32);
+		flags.result++;
+	} else
+		printf("VEC ok");
+
+	putchar('\n');
+
+	return NULL;
+}
+
+/* Thread to force context switch */
+void *tm_una_pong(void *not_used)
+{
+	/* Wait thread get its name "pong". */
+	if (DEBUG)
+		sleep(1);
+
+	/* Classed as an interactive-like thread. */
+	while (1)
+		sched_yield();
+}
+
+/* Function that creates a thread and launches the "ping" task. */
+void test_fp_vec(int fp, int vec, pthread_attr_t *attr)
+{
+	int retries = 2;
+	void *ret_value;
+	pthread_t t0;
+
+	flags.touch_fp = fp;
+	flags.touch_vec = vec;
+
+	/*
+	 * Without luck it's possible that the transaction is aborted not due to
+	 * the unavailable exception caught in the middle as we expect but also,
+	 * for instance, due to a context switch or due to a KVM reschedule (if
+	 * it's running on a VM). Thus we try a few times before giving up,
+	 * checking if the failure cause is the one we expect.
+	 */
+	do {
+		int rc;
+
+		/* Bind to CPU 0, as specified in 'attr'. */
+		rc = pthread_create(&t0, attr, tm_una_ping, (void *) &flags);
+		if (rc)
+			pr_err(rc, "pthread_create()");
+		rc = pthread_setname_np(t0, "tm_una_ping");
+		if (rc)
+			pr_warn(rc, "pthread_setname_np");
+		rc = pthread_join(t0, &ret_value);
+		if (rc)
+			pr_err(rc, "pthread_join");
+
+		retries--;
+	} while (ret_value != NULL && retries);
+
+	if (!retries) {
+		flags.result = 1;
+		if (DEBUG)
+			printf("All transactions failed unexpectedly\n");
+
+	}
+}
+
+int tm_unavailable_test(void)
+{
+	int rc, exception; /* FP = 0, VEC = 1, VSX = 2 */
+	pthread_t t1;
+	pthread_attr_t attr;
+	cpu_set_t cpuset;
+
+	SKIP_IF(!have_htm());
+
+	/* Set only CPU 0 in the mask. Both threads will be bound to CPU 0. */
+	CPU_ZERO(&cpuset);
+	CPU_SET(0, &cpuset);
+
+	/* Init pthread attribute. */
+	rc = pthread_attr_init(&attr);
+	if (rc)
+		pr_err(rc, "pthread_attr_init()");
+
+	/* Set CPU 0 mask into the pthread attribute. */
+	rc = pthread_attr_setaffinity_np(&attr, sizeof(cpu_set_t), &cpuset);
+	if (rc)
+		pr_err(rc, "pthread_attr_setaffinity_np()");
+
+	rc = pthread_create(&t1, &attr /* Bind to CPU 0 */, tm_una_pong, NULL);
+	if (rc)
+		pr_err(rc, "pthread_create()");
+
+	/* Name it for systemtap convenience */
+	rc = pthread_setname_np(t1, "tm_una_pong");
+	if (rc)
+		pr_warn(rc, "pthread_create()");
+
+	flags.result = 0;
+
+	for (exception = 0; exception < NUM_EXCEPTIONS; exception++) {
+		printf("Checking if FP/VEC registers are sane after");
+
+		if (exception == FP_UNA_EXCEPTION)
+			printf(" a FP unavailable exception...\n");
+
+		else if (exception == VEC_UNA_EXCEPTION)
+			printf(" a VEC unavailable exception...\n");
+
+		else
+			printf(" a VSX unavailable exception...\n");
+
+		flags.exception = exception;
+
+		test_fp_vec(0, 0, &attr);
+		test_fp_vec(1, 0, &attr);
+		test_fp_vec(0, 1, &attr);
+		test_fp_vec(1, 1, &attr);
+
+	}
+
+	if (flags.result > 0) {
+		printf("result: failed!\n");
+		exit(1);
+	} else {
+		printf("result: success\n");
+		exit(0);
+	}
+}
+
+int main(int argc, char **argv)
+{
+	test_harness_set_timeout(220);
+	return test_harness(tm_unavailable_test, "tm_unavailable_test");
+}
diff --git a/tools/testing/selftests/powerpc/tm/tm-vmx-unavail.c b/tools/testing/selftests/powerpc/tm/tm-vmx-unavail.c
new file mode 100644
index 000000000..137185ba4
--- /dev/null
+++ b/tools/testing/selftests/powerpc/tm/tm-vmx-unavail.c
@@ -0,0 +1,118 @@
+/*
+ * Copyright 2017, Michael Neuling, IBM Corp.
+ * Licensed under GPLv2.
+ * Original: Breno Leitao <brenohl@br.ibm.com> &
+ *           Gustavo Bueno Romero <gromero@br.ibm.com>
+ * Edited: Michael Neuling
+ *
+ * Force VMX unavailable during a transaction and see if it corrupts
+ * the checkpointed VMX register state after the abort.
+ */
+
+#include <inttypes.h>
+#include <htmintrin.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <pthread.h>
+#include <sys/mman.h>
+#include <unistd.h>
+#include <pthread.h>
+
+#include "tm.h"
+#include "utils.h"
+
+int passed;
+
+void *worker(void *unused)
+{
+	__int128 vmx0;
+	uint64_t texasr;
+
+	asm goto (
+		"li       3, 1;"  /* Stick non-zero value in VMX0 */
+		"std      3, 0(%[vmx0_ptr]);"
+		"lvx      0, 0, %[vmx0_ptr];"
+
+		/* Wait here a bit so we get scheduled out 255 times */
+		"lis      3, 0x3fff;"
+		"1: ;"
+		"addi     3, 3, -1;"
+		"cmpdi    3, 0;"
+		"bne      1b;"
+
+		/* Kernel will hopefully turn VMX off now */
+
+		"tbegin. ;"
+		"beq      failure;"
+
+		/* Cause VMX unavail. Any VMX instruction */
+		"vaddcuw  0,0,0;"
+
+		"tend. ;"
+		"b        %l[success];"
+
+		/* Check VMX0 sanity after abort */
+		"failure: ;"
+		"lvx       1,  0, %[vmx0_ptr];"
+		"vcmpequb. 2,  0, 1;"
+		"bc        4, 24, %l[value_mismatch];"
+		"b        %l[value_match];"
+		:
+		: [vmx0_ptr] "r"(&vmx0)
+		: "r3"
+		: success, value_match, value_mismatch
+		);
+
+	/* HTM aborted and VMX0 is corrupted */
+value_mismatch:
+	texasr = __builtin_get_texasr();
+
+	printf("\n\n==============\n\n");
+	printf("Failure with error: %lx\n",   _TEXASR_FAILURE_CODE(texasr));
+	printf("Summary error     : %lx\n",   _TEXASR_FAILURE_SUMMARY(texasr));
+	printf("TFIAR exact       : %lx\n\n", _TEXASR_TFIAR_EXACT(texasr));
+
+	passed = 0;
+	return NULL;
+
+	/* HTM aborted but VMX0 is correct */
+value_match:
+//	printf("!");
+	return NULL;
+
+success:
+//	printf(".");
+	return NULL;
+}
+
+int tm_vmx_unavail_test()
+{
+	int threads;
+	pthread_t *thread;
+
+	SKIP_IF(!have_htm());
+
+	passed = 1;
+
+	threads = sysconf(_SC_NPROCESSORS_ONLN) * 4;
+	thread = malloc(sizeof(pthread_t)*threads);
+	if (!thread)
+		return EXIT_FAILURE;
+
+	for (uint64_t i = 0; i < threads; i++)
+		pthread_create(&thread[i], NULL, &worker, NULL);
+
+	for (uint64_t i = 0; i < threads; i++)
+		pthread_join(thread[i], NULL);
+
+	free(thread);
+
+	return passed ? EXIT_SUCCESS : EXIT_FAILURE;
+}
+
+
+int main(int argc, char **argv)
+{
+	return test_harness(tm_vmx_unavail_test, "tm_vmx_unavail_test");
+}
diff --git a/tools/testing/selftests/powerpc/tm/tm-vmxcopy.c b/tools/testing/selftests/powerpc/tm/tm-vmxcopy.c
new file mode 100644
index 000000000..fe5281158
--- /dev/null
+++ b/tools/testing/selftests/powerpc/tm/tm-vmxcopy.c
@@ -0,0 +1,104 @@
+/*
+ * Copyright 2015, Michael Neuling, IBM Corp.
+ * Licensed under GPLv2.
+ *
+ * Original: Michael Neuling 4/12/2013
+ * Edited: Rashmica Gupta 4/12/2015
+ *
+ * See if the altivec state is leaked out of an aborted transaction due to
+ * kernel vmx copy loops.
+ *
+ * When the transaction aborts, VSR values should rollback to the values
+ * they held before the transaction commenced. Using VSRs while transaction
+ * is suspended should not affect the checkpointed values.
+ *
+ * (1) write A to a VSR
+ * (2) start transaction
+ * (3) suspend transaction
+ * (4) change the VSR to B
+ * (5) trigger kernel vmx copy loop
+ * (6) abort transaction
+ * (7) check that the VSR value is A
+ */
+
+#include <inttypes.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <string.h>
+#include <assert.h>
+
+#include "tm.h"
+#include "utils.h"
+
+int test_vmxcopy()
+{
+	long double vecin = 1.3;
+	long double vecout;
+	unsigned long pgsize = getpagesize();
+	int i;
+	int fd;
+	int size = pgsize*16;
+	char tmpfile[] = "/tmp/page_faultXXXXXX";
+	char buf[pgsize];
+	char *a;
+	uint64_t aborted = 0;
+
+	SKIP_IF(!have_htm());
+	SKIP_IF(!is_ppc64le());
+
+	fd = mkstemp(tmpfile);
+	assert(fd >= 0);
+
+	memset(buf, 0, pgsize);
+	for (i = 0; i < size; i += pgsize)
+		assert(write(fd, buf, pgsize) == pgsize);
+
+	unlink(tmpfile);
+
+	a = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_PRIVATE, fd, 0);
+	assert(a != MAP_FAILED);
+
+	asm __volatile__(
+		"lxvd2x 40,0,%[vecinptr];"	/* set 40 to initial value*/
+		"tbegin.;"
+		"beq	3f;"
+		"tsuspend.;"
+		"xxlxor 40,40,40;"		/* set 40 to 0 */
+		"std	5, 0(%[map]);"		/* cause kernel vmx copy page */
+		"tabort. 0;"
+		"tresume.;"
+		"tend.;"
+		"li	%[res], 0;"
+		"b	5f;"
+
+		/* Abort handler */
+		"3:;"
+		"li	%[res], 1;"
+
+		"5:;"
+		"stxvd2x 40,0,%[vecoutptr];"
+		: [res]"=r"(aborted)
+		: [vecinptr]"r"(&vecin),
+		  [vecoutptr]"r"(&vecout),
+		  [map]"r"(a)
+		: "memory", "r0", "r3", "r4", "r5", "r6", "r7");
+
+	if (aborted && (vecin != vecout)){
+		printf("FAILED: vector state leaked on abort %f != %f\n",
+		       (double)vecin, (double)vecout);
+		return 1;
+	}
+
+	munmap(a, size);
+
+	close(fd);
+
+	return 0;
+}
+
+int main(void)
+{
+	return test_harness(test_vmxcopy, "tm_vmxcopy");
+}
diff --git a/tools/testing/selftests/powerpc/tm/tm.h b/tools/testing/selftests/powerpc/tm/tm.h
new file mode 100644
index 000000000..5518b1d4e
--- /dev/null
+++ b/tools/testing/selftests/powerpc/tm/tm.h
@@ -0,0 +1,96 @@
+/*
+ * Copyright 2015, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#ifndef _SELFTESTS_POWERPC_TM_TM_H
+#define _SELFTESTS_POWERPC_TM_TM_H
+
+#include <asm/tm.h>
+#include <asm/cputable.h>
+#include <stdbool.h>
+
+#include "utils.h"
+
+static inline bool have_htm(void)
+{
+#ifdef PPC_FEATURE2_HTM
+	return have_hwcap2(PPC_FEATURE2_HTM);
+#else
+	printf("PPC_FEATURE2_HTM not defined, can't check AT_HWCAP2\n");
+	return false;
+#endif
+}
+
+static inline bool have_htm_nosc(void)
+{
+#ifdef PPC_FEATURE2_HTM_NOSC
+	return have_hwcap2(PPC_FEATURE2_HTM_NOSC);
+#else
+	printf("PPC_FEATURE2_HTM_NOSC not defined, can't check AT_HWCAP2\n");
+	return false;
+#endif
+}
+
+static inline long failure_code(void)
+{
+	return __builtin_get_texasru() >> 24;
+}
+
+static inline bool failure_is_persistent(void)
+{
+	return (failure_code() & TM_CAUSE_PERSISTENT) == TM_CAUSE_PERSISTENT;
+}
+
+static inline bool failure_is_syscall(void)
+{
+	return (failure_code() & TM_CAUSE_SYSCALL) == TM_CAUSE_SYSCALL;
+}
+
+static inline bool failure_is_unavailable(void)
+{
+	return (failure_code() & TM_CAUSE_FAC_UNAV) == TM_CAUSE_FAC_UNAV;
+}
+
+static inline bool failure_is_reschedule(void)
+{
+	if ((failure_code() & TM_CAUSE_RESCHED) == TM_CAUSE_RESCHED ||
+	    (failure_code() & TM_CAUSE_KVM_RESCHED) == TM_CAUSE_KVM_RESCHED)
+		return true;
+
+	return false;
+}
+
+static inline bool failure_is_nesting(void)
+{
+	return (__builtin_get_texasru() & 0x400000);
+}
+
+static inline int tcheck(void)
+{
+	long cr;
+	asm volatile ("tcheck 0" : "=r"(cr) : : "cr0");
+	return (cr >> 28) & 4;
+}
+
+static inline bool tcheck_doomed(void)
+{
+	return tcheck() & 8;
+}
+
+static inline bool tcheck_active(void)
+{
+	return tcheck() & 4;
+}
+
+static inline bool tcheck_suspended(void)
+{
+	return tcheck() & 2;
+}
+
+static inline bool tcheck_transactional(void)
+{
+	return tcheck() & 6;
+}
+
+#endif /* _SELFTESTS_POWERPC_TM_TM_H */
diff --git a/tools/testing/selftests/powerpc/utils.c b/tools/testing/selftests/powerpc/utils.c
new file mode 100644
index 000000000..ba0959d45
--- /dev/null
+++ b/tools/testing/selftests/powerpc/utils.c
@@ -0,0 +1,136 @@
+/*
+ * Copyright 2013-2015, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#define _GNU_SOURCE	/* For CPU_ZERO etc. */
+
+#include <elf.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <link.h>
+#include <sched.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/sysinfo.h>
+#include <sys/types.h>
+#include <sys/utsname.h>
+#include <unistd.h>
+
+#include "utils.h"
+
+static char auxv[4096];
+
+int read_auxv(char *buf, ssize_t buf_size)
+{
+	ssize_t num;
+	int rc, fd;
+
+	fd = open("/proc/self/auxv", O_RDONLY);
+	if (fd == -1) {
+		perror("open");
+		return -errno;
+	}
+
+	num = read(fd, buf, buf_size);
+	if (num < 0) {
+		perror("read");
+		rc = -EIO;
+		goto out;
+	}
+
+	if (num > buf_size) {
+		printf("overflowed auxv buffer\n");
+		rc = -EOVERFLOW;
+		goto out;
+	}
+
+	rc = 0;
+out:
+	close(fd);
+	return rc;
+}
+
+void *find_auxv_entry(int type, char *auxv)
+{
+	ElfW(auxv_t) *p;
+
+	p = (ElfW(auxv_t) *)auxv;
+
+	while (p->a_type != AT_NULL) {
+		if (p->a_type == type)
+			return p;
+
+		p++;
+	}
+
+	return NULL;
+}
+
+void *get_auxv_entry(int type)
+{
+	ElfW(auxv_t) *p;
+
+	if (read_auxv(auxv, sizeof(auxv)))
+		return NULL;
+
+	p = find_auxv_entry(type, auxv);
+	if (p)
+		return (void *)p->a_un.a_val;
+
+	return NULL;
+}
+
+int pick_online_cpu(void)
+{
+	int ncpus, cpu = -1;
+	cpu_set_t *mask;
+	size_t size;
+
+	ncpus = get_nprocs_conf();
+	size = CPU_ALLOC_SIZE(ncpus);
+	mask = CPU_ALLOC(ncpus);
+	if (!mask) {
+		perror("malloc");
+		return -1;
+	}
+
+	CPU_ZERO_S(size, mask);
+
+	if (sched_getaffinity(0, size, mask)) {
+		perror("sched_getaffinity");
+		goto done;
+	}
+
+	/* We prefer a primary thread, but skip 0 */
+	for (cpu = 8; cpu < ncpus; cpu += 8)
+		if (CPU_ISSET_S(cpu, size, mask))
+			goto done;
+
+	/* Search for anything, but in reverse */
+	for (cpu = ncpus - 1; cpu >= 0; cpu--)
+		if (CPU_ISSET_S(cpu, size, mask))
+			goto done;
+
+	printf("No cpus in affinity mask?!\n");
+
+done:
+	CPU_FREE(mask);
+	return cpu;
+}
+
+bool is_ppc64le(void)
+{
+	struct utsname uts;
+	int rc;
+
+	errno = 0;
+	rc = uname(&uts);
+	if (rc) {
+		perror("uname");
+		return false;
+	}
+
+	return strcmp(uts.machine, "ppc64le") == 0;
+}
diff --git a/tools/testing/selftests/powerpc/vphn/.gitignore b/tools/testing/selftests/powerpc/vphn/.gitignore
new file mode 100644
index 000000000..7c0439501
--- /dev/null
+++ b/tools/testing/selftests/powerpc/vphn/.gitignore
@@ -0,0 +1 @@
+test-vphn
diff --git a/tools/testing/selftests/powerpc/vphn/Makefile b/tools/testing/selftests/powerpc/vphn/Makefile
new file mode 100644
index 000000000..fb82068c9
--- /dev/null
+++ b/tools/testing/selftests/powerpc/vphn/Makefile
@@ -0,0 +1,9 @@
+TEST_GEN_PROGS := test-vphn
+
+CFLAGS += -m64
+
+top_srcdir = ../../../../..
+include ../../lib.mk
+
+$(TEST_GEN_PROGS): ../harness.c
+
diff --git a/tools/testing/selftests/powerpc/vphn/test-vphn.c b/tools/testing/selftests/powerpc/vphn/test-vphn.c
new file mode 100644
index 000000000..81d3069ff
--- /dev/null
+++ b/tools/testing/selftests/powerpc/vphn/test-vphn.c
@@ -0,0 +1,411 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdio.h>
+#include <byteswap.h>
+#include "utils.h"
+#include "subunit.h"
+
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+#define cpu_to_be32(x)		bswap_32(x)
+#define be32_to_cpu(x)		bswap_32(x)
+#define be16_to_cpup(x)		bswap_16(*x)
+#define cpu_to_be64(x)		bswap_64(x)
+#else
+#define cpu_to_be32(x)		(x)
+#define be32_to_cpu(x)		(x)
+#define be16_to_cpup(x)		(*x)
+#define cpu_to_be64(x)		(x)
+#endif
+
+#include "vphn.c"
+
+static struct test {
+	char *descr;
+	long input[VPHN_REGISTER_COUNT];
+	u32 expected[VPHN_ASSOC_BUFSIZE];
+} all_tests[] = {
+	{
+		"vphn: no data",
+		{
+			0xffffffffffffffff,
+			0xffffffffffffffff,
+			0xffffffffffffffff,
+			0xffffffffffffffff,
+			0xffffffffffffffff,
+			0xffffffffffffffff,
+		},
+		{
+			0x00000000
+		}
+	},
+	{
+		"vphn: 1 x 16-bit value",
+		{
+			0x8001ffffffffffff,
+			0xffffffffffffffff,
+			0xffffffffffffffff,
+			0xffffffffffffffff,
+			0xffffffffffffffff,
+			0xffffffffffffffff,
+		},
+		{
+			0x00000001,
+			0x00000001
+		}
+	},
+	{
+		"vphn: 2 x 16-bit values",
+		{
+			0x80018002ffffffff,
+			0xffffffffffffffff,
+			0xffffffffffffffff,
+			0xffffffffffffffff,
+			0xffffffffffffffff,
+			0xffffffffffffffff,
+		},
+		{
+			0x00000002,
+			0x00000001,
+			0x00000002
+		}
+	},
+	{
+		"vphn: 3 x 16-bit values",
+		{
+			0x800180028003ffff,
+			0xffffffffffffffff,
+			0xffffffffffffffff,
+			0xffffffffffffffff,
+			0xffffffffffffffff,
+			0xffffffffffffffff,
+		},
+		{
+			0x00000003,
+			0x00000001,
+			0x00000002,
+			0x00000003
+		}
+	},
+	{
+		"vphn: 4 x 16-bit values",
+		{
+			0x8001800280038004,
+			0xffffffffffffffff,
+			0xffffffffffffffff,
+			0xffffffffffffffff,
+			0xffffffffffffffff,
+			0xffffffffffffffff,
+		},
+		{
+			0x00000004,
+			0x00000001,
+			0x00000002,
+			0x00000003,
+			0x00000004
+		}
+	},
+	{
+		/* Parsing the next 16-bit value out of the next 64-bit input
+		 * value.
+		 */
+		"vphn: 5 x 16-bit values",
+		{
+			0x8001800280038004,
+			0x8005ffffffffffff,
+			0xffffffffffffffff,
+			0xffffffffffffffff,
+			0xffffffffffffffff,
+			0xffffffffffffffff,
+		},
+		{
+			0x00000005,
+			0x00000001,
+			0x00000002,
+			0x00000003,
+			0x00000004,
+			0x00000005
+		}
+	},
+	{
+		/* Parse at most 6 x 64-bit input values */
+		"vphn: 24 x 16-bit values",
+		{
+			0x8001800280038004,
+			0x8005800680078008,
+			0x8009800a800b800c,
+			0x800d800e800f8010,
+			0x8011801280138014,
+			0x8015801680178018
+		},
+		{
+			0x00000018,
+			0x00000001,
+			0x00000002,
+			0x00000003,
+			0x00000004,
+			0x00000005,
+			0x00000006,
+			0x00000007,
+			0x00000008,
+			0x00000009,
+			0x0000000a,
+			0x0000000b,
+			0x0000000c,
+			0x0000000d,
+			0x0000000e,
+			0x0000000f,
+			0x00000010,
+			0x00000011,
+			0x00000012,
+			0x00000013,
+			0x00000014,
+			0x00000015,
+			0x00000016,
+			0x00000017,
+			0x00000018
+		}
+	},
+	{
+		"vphn: 1 x 32-bit value",
+		{
+			0x00000001ffffffff,
+			0xffffffffffffffff,
+			0xffffffffffffffff,
+			0xffffffffffffffff,
+			0xffffffffffffffff,
+			0xffffffffffffffff
+		},
+		{
+			0x00000001,
+			0x00000001
+		}
+	},
+	{
+		"vphn: 2 x 32-bit values",
+		{
+			0x0000000100000002,
+			0xffffffffffffffff,
+			0xffffffffffffffff,
+			0xffffffffffffffff,
+			0xffffffffffffffff,
+			0xffffffffffffffff
+		},
+		{
+			0x00000002,
+			0x00000001,
+			0x00000002
+		}
+	},
+	{
+		/* Parsing the next 32-bit value out of the next 64-bit input
+		 * value.
+		 */
+		"vphn: 3 x 32-bit values",
+		{
+			0x0000000100000002,
+			0x00000003ffffffff,
+			0xffffffffffffffff,
+			0xffffffffffffffff,
+			0xffffffffffffffff,
+			0xffffffffffffffff
+		},
+		{
+			0x00000003,
+			0x00000001,
+			0x00000002,
+			0x00000003
+		}
+	},
+	{
+		/* Parse at most 6 x 64-bit input values */
+		"vphn: 12 x 32-bit values",
+		{
+			0x0000000100000002,
+			0x0000000300000004,
+			0x0000000500000006,
+			0x0000000700000008,
+			0x000000090000000a,
+			0x0000000b0000000c
+		},
+		{
+			0x0000000c,
+			0x00000001,
+			0x00000002,
+			0x00000003,
+			0x00000004,
+			0x00000005,
+			0x00000006,
+			0x00000007,
+			0x00000008,
+			0x00000009,
+			0x0000000a,
+			0x0000000b,
+			0x0000000c
+		}
+	},
+	{
+		"vphn: 16-bit value followed by 32-bit value",
+		{
+			0x800100000002ffff,
+			0xffffffffffffffff,
+			0xffffffffffffffff,
+			0xffffffffffffffff,
+			0xffffffffffffffff,
+			0xffffffffffffffff
+		},
+		{
+			0x00000002,
+			0x00000001,
+			0x00000002
+		}
+	},
+	{
+		"vphn: 32-bit value followed by 16-bit value",
+		{
+			0x000000018002ffff,
+			0xffffffffffffffff,
+			0xffffffffffffffff,
+			0xffffffffffffffff,
+			0xffffffffffffffff,
+			0xffffffffffffffff
+		},
+		{
+			0x00000002,
+			0x00000001,
+			0x00000002
+		}
+	},
+	{
+		/* Parse a 32-bit value split accross two consecutives 64-bit
+		 * input values.
+		 */
+		"vphn: 16-bit value followed by 2 x 32-bit values",
+		{
+			0x8001000000020000,
+			0x0003ffffffffffff,
+			0xffffffffffffffff,
+			0xffffffffffffffff,
+			0xffffffffffffffff,
+			0xffffffffffffffff
+		},
+		{
+			0x00000003,
+			0x00000001,
+			0x00000002,
+			0x00000003,
+			0x00000004,
+			0x00000005
+		}
+	},
+	{
+		/* The lower bits in 0x0001ffff don't get mixed up with the
+		 * 0xffff terminator.
+		 */
+		"vphn: 32-bit value has all ones in 16 lower bits",
+		{
+			0x0001ffff80028003,
+			0xffffffffffffffff,
+			0xffffffffffffffff,
+			0xffffffffffffffff,
+			0xffffffffffffffff,
+			0xffffffffffffffff
+		},
+		{
+			0x00000003,
+			0x0001ffff,
+			0x00000002,
+			0x00000003
+		}
+	},
+	{
+		/* The following input doesn't follow the specification.
+		 */
+		"vphn: last 32-bit value is truncated",
+		{
+			0x0000000100000002,
+			0x0000000300000004,
+			0x0000000500000006,
+			0x0000000700000008,
+			0x000000090000000a,
+			0x0000000b800c2bad
+		},
+		{
+			0x0000000c,
+			0x00000001,
+			0x00000002,
+			0x00000003,
+			0x00000004,
+			0x00000005,
+			0x00000006,
+			0x00000007,
+			0x00000008,
+			0x00000009,
+			0x0000000a,
+			0x0000000b,
+			0x0000000c
+		}
+	},
+	{
+		"vphn: garbage after terminator",
+		{
+			0xffff2bad2bad2bad,
+			0x2bad2bad2bad2bad,
+			0x2bad2bad2bad2bad,
+			0x2bad2bad2bad2bad,
+			0x2bad2bad2bad2bad,
+			0x2bad2bad2bad2bad
+		},
+		{
+			0x00000000
+		}
+	},
+	{
+		NULL
+	}
+};
+
+static int test_one(struct test *test)
+{
+	__be32 output[VPHN_ASSOC_BUFSIZE] = { 0 };
+	int i, len;
+
+	vphn_unpack_associativity(test->input, output);
+
+	len = be32_to_cpu(output[0]);
+	if (len != test->expected[0]) {
+		printf("expected %d elements, got %d\n", test->expected[0],
+		       len);
+		return 1;
+	}
+
+	for (i = 1; i < len; i++) {
+		u32 val = be32_to_cpu(output[i]);
+		if (val != test->expected[i]) {
+			printf("element #%d is 0x%x, should be 0x%x\n", i, val,
+			       test->expected[i]);
+			return 1;
+		}
+	}
+
+	return 0;
+}
+
+static int test_vphn(void)
+{
+	static struct test *test;
+
+	for (test = all_tests; test->descr; test++) {
+		int ret;
+
+		ret = test_one(test);
+		test_finish(test->descr, ret);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+int main(int argc, char **argv)
+{
+	return test_harness(test_vphn, "test-vphn");
+}
diff --git a/tools/testing/selftests/powerpc/vphn/vphn.c b/tools/testing/selftests/powerpc/vphn/vphn.c
new file mode 120000
index 000000000..186b906e6
--- /dev/null
+++ b/tools/testing/selftests/powerpc/vphn/vphn.c
@@ -0,0 +1 @@
+../../../../../arch/powerpc/mm/vphn.c
+\ No newline at end of file
diff --git a/tools/testing/selftests/powerpc/vphn/vphn.h b/tools/testing/selftests/powerpc/vphn/vphn.h
new file mode 120000
index 000000000..7131efe38
--- /dev/null
+++ b/tools/testing/selftests/powerpc/vphn/vphn.h
@@ -0,0 +1 @@
+../../../../../arch/powerpc/mm/vphn.h
+\ No newline at end of file
diff --git a/tools/testing/selftests/prctl/.gitignore b/tools/testing/selftests/prctl/.gitignore
new file mode 100644
index 000000000..0b5c27447
--- /dev/null
+++ b/tools/testing/selftests/prctl/.gitignore
@@ -0,0 +1,3 @@
+disable-tsc-ctxt-sw-stress-test
+disable-tsc-on-off-stress-test
+disable-tsc-test
diff --git a/tools/testing/selftests/prctl/Makefile b/tools/testing/selftests/prctl/Makefile
new file mode 100644
index 000000000..c7923b205
--- /dev/null
+++ b/tools/testing/selftests/prctl/Makefile
@@ -0,0 +1,16 @@
+# SPDX-License-Identifier: GPL-2.0
+ifndef CROSS_COMPILE
+uname_M := $(shell uname -m 2>/dev/null || echo not)
+ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/)
+
+ifeq ($(ARCH),x86)
+TEST_PROGS := disable-tsc-ctxt-sw-stress-test disable-tsc-on-off-stress-test \
+		disable-tsc-test
+all: $(TEST_PROGS)
+
+include ../lib.mk
+
+clean:
+	rm -fr $(TEST_PROGS)
+endif
+endif
diff --git a/tools/testing/selftests/prctl/disable-tsc-ctxt-sw-stress-test.c b/tools/testing/selftests/prctl/disable-tsc-ctxt-sw-stress-test.c
new file mode 100644
index 000000000..62a93cc61
--- /dev/null
+++ b/tools/testing/selftests/prctl/disable-tsc-ctxt-sw-stress-test.c
@@ -0,0 +1,98 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Tests for prctl(PR_GET_TSC, ...) / prctl(PR_SET_TSC, ...)
+ *
+ * Tests if the control register is updated correctly
+ * at context switches
+ *
+ * Warning: this test will cause a very high load for a few seconds
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <signal.h>
+#include <inttypes.h>
+#include <wait.h>
+
+
+#include <sys/prctl.h>
+#include <linux/prctl.h>
+
+/* Get/set the process' ability to use the timestamp counter instruction */
+#ifndef PR_GET_TSC
+#define PR_GET_TSC 25
+#define PR_SET_TSC 26
+# define PR_TSC_ENABLE		1   /* allow the use of the timestamp counter */
+# define PR_TSC_SIGSEGV		2   /* throw a SIGSEGV instead of reading the TSC */
+#endif
+
+static uint64_t rdtsc(void)
+{
+uint32_t lo, hi;
+/* We cannot use "=A", since this would use %rax on x86_64 */
+__asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi));
+return (uint64_t)hi << 32 | lo;
+}
+
+static void sigsegv_expect(int sig)
+{
+	/* */
+}
+
+static void segvtask(void)
+{
+	if (prctl(PR_SET_TSC, PR_TSC_SIGSEGV) < 0)
+	{
+		perror("prctl");
+		exit(0);
+	}
+	signal(SIGSEGV, sigsegv_expect);
+	alarm(10);
+	rdtsc();
+	fprintf(stderr, "FATAL ERROR, rdtsc() succeeded while disabled\n");
+	exit(0);
+}
+
+
+static void sigsegv_fail(int sig)
+{
+	fprintf(stderr, "FATAL ERROR, rdtsc() failed while enabled\n");
+	exit(0);
+}
+
+static void rdtsctask(void)
+{
+	if (prctl(PR_SET_TSC, PR_TSC_ENABLE) < 0)
+	{
+		perror("prctl");
+		exit(0);
+	}
+	signal(SIGSEGV, sigsegv_fail);
+	alarm(10);
+	for(;;) rdtsc();
+}
+
+
+int main(void)
+{
+	int n_tasks = 100, i;
+
+	fprintf(stderr, "[No further output means we're allright]\n");
+
+	for (i=0; i<n_tasks; i++)
+		if (fork() == 0)
+		{
+			if (i & 1)
+				segvtask();
+			else
+				rdtsctask();
+		}
+
+	for (i=0; i<n_tasks; i++)
+		wait(NULL);
+
+	exit(0);
+}
+
diff --git a/tools/testing/selftests/prctl/disable-tsc-on-off-stress-test.c b/tools/testing/selftests/prctl/disable-tsc-on-off-stress-test.c
new file mode 100644
index 000000000..79950f9a2
--- /dev/null
+++ b/tools/testing/selftests/prctl/disable-tsc-on-off-stress-test.c
@@ -0,0 +1,97 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Tests for prctl(PR_GET_TSC, ...) / prctl(PR_SET_TSC, ...)
+ *
+ * Tests if the control register is updated correctly
+ * when set with prctl()
+ *
+ * Warning: this test will cause a very high load for a few seconds
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <signal.h>
+#include <inttypes.h>
+#include <wait.h>
+
+
+#include <sys/prctl.h>
+#include <linux/prctl.h>
+
+/* Get/set the process' ability to use the timestamp counter instruction */
+#ifndef PR_GET_TSC
+#define PR_GET_TSC 25
+#define PR_SET_TSC 26
+# define PR_TSC_ENABLE		1   /* allow the use of the timestamp counter */
+# define PR_TSC_SIGSEGV		2   /* throw a SIGSEGV instead of reading the TSC */
+#endif
+
+/* snippet from wikipedia :-) */
+
+static uint64_t rdtsc(void)
+{
+uint32_t lo, hi;
+/* We cannot use "=A", since this would use %rax on x86_64 */
+__asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi));
+return (uint64_t)hi << 32 | lo;
+}
+
+int should_segv = 0;
+
+static void sigsegv_cb(int sig)
+{
+	if (!should_segv)
+	{
+		fprintf(stderr, "FATAL ERROR, rdtsc() failed while enabled\n");
+		exit(0);
+	}
+	if (prctl(PR_SET_TSC, PR_TSC_ENABLE) < 0)
+	{
+		perror("prctl");
+		exit(0);
+	}
+	should_segv = 0;
+
+	rdtsc();
+}
+
+static void task(void)
+{
+	signal(SIGSEGV, sigsegv_cb);
+	alarm(10);
+	for(;;)
+	{
+		rdtsc();
+		if (should_segv)
+		{
+			fprintf(stderr, "FATAL ERROR, rdtsc() succeeded while disabled\n");
+			exit(0);
+		}
+		if (prctl(PR_SET_TSC, PR_TSC_SIGSEGV) < 0)
+		{
+			perror("prctl");
+			exit(0);
+		}
+		should_segv = 1;
+	}
+}
+
+
+int main(void)
+{
+	int n_tasks = 100, i;
+
+	fprintf(stderr, "[No further output means we're allright]\n");
+
+	for (i=0; i<n_tasks; i++)
+		if (fork() == 0)
+			task();
+
+	for (i=0; i<n_tasks; i++)
+		wait(NULL);
+
+	exit(0);
+}
+
diff --git a/tools/testing/selftests/prctl/disable-tsc-test.c b/tools/testing/selftests/prctl/disable-tsc-test.c
new file mode 100644
index 000000000..f84d4ee11
--- /dev/null
+++ b/tools/testing/selftests/prctl/disable-tsc-test.c
@@ -0,0 +1,96 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Tests for prctl(PR_GET_TSC, ...) / prctl(PR_SET_TSC, ...)
+ *
+ * Basic test to test behaviour of PR_GET_TSC and PR_SET_TSC
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <signal.h>
+#include <inttypes.h>
+
+
+#include <sys/prctl.h>
+#include <linux/prctl.h>
+
+/* Get/set the process' ability to use the timestamp counter instruction */
+#ifndef PR_GET_TSC
+#define PR_GET_TSC 25
+#define PR_SET_TSC 26
+# define PR_TSC_ENABLE		1   /* allow the use of the timestamp counter */
+# define PR_TSC_SIGSEGV		2   /* throw a SIGSEGV instead of reading the TSC */
+#endif
+
+const char *tsc_names[] =
+{
+	[0] = "[not set]",
+	[PR_TSC_ENABLE] = "PR_TSC_ENABLE",
+	[PR_TSC_SIGSEGV] = "PR_TSC_SIGSEGV",
+};
+
+static uint64_t rdtsc(void)
+{
+uint32_t lo, hi;
+/* We cannot use "=A", since this would use %rax on x86_64 */
+__asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi));
+return (uint64_t)hi << 32 | lo;
+}
+
+static void sigsegv_cb(int sig)
+{
+	int tsc_val = 0;
+
+	printf("[ SIG_SEGV ]\n");
+	printf("prctl(PR_GET_TSC, &tsc_val); ");
+	fflush(stdout);
+
+	if ( prctl(PR_GET_TSC, &tsc_val) == -1)
+		perror("prctl");
+
+	printf("tsc_val == %s\n", tsc_names[tsc_val]);
+	printf("prctl(PR_SET_TSC, PR_TSC_ENABLE)\n");
+	fflush(stdout);
+	if ( prctl(PR_SET_TSC, PR_TSC_ENABLE) == -1)
+		perror("prctl");
+
+	printf("rdtsc() == ");
+}
+
+int main(void)
+{
+	int tsc_val = 0;
+
+	signal(SIGSEGV, sigsegv_cb);
+
+	printf("rdtsc() == %llu\n", (unsigned long long)rdtsc());
+	printf("prctl(PR_GET_TSC, &tsc_val); ");
+	fflush(stdout);
+
+	if ( prctl(PR_GET_TSC, &tsc_val) == -1)
+		perror("prctl");
+
+	printf("tsc_val == %s\n", tsc_names[tsc_val]);
+	printf("rdtsc() == %llu\n", (unsigned long long)rdtsc());
+	printf("prctl(PR_SET_TSC, PR_TSC_ENABLE)\n");
+	fflush(stdout);
+
+	if ( prctl(PR_SET_TSC, PR_TSC_ENABLE) == -1)
+		perror("prctl");
+
+	printf("rdtsc() == %llu\n", (unsigned long long)rdtsc());
+	printf("prctl(PR_SET_TSC, PR_TSC_SIGSEGV)\n");
+	fflush(stdout);
+
+	if ( prctl(PR_SET_TSC, PR_TSC_SIGSEGV) == -1)
+		perror("prctl");
+
+	printf("rdtsc() == ");
+	fflush(stdout);
+	printf("%llu\n", (unsigned long long)rdtsc());
+	fflush(stdout);
+
+	exit(EXIT_SUCCESS);
+}
+
diff --git a/tools/testing/selftests/proc/.gitignore b/tools/testing/selftests/proc/.gitignore
new file mode 100644
index 000000000..29bac5ef9
--- /dev/null
+++ b/tools/testing/selftests/proc/.gitignore
@@ -0,0 +1,14 @@
+/fd-001-lookup
+/fd-002-posix-eq
+/fd-003-kthread
+/proc-loadavg-001
+/proc-self-map-files-001
+/proc-self-map-files-002
+/proc-self-syscall
+/proc-self-wchan
+/proc-uptime-001
+/proc-uptime-002
+/read
+/self
+/setns-dcache
+/thread-self
diff --git a/tools/testing/selftests/proc/Makefile b/tools/testing/selftests/proc/Makefile
new file mode 100644
index 000000000..434d033ee
--- /dev/null
+++ b/tools/testing/selftests/proc/Makefile
@@ -0,0 +1,20 @@
+CFLAGS += -Wall -O2 -Wno-unused-function
+CFLAGS += -D_GNU_SOURCE
+
+TEST_GEN_PROGS :=
+TEST_GEN_PROGS += fd-001-lookup
+TEST_GEN_PROGS += fd-002-posix-eq
+TEST_GEN_PROGS += fd-003-kthread
+TEST_GEN_PROGS += proc-loadavg-001
+TEST_GEN_PROGS += proc-self-map-files-001
+TEST_GEN_PROGS += proc-self-map-files-002
+TEST_GEN_PROGS += proc-self-syscall
+TEST_GEN_PROGS += proc-self-wchan
+TEST_GEN_PROGS += proc-uptime-001
+TEST_GEN_PROGS += proc-uptime-002
+TEST_GEN_PROGS += read
+TEST_GEN_PROGS += self
+TEST_GEN_PROGS += setns-dcache
+TEST_GEN_PROGS += thread-self
+
+include ../lib.mk
diff --git a/tools/testing/selftests/proc/config b/tools/testing/selftests/proc/config
new file mode 100644
index 000000000..68fbd2b35
--- /dev/null
+++ b/tools/testing/selftests/proc/config
@@ -0,0 +1 @@
+CONFIG_PROC_FS=y
diff --git a/tools/testing/selftests/proc/fd-001-lookup.c b/tools/testing/selftests/proc/fd-001-lookup.c
new file mode 100644
index 000000000..60d7948e7
--- /dev/null
+++ b/tools/testing/selftests/proc/fd-001-lookup.c
@@ -0,0 +1,168 @@
+/*
+ * Copyright © 2018 Alexey Dobriyan <adobriyan@gmail.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+// Test /proc/*/fd lookup.
+
+#undef NDEBUG
+#include <assert.h>
+#include <dirent.h>
+#include <errno.h>
+#include <limits.h>
+#include <sched.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+
+#include "proc.h"
+
+/* lstat(2) has more "coverage" in case non-symlink pops up somehow. */
+static void test_lookup_pass(const char *pathname)
+{
+	struct stat st;
+	ssize_t rv;
+
+	memset(&st, 0, sizeof(struct stat));
+	rv = lstat(pathname, &st);
+	assert(rv == 0);
+	assert(S_ISLNK(st.st_mode));
+}
+
+static void test_lookup_fail(const char *pathname)
+{
+	struct stat st;
+	ssize_t rv;
+
+	rv = lstat(pathname, &st);
+	assert(rv == -1 && errno == ENOENT);
+}
+
+static void test_lookup(unsigned int fd)
+{
+	char buf[64];
+	unsigned int c;
+	unsigned int u;
+	int i;
+
+	snprintf(buf, sizeof(buf), "/proc/self/fd/%u", fd);
+	test_lookup_pass(buf);
+
+	/* leading junk */
+	for (c = 1; c <= 255; c++) {
+		if (c == '/')
+			continue;
+		snprintf(buf, sizeof(buf), "/proc/self/fd/%c%u", c, fd);
+		test_lookup_fail(buf);
+	}
+
+	/* trailing junk */
+	for (c = 1; c <= 255; c++) {
+		if (c == '/')
+			continue;
+		snprintf(buf, sizeof(buf), "/proc/self/fd/%u%c", fd, c);
+		test_lookup_fail(buf);
+	}
+
+	for (i = INT_MIN; i < INT_MIN + 1024; i++) {
+		snprintf(buf, sizeof(buf), "/proc/self/fd/%d", i);
+		test_lookup_fail(buf);
+	}
+	for (i = -1024; i < 0; i++) {
+		snprintf(buf, sizeof(buf), "/proc/self/fd/%d", i);
+		test_lookup_fail(buf);
+	}
+	for (u = INT_MAX - 1024; u <= (unsigned int)INT_MAX + 1024; u++) {
+		snprintf(buf, sizeof(buf), "/proc/self/fd/%u", u);
+		test_lookup_fail(buf);
+	}
+	for (u = UINT_MAX - 1024; u != 0; u++) {
+		snprintf(buf, sizeof(buf), "/proc/self/fd/%u", u);
+		test_lookup_fail(buf);
+	}
+
+
+}
+
+int main(void)
+{
+	struct dirent *de;
+	unsigned int fd, target_fd;
+
+	if (unshare(CLONE_FILES) == -1)
+		return 1;
+
+	/* Wipe fdtable. */
+	do {
+		DIR *d;
+
+		d = opendir("/proc/self/fd");
+		if (!d)
+			return 1;
+
+		de = xreaddir(d);
+		assert(de->d_type == DT_DIR);
+		assert(streq(de->d_name, "."));
+
+		de = xreaddir(d);
+		assert(de->d_type == DT_DIR);
+		assert(streq(de->d_name, ".."));
+next:
+		de = xreaddir(d);
+		if (de) {
+			unsigned long long fd_ull;
+			unsigned int fd;
+			char *end;
+
+			assert(de->d_type == DT_LNK);
+
+			fd_ull = xstrtoull(de->d_name, &end);
+			assert(*end == '\0');
+			assert(fd_ull == (unsigned int)fd_ull);
+
+			fd = fd_ull;
+			if (fd == dirfd(d))
+				goto next;
+			close(fd);
+		}
+
+		closedir(d);
+	} while (de);
+
+	/* Now fdtable is clean. */
+
+	fd = open("/", O_PATH|O_DIRECTORY);
+	assert(fd == 0);
+	test_lookup(fd);
+	close(fd);
+
+	/* Clean again! */
+
+	fd = open("/", O_PATH|O_DIRECTORY);
+	assert(fd == 0);
+	/* Default RLIMIT_NOFILE-1 */
+	target_fd = 1023;
+	while (target_fd > 0) {
+		if (dup2(fd, target_fd) == target_fd)
+			break;
+		target_fd /= 2;
+	}
+	assert(target_fd > 0);
+	close(fd);
+	test_lookup(target_fd);
+	close(target_fd);
+
+	return 0;
+}
diff --git a/tools/testing/selftests/proc/fd-002-posix-eq.c b/tools/testing/selftests/proc/fd-002-posix-eq.c
new file mode 100644
index 000000000..417322ca9
--- /dev/null
+++ b/tools/testing/selftests/proc/fd-002-posix-eq.c
@@ -0,0 +1,57 @@
+/*
+ * Copyright © 2018 Alexey Dobriyan <adobriyan@gmail.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+// Test that open(/proc/*/fd/*) opens the same file.
+#undef NDEBUG
+#include <assert.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+int main(void)
+{
+	int fd0, fd1, fd2;
+	struct stat st0, st1, st2;
+	char buf[64];
+	int rv;
+
+	fd0 = open("/", O_DIRECTORY|O_RDONLY);
+	assert(fd0 >= 0);
+
+	snprintf(buf, sizeof(buf), "/proc/self/fd/%u", fd0);
+	fd1 = open(buf, O_RDONLY);
+	assert(fd1 >= 0);
+
+	snprintf(buf, sizeof(buf), "/proc/thread-self/fd/%u", fd0);
+	fd2 = open(buf, O_RDONLY);
+	assert(fd2 >= 0);
+
+	rv = fstat(fd0, &st0);
+	assert(rv == 0);
+	rv = fstat(fd1, &st1);
+	assert(rv == 0);
+	rv = fstat(fd2, &st2);
+	assert(rv == 0);
+
+	assert(st0.st_dev == st1.st_dev);
+	assert(st0.st_ino == st1.st_ino);
+
+	assert(st0.st_dev == st2.st_dev);
+	assert(st0.st_ino == st2.st_ino);
+
+	return 0;
+}
diff --git a/tools/testing/selftests/proc/fd-003-kthread.c b/tools/testing/selftests/proc/fd-003-kthread.c
new file mode 100644
index 000000000..dc591f97b
--- /dev/null
+++ b/tools/testing/selftests/proc/fd-003-kthread.c
@@ -0,0 +1,178 @@
+/*
+ * Copyright © 2018 Alexey Dobriyan <adobriyan@gmail.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+// Test that /proc/$KERNEL_THREAD/fd/ is empty.
+
+#undef NDEBUG
+#include <sys/syscall.h>
+#include <assert.h>
+#include <dirent.h>
+#include <limits.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+#include "proc.h"
+
+#define PF_KHTREAD 0x00200000
+
+/*
+ * Test for kernel threadness atomically with openat().
+ *
+ * Return /proc/$PID/fd descriptor if process is kernel thread.
+ * Return -1 if a process is userspace process.
+ */
+static int kernel_thread_fd(unsigned int pid)
+{
+	unsigned int flags = 0;
+	char buf[4096];
+	int dir_fd, fd;
+	ssize_t rv;
+
+	snprintf(buf, sizeof(buf), "/proc/%u", pid);
+	dir_fd = open(buf, O_RDONLY|O_DIRECTORY);
+	if (dir_fd == -1)
+		return -1;
+
+	/*
+	 * Believe it or not, struct task_struct::flags is directly exposed
+	 * to userspace!
+	 */
+	fd = openat(dir_fd, "stat", O_RDONLY);
+	if (fd == -1) {
+		close(dir_fd);
+		return -1;
+	}
+	rv = read(fd, buf, sizeof(buf));
+	close(fd);
+	if (0 < rv && rv <= sizeof(buf)) {
+		unsigned long long flags_ull;
+		char *p, *end;
+		int i;
+
+		assert(buf[rv - 1] == '\n');
+		buf[rv - 1] = '\0';
+
+		/* Search backwards: ->comm can contain whitespace and ')'. */
+		for (i = 0; i < 43; i++) {
+			p = strrchr(buf, ' ');
+			assert(p);
+			*p = '\0';
+		}
+
+		p = strrchr(buf, ' ');
+		assert(p);
+
+		flags_ull = xstrtoull(p + 1, &end);
+		assert(*end == '\0');
+		assert(flags_ull == (unsigned int)flags_ull);
+
+		flags = flags_ull;
+	}
+
+	fd = -1;
+	if (flags & PF_KHTREAD) {
+		fd = openat(dir_fd, "fd", O_RDONLY|O_DIRECTORY);
+	}
+	close(dir_fd);
+	return fd;
+}
+
+static void test_readdir(int fd)
+{
+	DIR *d;
+	struct dirent *de;
+
+	d = fdopendir(fd);
+	assert(d);
+
+	de = xreaddir(d);
+	assert(streq(de->d_name, "."));
+	assert(de->d_type == DT_DIR);
+
+	de = xreaddir(d);
+	assert(streq(de->d_name, ".."));
+	assert(de->d_type == DT_DIR);
+
+	de = xreaddir(d);
+	assert(!de);
+}
+
+static inline int sys_statx(int dirfd, const char *pathname, int flags,
+			    unsigned int mask, void *stx)
+{
+	return syscall(SYS_statx, dirfd, pathname, flags, mask, stx);
+}
+
+static void test_lookup_fail(int fd, const char *pathname)
+{
+	char stx[256] __attribute__((aligned(8)));
+	int rv;
+
+	rv = sys_statx(fd, pathname, AT_SYMLINK_NOFOLLOW, 0, (void *)stx);
+	assert(rv == -1 && errno == ENOENT);
+}
+
+static void test_lookup(int fd)
+{
+	char buf[64];
+	unsigned int u;
+	int i;
+
+	for (i = INT_MIN; i < INT_MIN + 1024; i++) {
+		snprintf(buf, sizeof(buf), "%d", i);
+		test_lookup_fail(fd, buf);
+	}
+	for (i = -1024; i < 1024; i++) {
+		snprintf(buf, sizeof(buf), "%d", i);
+		test_lookup_fail(fd, buf);
+	}
+	for (u = INT_MAX - 1024; u < (unsigned int)INT_MAX + 1024; u++) {
+		snprintf(buf, sizeof(buf), "%u", u);
+		test_lookup_fail(fd, buf);
+	}
+	for (u = UINT_MAX - 1024; u != 0; u++) {
+		snprintf(buf, sizeof(buf), "%u", u);
+		test_lookup_fail(fd, buf);
+	}
+}
+
+int main(void)
+{
+	unsigned int pid;
+	int fd;
+
+	/*
+	 * In theory this will loop indefinitely if kernel threads are exiled
+	 * from /proc.
+	 *
+	 * Start with kthreadd.
+	 */
+	pid = 2;
+	while ((fd = kernel_thread_fd(pid)) == -1 && pid < 1024) {
+		pid++;
+	}
+	/* EACCES if run as non-root. */
+	if (pid >= 1024)
+		return 1;
+
+	test_readdir(fd);
+	test_lookup(fd);
+
+	return 0;
+}
diff --git a/tools/testing/selftests/proc/proc-loadavg-001.c b/tools/testing/selftests/proc/proc-loadavg-001.c
new file mode 100644
index 000000000..8edaafc2b
--- /dev/null
+++ b/tools/testing/selftests/proc/proc-loadavg-001.c
@@ -0,0 +1,82 @@
+/*
+ * Copyright © 2018 Alexey Dobriyan <adobriyan@gmail.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+/* Test that /proc/loadavg correctly reports last pid in pid namespace. */
+#include <errno.h>
+#include <sched.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/wait.h>
+
+int main(void)
+{
+	pid_t pid;
+	int wstatus;
+
+	if (unshare(CLONE_NEWPID) == -1) {
+		if (errno == ENOSYS || errno == EPERM)
+			return 2;
+		return 1;
+	}
+
+	pid = fork();
+	if (pid == -1)
+		return 1;
+	if (pid == 0) {
+		char buf[128], *p;
+		int fd;
+		ssize_t rv;
+
+		fd = open("/proc/loadavg" , O_RDONLY);
+		if (fd == -1)
+			return 1;
+		rv = read(fd, buf, sizeof(buf));
+		if (rv < 3)
+			return 1;
+		p = buf + rv;
+
+		/* pid 1 */
+		if (!(p[-3] == ' ' && p[-2] == '1' && p[-1] == '\n'))
+			return 1;
+
+		pid = fork();
+		if (pid == -1)
+			return 1;
+		if (pid == 0)
+			return 0;
+		if (waitpid(pid, NULL, 0) == -1)
+			return 1;
+
+		lseek(fd, 0, SEEK_SET);
+		rv = read(fd, buf, sizeof(buf));
+		if (rv < 3)
+			return 1;
+		p = buf + rv;
+
+		/* pid 2 */
+		if (!(p[-3] == ' ' && p[-2] == '2' && p[-1] == '\n'))
+			return 1;
+
+		return 0;
+	}
+
+	if (waitpid(pid, &wstatus, 0) == -1)
+		return 1;
+	if (WIFEXITED(wstatus) && WEXITSTATUS(wstatus) == 0)
+		return 0;
+	return 1;
+}
diff --git a/tools/testing/selftests/proc/proc-self-map-files-001.c b/tools/testing/selftests/proc/proc-self-map-files-001.c
new file mode 100644
index 000000000..4209c6428
--- /dev/null
+++ b/tools/testing/selftests/proc/proc-self-map-files-001.c
@@ -0,0 +1,82 @@
+/*
+ * Copyright © 2018 Alexey Dobriyan <adobriyan@gmail.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+/* Test readlink /proc/self/map_files/... */
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <stdlib.h>
+
+static void pass(const char *fmt, unsigned long a, unsigned long b)
+{
+	char name[64];
+	char buf[64];
+
+	snprintf(name, sizeof(name), fmt, a, b);
+	if (readlink(name, buf, sizeof(buf)) == -1)
+		exit(1);
+}
+
+static void fail(const char *fmt, unsigned long a, unsigned long b)
+{
+	char name[64];
+	char buf[64];
+
+	snprintf(name, sizeof(name), fmt, a, b);
+	if (readlink(name, buf, sizeof(buf)) == -1 && errno == ENOENT)
+		return;
+	exit(1);
+}
+
+int main(void)
+{
+	const unsigned int PAGE_SIZE = sysconf(_SC_PAGESIZE);
+	void *p;
+	int fd;
+	unsigned long a, b;
+
+	fd = open("/dev/zero", O_RDONLY);
+	if (fd == -1)
+		return 1;
+
+	p = mmap(NULL, PAGE_SIZE, PROT_NONE, MAP_PRIVATE|MAP_FILE, fd, 0);
+	if (p == MAP_FAILED)
+		return 1;
+
+	a = (unsigned long)p;
+	b = (unsigned long)p + PAGE_SIZE;
+
+	pass("/proc/self/map_files/%lx-%lx", a, b);
+	fail("/proc/self/map_files/ %lx-%lx", a, b);
+	fail("/proc/self/map_files/%lx -%lx", a, b);
+	fail("/proc/self/map_files/%lx- %lx", a, b);
+	fail("/proc/self/map_files/%lx-%lx ", a, b);
+	fail("/proc/self/map_files/0%lx-%lx", a, b);
+	fail("/proc/self/map_files/%lx-0%lx", a, b);
+	if (sizeof(long) == 4) {
+		fail("/proc/self/map_files/100000000%lx-%lx", a, b);
+		fail("/proc/self/map_files/%lx-100000000%lx", a, b);
+	} else if (sizeof(long) == 8) {
+		fail("/proc/self/map_files/10000000000000000%lx-%lx", a, b);
+		fail("/proc/self/map_files/%lx-10000000000000000%lx", a, b);
+	} else
+		return 1;
+
+	return 0;
+}
diff --git a/tools/testing/selftests/proc/proc-self-map-files-002.c b/tools/testing/selftests/proc/proc-self-map-files-002.c
new file mode 100644
index 000000000..85744425b
--- /dev/null
+++ b/tools/testing/selftests/proc/proc-self-map-files-002.c
@@ -0,0 +1,90 @@
+/*
+ * Copyright © 2018 Alexey Dobriyan <adobriyan@gmail.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+/* Test readlink /proc/self/map_files/... with minimum address. */
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <stdlib.h>
+
+static void pass(const char *fmt, unsigned long a, unsigned long b)
+{
+	char name[64];
+	char buf[64];
+
+	snprintf(name, sizeof(name), fmt, a, b);
+	if (readlink(name, buf, sizeof(buf)) == -1)
+		exit(1);
+}
+
+static void fail(const char *fmt, unsigned long a, unsigned long b)
+{
+	char name[64];
+	char buf[64];
+
+	snprintf(name, sizeof(name), fmt, a, b);
+	if (readlink(name, buf, sizeof(buf)) == -1 && errno == ENOENT)
+		return;
+	exit(1);
+}
+
+int main(void)
+{
+	const unsigned int PAGE_SIZE = sysconf(_SC_PAGESIZE);
+#ifdef __arm__
+	unsigned long va = 2 * PAGE_SIZE;
+#else
+	unsigned long va = 0;
+#endif
+	void *p;
+	int fd;
+	unsigned long a, b;
+
+	fd = open("/dev/zero", O_RDONLY);
+	if (fd == -1)
+		return 1;
+
+	p = mmap((void *)va, PAGE_SIZE, PROT_NONE, MAP_PRIVATE|MAP_FILE|MAP_FIXED, fd, 0);
+	if (p == MAP_FAILED) {
+		if (errno == EPERM)
+			return 2;
+		return 1;
+	}
+
+	a = (unsigned long)p;
+	b = (unsigned long)p + PAGE_SIZE;
+
+	pass("/proc/self/map_files/%lx-%lx", a, b);
+	fail("/proc/self/map_files/ %lx-%lx", a, b);
+	fail("/proc/self/map_files/%lx -%lx", a, b);
+	fail("/proc/self/map_files/%lx- %lx", a, b);
+	fail("/proc/self/map_files/%lx-%lx ", a, b);
+	fail("/proc/self/map_files/0%lx-%lx", a, b);
+	fail("/proc/self/map_files/%lx-0%lx", a, b);
+	if (sizeof(long) == 4) {
+		fail("/proc/self/map_files/100000000%lx-%lx", a, b);
+		fail("/proc/self/map_files/%lx-100000000%lx", a, b);
+	} else if (sizeof(long) == 8) {
+		fail("/proc/self/map_files/10000000000000000%lx-%lx", a, b);
+		fail("/proc/self/map_files/%lx-10000000000000000%lx", a, b);
+	} else
+		return 1;
+
+	return 0;
+}
diff --git a/tools/testing/selftests/proc/proc-self-syscall.c b/tools/testing/selftests/proc/proc-self-syscall.c
new file mode 100644
index 000000000..7b9018fad
--- /dev/null
+++ b/tools/testing/selftests/proc/proc-self-syscall.c
@@ -0,0 +1,59 @@
+/*
+ * Copyright © 2018 Alexey Dobriyan <adobriyan@gmail.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <unistd.h>
+#include <string.h>
+#include <stdio.h>
+
+static inline ssize_t sys_read(int fd, void *buf, size_t len)
+{
+	return syscall(SYS_read, fd, buf, len);
+}
+
+int main(void)
+{
+	char buf1[64];
+	char buf2[64];
+	int fd;
+	ssize_t rv;
+
+	fd = open("/proc/self/syscall", O_RDONLY);
+	if (fd == -1) {
+		if (errno == ENOENT)
+			return 2;
+		return 1;
+	}
+
+	/* Do direct system call as libc can wrap anything. */
+	snprintf(buf1, sizeof(buf1), "%ld 0x%lx 0x%lx 0x%lx",
+		 (long)SYS_read, (long)fd, (long)buf2, (long)sizeof(buf2));
+
+	memset(buf2, 0, sizeof(buf2));
+	rv = sys_read(fd, buf2, sizeof(buf2));
+	if (rv < 0)
+		return 1;
+	if (rv < strlen(buf1))
+		return 1;
+	if (strncmp(buf1, buf2, strlen(buf1)) != 0)
+		return 1;
+
+	return 0;
+}
diff --git a/tools/testing/selftests/proc/proc-self-wchan.c b/tools/testing/selftests/proc/proc-self-wchan.c
new file mode 100644
index 000000000..a38b2fbaa
--- /dev/null
+++ b/tools/testing/selftests/proc/proc-self-wchan.c
@@ -0,0 +1,40 @@
+/*
+ * Copyright © 2018 Alexey Dobriyan <adobriyan@gmail.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <unistd.h>
+
+int main(void)
+{
+	char buf[64];
+	int fd;
+
+	fd = open("/proc/self/wchan", O_RDONLY);
+	if (fd == -1) {
+		if (errno == ENOENT)
+			return 2;
+		return 1;
+	}
+
+	buf[0] = '\0';
+	if (read(fd, buf, sizeof(buf)) != 1)
+		return 1;
+	if (buf[0] != '0')
+		return 1;
+	return 0;
+}
diff --git a/tools/testing/selftests/proc/proc-uptime-001.c b/tools/testing/selftests/proc/proc-uptime-001.c
new file mode 100644
index 000000000..781f7a50f
--- /dev/null
+++ b/tools/testing/selftests/proc/proc-uptime-001.c
@@ -0,0 +1,45 @@
+/*
+ * Copyright © 2018 Alexey Dobriyan <adobriyan@gmail.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+// Test that values in /proc/uptime increment monotonically.
+#undef NDEBUG
+#include <assert.h>
+#include <stdint.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+
+#include "proc-uptime.h"
+
+int main(void)
+{
+	uint64_t start, u0, u1, i0, i1;
+	int fd;
+
+	fd = open("/proc/uptime", O_RDONLY);
+	assert(fd >= 0);
+
+	proc_uptime(fd, &u0, &i0);
+	start = u0;
+	do {
+		proc_uptime(fd, &u1, &i1);
+		assert(u1 >= u0);
+		assert(i1 >= i0);
+		u0 = u1;
+		i0 = i1;
+	} while (u1 - start < 100);
+
+	return 0;
+}
diff --git a/tools/testing/selftests/proc/proc-uptime-002.c b/tools/testing/selftests/proc/proc-uptime-002.c
new file mode 100644
index 000000000..e7ceabed7
--- /dev/null
+++ b/tools/testing/selftests/proc/proc-uptime-002.c
@@ -0,0 +1,78 @@
+/*
+ * Copyright © 2018 Alexey Dobriyan <adobriyan@gmail.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+// Test that values in /proc/uptime increment monotonically
+// while shifting across CPUs.
+#undef NDEBUG
+#include <assert.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <stdint.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+
+#include "proc-uptime.h"
+
+static inline int sys_sched_getaffinity(pid_t pid, unsigned int len, unsigned long *m)
+{
+	return syscall(SYS_sched_getaffinity, pid, len, m);
+}
+
+static inline int sys_sched_setaffinity(pid_t pid, unsigned int len, unsigned long *m)
+{
+	return syscall(SYS_sched_setaffinity, pid, len, m);
+}
+
+int main(void)
+{
+	unsigned int len;
+	unsigned long *m;
+	unsigned int cpu;
+	uint64_t u0, u1, i0, i1;
+	int fd;
+
+	/* find out "nr_cpu_ids" */
+	m = NULL;
+	len = 0;
+	do {
+		len += sizeof(unsigned long);
+		free(m);
+		m = malloc(len);
+	} while (sys_sched_getaffinity(0, len, m) == -EINVAL);
+
+	fd = open("/proc/uptime", O_RDONLY);
+	assert(fd >= 0);
+
+	proc_uptime(fd, &u0, &i0);
+	for (cpu = 0; cpu < len * 8; cpu++) {
+		memset(m, 0, len);
+		m[cpu / (8 * sizeof(unsigned long))] |= 1UL << (cpu % (8 * sizeof(unsigned long)));
+
+		/* CPU might not exist, ignore error */
+		sys_sched_setaffinity(0, len, m);
+
+		proc_uptime(fd, &u1, &i1);
+		assert(u1 >= u0);
+		assert(i1 >= i0);
+		u0 = u1;
+		i0 = i1;
+	}
+
+	return 0;
+}
diff --git a/tools/testing/selftests/proc/proc-uptime.h b/tools/testing/selftests/proc/proc-uptime.h
new file mode 100644
index 000000000..dc6a42b1d
--- /dev/null
+++ b/tools/testing/selftests/proc/proc-uptime.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright © 2018 Alexey Dobriyan <adobriyan@gmail.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+#undef NDEBUG
+#include <assert.h>
+#include <errno.h>
+#include <string.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include "proc.h"
+
+static void proc_uptime(int fd, uint64_t *uptime, uint64_t *idle)
+{
+	uint64_t val1, val2;
+	char buf[64], *p;
+	ssize_t rv;
+
+	/* save "p < end" checks */
+	memset(buf, 0, sizeof(buf));
+	rv = pread(fd, buf, sizeof(buf), 0);
+	assert(0 <= rv && rv <= sizeof(buf));
+	buf[sizeof(buf) - 1] = '\0';
+
+	p = buf;
+
+	val1 = xstrtoull(p, &p);
+	assert(p[0] == '.');
+	assert('0' <= p[1] && p[1] <= '9');
+	assert('0' <= p[2] && p[2] <= '9');
+	assert(p[3] == ' ');
+
+	val2 = (p[1] - '0') * 10 + p[2] - '0';
+	*uptime = val1 * 100 + val2;
+
+	p += 4;
+
+	val1 = xstrtoull(p, &p);
+	assert(p[0] == '.');
+	assert('0' <= p[1] && p[1] <= '9');
+	assert('0' <= p[2] && p[2] <= '9');
+	assert(p[3] == '\n');
+
+	val2 = (p[1] - '0') * 10 + p[2] - '0';
+	*idle = val1 * 100 + val2;
+
+	assert(p + 4 == buf + rv);
+}
diff --git a/tools/testing/selftests/proc/proc.h b/tools/testing/selftests/proc/proc.h
new file mode 100644
index 000000000..b7d57ea40
--- /dev/null
+++ b/tools/testing/selftests/proc/proc.h
@@ -0,0 +1,51 @@
+#pragma once
+#undef NDEBUG
+#include <assert.h>
+#include <dirent.h>
+#include <errno.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+
+static inline pid_t sys_getpid(void)
+{
+	return syscall(SYS_getpid);
+}
+
+static inline pid_t sys_gettid(void)
+{
+	return syscall(SYS_gettid);
+}
+
+static inline bool streq(const char *s1, const char *s2)
+{
+	return strcmp(s1, s2) == 0;
+}
+
+static unsigned long long xstrtoull(const char *p, char **end)
+{
+	if (*p == '0') {
+		*end = (char *)p + 1;
+		return 0;
+	} else if ('1' <= *p && *p <= '9') {
+		unsigned long long val;
+
+		errno = 0;
+		val = strtoull(p, end, 10);
+		assert(errno == 0);
+		return val;
+	} else
+		assert(0);
+}
+
+static struct dirent *xreaddir(DIR *d)
+{
+	struct dirent *de;
+
+	errno = 0;
+	de = readdir(d);
+	assert(de || errno == 0);
+	return de;
+}
diff --git a/tools/testing/selftests/proc/read.c b/tools/testing/selftests/proc/read.c
new file mode 100644
index 000000000..563e752e6
--- /dev/null
+++ b/tools/testing/selftests/proc/read.c
@@ -0,0 +1,132 @@
+/*
+ * Copyright © 2018 Alexey Dobriyan <adobriyan@gmail.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+// Test
+// 1) read of every file in /proc
+// 2) readlink of every symlink in /proc
+// 3) recursively (1) + (2) for every directory in /proc
+// 4) write to /proc/*/clear_refs and /proc/*/task/*/clear_refs
+// 5) write to /proc/sysrq-trigger
+#undef NDEBUG
+#include <assert.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <dirent.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+#include "proc.h"
+
+static void f_reg(DIR *d, const char *filename)
+{
+	char buf[4096];
+	int fd;
+	ssize_t rv;
+
+	/* read from /proc/kmsg can block */
+	fd = openat(dirfd(d), filename, O_RDONLY|O_NONBLOCK);
+	if (fd == -1)
+		return;
+	rv = read(fd, buf, sizeof(buf));
+	assert((0 <= rv && rv <= sizeof(buf)) || rv == -1);
+	close(fd);
+}
+
+static void f_reg_write(DIR *d, const char *filename, const char *buf, size_t len)
+{
+	int fd;
+	ssize_t rv;
+
+	fd = openat(dirfd(d), filename, O_WRONLY);
+	if (fd == -1)
+		return;
+	rv = write(fd, buf, len);
+	assert((0 <= rv && rv <= len) || rv == -1);
+	close(fd);
+}
+
+static void f_lnk(DIR *d, const char *filename)
+{
+	char buf[4096];
+	ssize_t rv;
+
+	rv = readlinkat(dirfd(d), filename, buf, sizeof(buf));
+	assert((0 <= rv && rv <= sizeof(buf)) || rv == -1);
+}
+
+static void f(DIR *d, unsigned int level)
+{
+	struct dirent *de;
+
+	de = xreaddir(d);
+	assert(de->d_type == DT_DIR);
+	assert(streq(de->d_name, "."));
+
+	de = xreaddir(d);
+	assert(de->d_type == DT_DIR);
+	assert(streq(de->d_name, ".."));
+
+	while ((de = xreaddir(d))) {
+		assert(!streq(de->d_name, "."));
+		assert(!streq(de->d_name, ".."));
+
+		switch (de->d_type) {
+			DIR *dd;
+			int fd;
+
+		case DT_REG:
+			if (level == 0 && streq(de->d_name, "sysrq-trigger")) {
+				f_reg_write(d, de->d_name, "h", 1);
+			} else if (level == 1 && streq(de->d_name, "clear_refs")) {
+				f_reg_write(d, de->d_name, "1", 1);
+			} else if (level == 3 && streq(de->d_name, "clear_refs")) {
+				f_reg_write(d, de->d_name, "1", 1);
+			} else {
+				f_reg(d, de->d_name);
+			}
+			break;
+		case DT_DIR:
+			fd = openat(dirfd(d), de->d_name, O_DIRECTORY|O_RDONLY);
+			if (fd == -1)
+				continue;
+			dd = fdopendir(fd);
+			if (!dd)
+				continue;
+			f(dd, level + 1);
+			closedir(dd);
+			break;
+		case DT_LNK:
+			f_lnk(d, de->d_name);
+			break;
+		default:
+			assert(0);
+		}
+	}
+}
+
+int main(void)
+{
+	DIR *d;
+
+	d = opendir("/proc");
+	if (!d)
+		return 2;
+	f(d, 0);
+	return 0;
+}
diff --git a/tools/testing/selftests/proc/self.c b/tools/testing/selftests/proc/self.c
new file mode 100644
index 000000000..21c15a1ff
--- /dev/null
+++ b/tools/testing/selftests/proc/self.c
@@ -0,0 +1,39 @@
+/*
+ * Copyright © 2018 Alexey Dobriyan <adobriyan@gmail.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+// Test that /proc/self gives correct TGID.
+#undef NDEBUG
+#include <assert.h>
+#include <stdio.h>
+#include <unistd.h>
+
+#include "proc.h"
+
+int main(void)
+{
+	char buf1[64], buf2[64];
+	pid_t pid;
+	ssize_t rv;
+
+	pid = sys_getpid();
+	snprintf(buf1, sizeof(buf1), "%u", pid);
+
+	rv = readlink("/proc/self", buf2, sizeof(buf2));
+	assert(rv == strlen(buf1));
+	buf2[rv] = '\0';
+	assert(streq(buf1, buf2));
+
+	return 0;
+}
diff --git a/tools/testing/selftests/proc/setns-dcache.c b/tools/testing/selftests/proc/setns-dcache.c
new file mode 100644
index 000000000..60ab197a7
--- /dev/null
+++ b/tools/testing/selftests/proc/setns-dcache.c
@@ -0,0 +1,129 @@
+/*
+ * Copyright © 2019 Alexey Dobriyan <adobriyan@gmail.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+/*
+ * Test that setns(CLONE_NEWNET) points to new /proc/net content even
+ * if old one is in dcache.
+ *
+ * FIXME /proc/net/unix is under CONFIG_UNIX which can be disabled.
+ */
+#undef NDEBUG
+#include <assert.h>
+#include <errno.h>
+#include <sched.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <sys/socket.h>
+
+static pid_t pid = -1;
+
+static void f(void)
+{
+	if (pid > 0) {
+		kill(pid, SIGTERM);
+	}
+}
+
+int main(void)
+{
+	int fd[2];
+	char _ = 0;
+	int nsfd;
+
+	atexit(f);
+
+	/* Check for priviledges and syscall availability straight away. */
+	if (unshare(CLONE_NEWNET) == -1) {
+		if (errno == ENOSYS || errno == EPERM) {
+			return 4;
+		}
+		return 1;
+	}
+	/* Distinguisher between two otherwise empty net namespaces. */
+	if (socket(AF_UNIX, SOCK_STREAM, 0) == -1) {
+		return 1;
+	}
+
+	if (pipe(fd) == -1) {
+		return 1;
+	}
+
+	pid = fork();
+	if (pid == -1) {
+		return 1;
+	}
+
+	if (pid == 0) {
+		if (unshare(CLONE_NEWNET) == -1) {
+			return 1;
+		}
+
+		if (write(fd[1], &_, 1) != 1) {
+			return 1;
+		}
+
+		pause();
+
+		return 0;
+	}
+
+	if (read(fd[0], &_, 1) != 1) {
+		return 1;
+	}
+
+	{
+		char buf[64];
+		snprintf(buf, sizeof(buf), "/proc/%u/ns/net", pid);
+		nsfd = open(buf, O_RDONLY);
+		if (nsfd == -1) {
+			return 1;
+		}
+	}
+
+	/* Reliably pin dentry into dcache. */
+	(void)open("/proc/net/unix", O_RDONLY);
+
+	if (setns(nsfd, CLONE_NEWNET) == -1) {
+		return 1;
+	}
+
+	kill(pid, SIGTERM);
+	pid = 0;
+
+	{
+		char buf[4096];
+		ssize_t rv;
+		int fd;
+
+		fd = open("/proc/net/unix", O_RDONLY);
+		if (fd == -1) {
+			return 1;
+		}
+
+#define S "Num       RefCount Protocol Flags    Type St Inode Path\n"
+		rv = read(fd, buf, sizeof(buf));
+
+		assert(rv == strlen(S));
+		assert(memcmp(buf, S, strlen(S)) == 0);
+	}
+
+	return 0;
+}
diff --git a/tools/testing/selftests/proc/thread-self.c b/tools/testing/selftests/proc/thread-self.c
new file mode 100644
index 000000000..4b23b39b7
--- /dev/null
+++ b/tools/testing/selftests/proc/thread-self.c
@@ -0,0 +1,64 @@
+/*
+ * Copyright © 2018 Alexey Dobriyan <adobriyan@gmail.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+// Test that /proc/thread-self gives correct TGID/PID.
+#undef NDEBUG
+#include <assert.h>
+#include <sched.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <sys/wait.h>
+
+#include "proc.h"
+
+int f(void *arg)
+{
+	char buf1[64], buf2[64];
+	pid_t pid, tid;
+	ssize_t rv;
+
+	pid = sys_getpid();
+	tid = sys_gettid();
+	snprintf(buf1, sizeof(buf1), "%u/task/%u", pid, tid);
+
+	rv = readlink("/proc/thread-self", buf2, sizeof(buf2));
+	assert(rv == strlen(buf1));
+	buf2[rv] = '\0';
+	assert(streq(buf1, buf2));
+
+	if (arg)
+		exit(0);
+	return 0;
+}
+
+int main(void)
+{
+	const int PAGE_SIZE = sysconf(_SC_PAGESIZE);
+	pid_t pid;
+	void *stack;
+
+	/* main thread */
+	f((void *)0);
+
+	stack = mmap(NULL, 2 * PAGE_SIZE, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
+	assert(stack != MAP_FAILED);
+	/* side thread */
+	pid = clone(f, stack + PAGE_SIZE, CLONE_THREAD|CLONE_SIGHAND|CLONE_VM, (void *)1);
+	assert(pid > 0);
+	pause();
+
+	return 0;
+}
diff --git a/tools/testing/selftests/pstore/.gitignore b/tools/testing/selftests/pstore/.gitignore
new file mode 100644
index 000000000..5a4a26e54
--- /dev/null
+++ b/tools/testing/selftests/pstore/.gitignore
@@ -0,0 +1,2 @@
+logs
+*uuid
diff --git a/tools/testing/selftests/pstore/Makefile b/tools/testing/selftests/pstore/Makefile
new file mode 100644
index 000000000..5ef57855a
--- /dev/null
+++ b/tools/testing/selftests/pstore/Makefile
@@ -0,0 +1,14 @@
+# SPDX-License-Identifier: GPL-2.0
+# Makefile for pstore selftests.
+# Expects pstore backend is registered.
+
+all:
+
+TEST_PROGS := pstore_tests pstore_post_reboot_tests
+TEST_FILES := common_tests pstore_crash_test
+EXTRA_CLEAN := logs/* *uuid
+
+include ../lib.mk
+
+run_crash:
+	@sh pstore_crash_test || { echo "pstore_crash_test: [FAIL]"; exit 1; }
diff --git a/tools/testing/selftests/pstore/common_tests b/tools/testing/selftests/pstore/common_tests
new file mode 100755
index 000000000..3ea64d7cf
--- /dev/null
+++ b/tools/testing/selftests/pstore/common_tests
@@ -0,0 +1,83 @@
+#!/bin/sh
+
+# common_tests - Shell script commonly used by pstore test scripts
+#
+# Copyright (C) Hitachi Ltd., 2015
+#  Written by Hiraku Toyooka <hiraku.toyooka.gu@hitachi.com>
+#
+# Released under the terms of the GPL v2.
+
+# Utilities
+errexit() { # message
+    echo "Error: $1" 1>&2
+    exit 1
+}
+
+absdir() { # file_path
+    (cd `dirname $1`; pwd)
+}
+
+show_result() { # result_value
+    if [ $1 -eq 0 ]; then
+	prlog "ok"
+    else
+	prlog "FAIL"
+	rc=1
+    fi
+}
+
+check_files_exist() { # type of pstorefs file
+    if [ -e ${1}-${backend}-0 ]; then
+	prlog "ok"
+	for f in `ls ${1}-${backend}-*`; do
+            prlog -e "\t${f}"
+	done
+    else
+	prlog "FAIL"
+	rc=1
+    fi
+}
+
+operate_files() { # tested value, files, operation
+    if [ $1 -eq 0 ]; then
+	prlog
+	for f in $2; do
+	    prlog -ne "\t${f} ... "
+	    # execute operation
+	    $3 $f
+	    show_result $?
+	done
+    else
+	prlog " ... FAIL"
+	rc=1
+    fi
+}
+
+# Parameters
+TEST_STRING_PATTERN="Testing pstore: uuid="
+UUID=`cat /proc/sys/kernel/random/uuid`
+TOP_DIR=`absdir $0`
+LOG_DIR=$TOP_DIR/logs/`date +%Y%m%d-%H%M%S`_${UUID}/
+REBOOT_FLAG=$TOP_DIR/reboot_flag
+
+# Preparing logs
+LOG_FILE=$LOG_DIR/`basename $0`.log
+mkdir -p $LOG_DIR || errexit "Failed to make a log directory: $LOG_DIR"
+date > $LOG_FILE
+prlog() { # messages
+    /bin/echo "$@" | tee -a $LOG_FILE
+}
+
+# Starting tests
+rc=0
+prlog "=== Pstore unit tests (`basename $0`) ==="
+prlog "UUID="$UUID
+
+prlog -n "Checking pstore backend is registered ... "
+backend=`cat /sys/module/pstore/parameters/backend`
+show_result $?
+prlog -e "\tbackend=${backend}"
+prlog -e "\tcmdline=`cat /proc/cmdline`"
+if [ $rc -ne 0 ]; then
+    exit 1
+fi
diff --git a/tools/testing/selftests/pstore/config b/tools/testing/selftests/pstore/config
new file mode 100644
index 000000000..d148f9f89
--- /dev/null
+++ b/tools/testing/selftests/pstore/config
@@ -0,0 +1,5 @@
+CONFIG_MISC_FILESYSTEMS=y
+CONFIG_PSTORE=y
+CONFIG_PSTORE_PMSG=y
+CONFIG_PSTORE_CONSOLE=y
+CONFIG_PSTORE_RAM=m
diff --git a/tools/testing/selftests/pstore/pstore_crash_test b/tools/testing/selftests/pstore/pstore_crash_test
new file mode 100755
index 000000000..1a4afe5c1
--- /dev/null
+++ b/tools/testing/selftests/pstore/pstore_crash_test
@@ -0,0 +1,30 @@
+#!/bin/sh
+
+# pstore_crash_test - Pstore test shell script which causes crash and reboot
+#
+# Copyright (C) Hitachi Ltd., 2015
+#  Written by Hiraku Toyooka <hiraku.toyooka.gu@hitachi.com>
+#
+# Released under the terms of the GPL v2.
+
+# exit if pstore backend is not registered
+. ./common_tests
+
+prlog "Causing kernel crash ..."
+
+# enable all functions triggered by sysrq
+echo 1 > /proc/sys/kernel/sysrq
+# setting to reboot in 3 seconds after panic
+echo 3 > /proc/sys/kernel/panic
+
+# save uuid file by different name because next test execution will replace it.
+mv $TOP_DIR/uuid $TOP_DIR/prev_uuid
+
+# create a file as reboot flag
+touch $REBOOT_FLAG
+sync
+
+# cause crash
+# Note: If you use kdump and want to see kmesg-* files after reboot, you should
+#       specify 'crash_kexec_post_notifiers' in 1st kernel's cmdline.
+echo c > /proc/sysrq-trigger
diff --git a/tools/testing/selftests/pstore/pstore_post_reboot_tests b/tools/testing/selftests/pstore/pstore_post_reboot_tests
new file mode 100755
index 000000000..22f8df1ad
--- /dev/null
+++ b/tools/testing/selftests/pstore/pstore_post_reboot_tests
@@ -0,0 +1,80 @@
+#!/bin/sh
+
+# pstore_post_reboot_tests - Check pstore's behavior after crash/reboot
+#
+# Copyright (C) Hitachi Ltd., 2015
+#  Written by Hiraku Toyooka <hiraku.toyooka.gu@hitachi.com>
+#
+# Released under the terms of the GPL v2.
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+. ./common_tests
+
+if [ -e $REBOOT_FLAG  ]; then
+    rm $REBOOT_FLAG
+else
+    prlog "pstore_crash_test has not been executed yet. we skip further tests."
+    exit $ksft_skip
+fi
+
+prlog -n "Mounting pstore filesystem ... "
+mount_info=`grep pstore /proc/mounts`
+if [ $? -eq 0 ]; then
+    mount_point=`echo ${mount_info} | cut -d' ' -f2 | head -n1`
+    prlog "ok"
+else
+    mount none /sys/fs/pstore -t pstore
+    if [ $? -eq 0 ]; then
+	mount_point=`grep pstore /proc/mounts | cut -d' ' -f2 | head -n1`
+	prlog "ok"
+    else
+	prlog "FAIL"
+	exit 1
+    fi
+fi
+
+cd ${mount_point}
+
+prlog -n "Checking dmesg files exist in pstore filesystem ... "
+check_files_exist dmesg
+
+prlog -n "Checking console files exist in pstore filesystem ... "
+check_files_exist console
+
+prlog -n "Checking pmsg files exist in pstore filesystem ... "
+check_files_exist pmsg
+
+prlog -n "Checking dmesg files contain oops end marker"
+grep_end_trace() {
+    grep -q "\---\[ end trace" $1
+}
+files=`ls dmesg-${backend}-*`
+operate_files $? "$files" grep_end_trace
+
+prlog -n "Checking console file contains oops end marker ... "
+grep -q "\---\[ end trace" console-${backend}-0
+show_result $?
+
+prlog -n "Checking pmsg file properly keeps the content written before crash ... "
+prev_uuid=`cat $TOP_DIR/prev_uuid`
+if [ $? -eq 0 ]; then
+    nr_matched=`grep -c "$TEST_STRING_PATTERN" pmsg-${backend}-0`
+    if [ $nr_matched -eq 1 ]; then
+	grep -q "$TEST_STRING_PATTERN"$prev_uuid pmsg-${backend}-0
+	show_result $?
+    else
+	prlog "FAIL"
+	rc=1
+    fi
+else
+    prlog "FAIL"
+    rc=1
+fi
+
+prlog -n "Removing all files in pstore filesystem "
+files=`ls *-${backend}-*`
+operate_files $? "$files" rm
+
+exit $rc
diff --git a/tools/testing/selftests/pstore/pstore_tests b/tools/testing/selftests/pstore/pstore_tests
new file mode 100755
index 000000000..f25d2a349
--- /dev/null
+++ b/tools/testing/selftests/pstore/pstore_tests
@@ -0,0 +1,30 @@
+#!/bin/sh
+
+# pstore_tests - Check pstore's behavior before crash/reboot
+#
+# Copyright (C) Hitachi Ltd., 2015
+#  Written by Hiraku Toyooka <hiraku.toyooka.gu@hitachi.com>
+#
+# Released under the terms of the GPL v2.
+
+. ./common_tests
+
+prlog -n "Checking pstore console is registered ... "
+dmesg | grep -q "console \[pstore"
+show_result $?
+
+prlog -n "Checking /dev/pmsg0 exists ... "
+test -e /dev/pmsg0
+show_result $?
+
+prlog -n "Writing unique string to /dev/pmsg0 ... "
+if [ -e "/dev/pmsg0" ]; then
+    echo "${TEST_STRING_PATTERN}""$UUID" > /dev/pmsg0
+    show_result $?
+    echo "$UUID" > $TOP_DIR/uuid
+else
+    prlog "FAIL"
+    rc=1
+fi
+
+exit $rc
diff --git a/tools/testing/selftests/ptp/.gitignore b/tools/testing/selftests/ptp/.gitignore
new file mode 100644
index 000000000..f562e49d6
--- /dev/null
+++ b/tools/testing/selftests/ptp/.gitignore
@@ -0,0 +1 @@
+testptp
diff --git a/tools/testing/selftests/ptp/Makefile b/tools/testing/selftests/ptp/Makefile
new file mode 100644
index 000000000..ef06de089
--- /dev/null
+++ b/tools/testing/selftests/ptp/Makefile
@@ -0,0 +1,10 @@
+# SPDX-License-Identifier: GPL-2.0
+CFLAGS += -I../../../../usr/include/
+TEST_PROGS := testptp
+LDLIBS += -lrt
+all: $(TEST_PROGS)
+
+include ../lib.mk
+
+clean:
+	rm -fr $(TEST_PROGS)
diff --git a/tools/testing/selftests/ptp/testptp.c b/tools/testing/selftests/ptp/testptp.c
new file mode 100644
index 000000000..a5d8f0ab0
--- /dev/null
+++ b/tools/testing/selftests/ptp/testptp.c
@@ -0,0 +1,521 @@
+/*
+ * PTP 1588 clock support - User space test program
+ *
+ * Copyright (C) 2010 OMICRON electronics GmbH
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+#define _GNU_SOURCE
+#define __SANE_USERSPACE_TYPES__        /* For PPC64, to get LL64 types */
+#include <errno.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <math.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/timex.h>
+#include <sys/types.h>
+#include <time.h>
+#include <unistd.h>
+
+#include <linux/ptp_clock.h>
+
+#define DEVICE "/dev/ptp0"
+
+#ifndef ADJ_SETOFFSET
+#define ADJ_SETOFFSET 0x0100
+#endif
+
+#ifndef CLOCK_INVALID
+#define CLOCK_INVALID -1
+#endif
+
+/* clock_adjtime is not available in GLIBC < 2.14 */
+#if !__GLIBC_PREREQ(2, 14)
+#include <sys/syscall.h>
+static int clock_adjtime(clockid_t id, struct timex *tx)
+{
+	return syscall(__NR_clock_adjtime, id, tx);
+}
+#endif
+
+static clockid_t get_clockid(int fd)
+{
+#define CLOCKFD 3
+	return (((unsigned int) ~fd) << 3) | CLOCKFD;
+}
+
+static void handle_alarm(int s)
+{
+	printf("received signal %d\n", s);
+}
+
+static int install_handler(int signum, void (*handler)(int))
+{
+	struct sigaction action;
+	sigset_t mask;
+
+	/* Unblock the signal. */
+	sigemptyset(&mask);
+	sigaddset(&mask, signum);
+	sigprocmask(SIG_UNBLOCK, &mask, NULL);
+
+	/* Install the signal handler. */
+	action.sa_handler = handler;
+	action.sa_flags = 0;
+	sigemptyset(&action.sa_mask);
+	sigaction(signum, &action, NULL);
+
+	return 0;
+}
+
+static long ppb_to_scaled_ppm(int ppb)
+{
+	/*
+	 * The 'freq' field in the 'struct timex' is in parts per
+	 * million, but with a 16 bit binary fractional field.
+	 * Instead of calculating either one of
+	 *
+	 *    scaled_ppm = (ppb / 1000) << 16  [1]
+	 *    scaled_ppm = (ppb << 16) / 1000  [2]
+	 *
+	 * we simply use double precision math, in order to avoid the
+	 * truncation in [1] and the possible overflow in [2].
+	 */
+	return (long) (ppb * 65.536);
+}
+
+static int64_t pctns(struct ptp_clock_time *t)
+{
+	return t->sec * 1000000000LL + t->nsec;
+}
+
+static void usage(char *progname)
+{
+	fprintf(stderr,
+		"usage: %s [options]\n"
+		" -a val     request a one-shot alarm after 'val' seconds\n"
+		" -A val     request a periodic alarm every 'val' seconds\n"
+		" -c         query the ptp clock's capabilities\n"
+		" -d name    device to open\n"
+		" -e val     read 'val' external time stamp events\n"
+		" -f val     adjust the ptp clock frequency by 'val' ppb\n"
+		" -g         get the ptp clock time\n"
+		" -h         prints this message\n"
+		" -i val     index for event/trigger\n"
+		" -k val     measure the time offset between system and phc clock\n"
+		"            for 'val' times (Maximum 25)\n"
+		" -l         list the current pin configuration\n"
+		" -L pin,val configure pin index 'pin' with function 'val'\n"
+		"            the channel index is taken from the '-i' option\n"
+		"            'val' specifies the auxiliary function:\n"
+		"            0 - none\n"
+		"            1 - external time stamp\n"
+		"            2 - periodic output\n"
+		" -p val     enable output with a period of 'val' nanoseconds\n"
+		" -P val     enable or disable (val=1|0) the system clock PPS\n"
+		" -s         set the ptp clock time from the system time\n"
+		" -S         set the system time from the ptp clock time\n"
+		" -t val     shift the ptp clock time by 'val' seconds\n"
+		" -T val     set the ptp clock time to 'val' seconds\n",
+		progname);
+}
+
+int main(int argc, char *argv[])
+{
+	struct ptp_clock_caps caps;
+	struct ptp_extts_event event;
+	struct ptp_extts_request extts_request;
+	struct ptp_perout_request perout_request;
+	struct ptp_pin_desc desc;
+	struct timespec ts;
+	struct timex tx;
+
+	static timer_t timerid;
+	struct itimerspec timeout;
+	struct sigevent sigevent;
+
+	struct ptp_clock_time *pct;
+	struct ptp_sys_offset *sysoff;
+
+
+	char *progname;
+	unsigned int i;
+	int c, cnt, fd;
+
+	char *device = DEVICE;
+	clockid_t clkid;
+	int adjfreq = 0x7fffffff;
+	int adjtime = 0;
+	int capabilities = 0;
+	int extts = 0;
+	int gettime = 0;
+	int index = 0;
+	int list_pins = 0;
+	int oneshot = 0;
+	int pct_offset = 0;
+	int n_samples = 0;
+	int periodic = 0;
+	int perout = -1;
+	int pin_index = -1, pin_func;
+	int pps = -1;
+	int seconds = 0;
+	int settime = 0;
+
+	int64_t t1, t2, tp;
+	int64_t interval, offset;
+
+	progname = strrchr(argv[0], '/');
+	progname = progname ? 1+progname : argv[0];
+	while (EOF != (c = getopt(argc, argv, "a:A:cd:e:f:ghi:k:lL:p:P:sSt:T:v"))) {
+		switch (c) {
+		case 'a':
+			oneshot = atoi(optarg);
+			break;
+		case 'A':
+			periodic = atoi(optarg);
+			break;
+		case 'c':
+			capabilities = 1;
+			break;
+		case 'd':
+			device = optarg;
+			break;
+		case 'e':
+			extts = atoi(optarg);
+			break;
+		case 'f':
+			adjfreq = atoi(optarg);
+			break;
+		case 'g':
+			gettime = 1;
+			break;
+		case 'i':
+			index = atoi(optarg);
+			break;
+		case 'k':
+			pct_offset = 1;
+			n_samples = atoi(optarg);
+			break;
+		case 'l':
+			list_pins = 1;
+			break;
+		case 'L':
+			cnt = sscanf(optarg, "%d,%d", &pin_index, &pin_func);
+			if (cnt != 2) {
+				usage(progname);
+				return -1;
+			}
+			break;
+		case 'p':
+			perout = atoi(optarg);
+			break;
+		case 'P':
+			pps = atoi(optarg);
+			break;
+		case 's':
+			settime = 1;
+			break;
+		case 'S':
+			settime = 2;
+			break;
+		case 't':
+			adjtime = atoi(optarg);
+			break;
+		case 'T':
+			settime = 3;
+			seconds = atoi(optarg);
+			break;
+		case 'h':
+			usage(progname);
+			return 0;
+		case '?':
+		default:
+			usage(progname);
+			return -1;
+		}
+	}
+
+	fd = open(device, O_RDWR);
+	if (fd < 0) {
+		fprintf(stderr, "opening %s: %s\n", device, strerror(errno));
+		return -1;
+	}
+
+	clkid = get_clockid(fd);
+	if (CLOCK_INVALID == clkid) {
+		fprintf(stderr, "failed to read clock id\n");
+		return -1;
+	}
+
+	if (capabilities) {
+		if (ioctl(fd, PTP_CLOCK_GETCAPS, &caps)) {
+			perror("PTP_CLOCK_GETCAPS");
+		} else {
+			printf("capabilities:\n"
+			       "  %d maximum frequency adjustment (ppb)\n"
+			       "  %d programmable alarms\n"
+			       "  %d external time stamp channels\n"
+			       "  %d programmable periodic signals\n"
+			       "  %d pulse per second\n"
+			       "  %d programmable pins\n"
+			       "  %d cross timestamping\n",
+			       caps.max_adj,
+			       caps.n_alarm,
+			       caps.n_ext_ts,
+			       caps.n_per_out,
+			       caps.pps,
+			       caps.n_pins,
+			       caps.cross_timestamping);
+		}
+	}
+
+	if (0x7fffffff != adjfreq) {
+		memset(&tx, 0, sizeof(tx));
+		tx.modes = ADJ_FREQUENCY;
+		tx.freq = ppb_to_scaled_ppm(adjfreq);
+		if (clock_adjtime(clkid, &tx)) {
+			perror("clock_adjtime");
+		} else {
+			puts("frequency adjustment okay");
+		}
+	}
+
+	if (adjtime) {
+		memset(&tx, 0, sizeof(tx));
+		tx.modes = ADJ_SETOFFSET;
+		tx.time.tv_sec = adjtime;
+		tx.time.tv_usec = 0;
+		if (clock_adjtime(clkid, &tx) < 0) {
+			perror("clock_adjtime");
+		} else {
+			puts("time shift okay");
+		}
+	}
+
+	if (gettime) {
+		if (clock_gettime(clkid, &ts)) {
+			perror("clock_gettime");
+		} else {
+			printf("clock time: %ld.%09ld or %s",
+			       ts.tv_sec, ts.tv_nsec, ctime(&ts.tv_sec));
+		}
+	}
+
+	if (settime == 1) {
+		clock_gettime(CLOCK_REALTIME, &ts);
+		if (clock_settime(clkid, &ts)) {
+			perror("clock_settime");
+		} else {
+			puts("set time okay");
+		}
+	}
+
+	if (settime == 2) {
+		clock_gettime(clkid, &ts);
+		if (clock_settime(CLOCK_REALTIME, &ts)) {
+			perror("clock_settime");
+		} else {
+			puts("set time okay");
+		}
+	}
+
+	if (settime == 3) {
+		ts.tv_sec = seconds;
+		ts.tv_nsec = 0;
+		if (clock_settime(clkid, &ts)) {
+			perror("clock_settime");
+		} else {
+			puts("set time okay");
+		}
+	}
+
+	if (extts) {
+		memset(&extts_request, 0, sizeof(extts_request));
+		extts_request.index = index;
+		extts_request.flags = PTP_ENABLE_FEATURE;
+		if (ioctl(fd, PTP_EXTTS_REQUEST, &extts_request)) {
+			perror("PTP_EXTTS_REQUEST");
+			extts = 0;
+		} else {
+			puts("external time stamp request okay");
+		}
+		for (; extts; extts--) {
+			cnt = read(fd, &event, sizeof(event));
+			if (cnt != sizeof(event)) {
+				perror("read");
+				break;
+			}
+			printf("event index %u at %lld.%09u\n", event.index,
+			       event.t.sec, event.t.nsec);
+			fflush(stdout);
+		}
+		/* Disable the feature again. */
+		extts_request.flags = 0;
+		if (ioctl(fd, PTP_EXTTS_REQUEST, &extts_request)) {
+			perror("PTP_EXTTS_REQUEST");
+		}
+	}
+
+	if (list_pins) {
+		int n_pins = 0;
+		if (ioctl(fd, PTP_CLOCK_GETCAPS, &caps)) {
+			perror("PTP_CLOCK_GETCAPS");
+		} else {
+			n_pins = caps.n_pins;
+		}
+		for (i = 0; i < n_pins; i++) {
+			desc.index = i;
+			if (ioctl(fd, PTP_PIN_GETFUNC, &desc)) {
+				perror("PTP_PIN_GETFUNC");
+				break;
+			}
+			printf("name %s index %u func %u chan %u\n",
+			       desc.name, desc.index, desc.func, desc.chan);
+		}
+	}
+
+	if (oneshot) {
+		install_handler(SIGALRM, handle_alarm);
+		/* Create a timer. */
+		sigevent.sigev_notify = SIGEV_SIGNAL;
+		sigevent.sigev_signo = SIGALRM;
+		if (timer_create(clkid, &sigevent, &timerid)) {
+			perror("timer_create");
+			return -1;
+		}
+		/* Start the timer. */
+		memset(&timeout, 0, sizeof(timeout));
+		timeout.it_value.tv_sec = oneshot;
+		if (timer_settime(timerid, 0, &timeout, NULL)) {
+			perror("timer_settime");
+			return -1;
+		}
+		pause();
+		timer_delete(timerid);
+	}
+
+	if (periodic) {
+		install_handler(SIGALRM, handle_alarm);
+		/* Create a timer. */
+		sigevent.sigev_notify = SIGEV_SIGNAL;
+		sigevent.sigev_signo = SIGALRM;
+		if (timer_create(clkid, &sigevent, &timerid)) {
+			perror("timer_create");
+			return -1;
+		}
+		/* Start the timer. */
+		memset(&timeout, 0, sizeof(timeout));
+		timeout.it_interval.tv_sec = periodic;
+		timeout.it_value.tv_sec = periodic;
+		if (timer_settime(timerid, 0, &timeout, NULL)) {
+			perror("timer_settime");
+			return -1;
+		}
+		while (1) {
+			pause();
+		}
+		timer_delete(timerid);
+	}
+
+	if (perout >= 0) {
+		if (clock_gettime(clkid, &ts)) {
+			perror("clock_gettime");
+			return -1;
+		}
+		memset(&perout_request, 0, sizeof(perout_request));
+		perout_request.index = index;
+		perout_request.start.sec = ts.tv_sec + 2;
+		perout_request.start.nsec = 0;
+		perout_request.period.sec = 0;
+		perout_request.period.nsec = perout;
+		if (ioctl(fd, PTP_PEROUT_REQUEST, &perout_request)) {
+			perror("PTP_PEROUT_REQUEST");
+		} else {
+			puts("periodic output request okay");
+		}
+	}
+
+	if (pin_index >= 0) {
+		memset(&desc, 0, sizeof(desc));
+		desc.index = pin_index;
+		desc.func = pin_func;
+		desc.chan = index;
+		if (ioctl(fd, PTP_PIN_SETFUNC, &desc)) {
+			perror("PTP_PIN_SETFUNC");
+		} else {
+			puts("set pin function okay");
+		}
+	}
+
+	if (pps != -1) {
+		int enable = pps ? 1 : 0;
+		if (ioctl(fd, PTP_ENABLE_PPS, enable)) {
+			perror("PTP_ENABLE_PPS");
+		} else {
+			puts("pps for system time request okay");
+		}
+	}
+
+	if (pct_offset) {
+		if (n_samples <= 0 || n_samples > 25) {
+			puts("n_samples should be between 1 and 25");
+			usage(progname);
+			return -1;
+		}
+
+		sysoff = calloc(1, sizeof(*sysoff));
+		if (!sysoff) {
+			perror("calloc");
+			return -1;
+		}
+		sysoff->n_samples = n_samples;
+
+		if (ioctl(fd, PTP_SYS_OFFSET, sysoff))
+			perror("PTP_SYS_OFFSET");
+		else
+			puts("system and phc clock time offset request okay");
+
+		pct = &sysoff->ts[0];
+		for (i = 0; i < sysoff->n_samples; i++) {
+			t1 = pctns(pct+2*i);
+			tp = pctns(pct+2*i+1);
+			t2 = pctns(pct+2*i+2);
+			interval = t2 - t1;
+			offset = (t2 + t1) / 2 - tp;
+
+			printf("system time: %lld.%u\n",
+				(pct+2*i)->sec, (pct+2*i)->nsec);
+			printf("phc    time: %lld.%u\n",
+				(pct+2*i+1)->sec, (pct+2*i+1)->nsec);
+			printf("system time: %lld.%u\n",
+				(pct+2*i+2)->sec, (pct+2*i+2)->nsec);
+			printf("system/phc clock time offset is %" PRId64 " ns\n"
+			       "system     clock time delay  is %" PRId64 " ns\n",
+				offset, interval);
+		}
+
+		free(sysoff);
+	}
+
+	close(fd);
+	return 0;
+}
diff --git a/tools/testing/selftests/ptp/testptp.mk b/tools/testing/selftests/ptp/testptp.mk
new file mode 100644
index 000000000..4ef2d9755
--- /dev/null
+++ b/tools/testing/selftests/ptp/testptp.mk
@@ -0,0 +1,33 @@
+# PTP 1588 clock support - User space test program
+#
+# Copyright (C) 2010 OMICRON electronics GmbH
+#
+#  This program is free software; you can redistribute it and/or modify
+#  it under the terms of the GNU General Public License as published by
+#  the Free Software Foundation; either version 2 of the License, or
+#  (at your option) any later version.
+#
+#  This program is distributed in the hope that it will be useful,
+#  but WITHOUT ANY WARRANTY; without even the implied warranty of
+#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#  GNU General Public License for more details.
+#
+#  You should have received a copy of the GNU General Public License
+#  along with this program; if not, write to the Free Software
+#  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+CC        = $(CROSS_COMPILE)gcc
+INC       = -I$(KBUILD_OUTPUT)/usr/include
+CFLAGS    = -Wall $(INC)
+LDLIBS    = -lrt
+PROGS     = testptp
+
+all: $(PROGS)
+
+testptp: testptp.o
+
+clean:
+	rm -f testptp.o
+
+distclean: clean
+	rm -f $(PROGS)
diff --git a/tools/testing/selftests/ptrace/.gitignore b/tools/testing/selftests/ptrace/.gitignore
new file mode 100644
index 000000000..b3e59d41f
--- /dev/null
+++ b/tools/testing/selftests/ptrace/.gitignore
@@ -0,0 +1 @@
+peeksiginfo
diff --git a/tools/testing/selftests/ptrace/Makefile b/tools/testing/selftests/ptrace/Makefile
new file mode 100644
index 000000000..8a2bc5562
--- /dev/null
+++ b/tools/testing/selftests/ptrace/Makefile
@@ -0,0 +1,5 @@
+CFLAGS += -iquote../../../../include/uapi -Wall
+
+TEST_GEN_PROGS := peeksiginfo
+
+include ../lib.mk
diff --git a/tools/testing/selftests/ptrace/peeksiginfo.c b/tools/testing/selftests/ptrace/peeksiginfo.c
new file mode 100644
index 000000000..54900657e
--- /dev/null
+++ b/tools/testing/selftests/ptrace/peeksiginfo.c
@@ -0,0 +1,219 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <signal.h>
+#include <unistd.h>
+#include <errno.h>
+#include <linux/types.h>
+#include <sys/wait.h>
+#include <sys/syscall.h>
+#include <sys/user.h>
+#include <sys/mman.h>
+
+#include "linux/ptrace.h"
+
+static int sys_rt_sigqueueinfo(pid_t tgid, int sig, siginfo_t *uinfo)
+{
+	return syscall(SYS_rt_sigqueueinfo, tgid, sig, uinfo);
+}
+
+static int sys_rt_tgsigqueueinfo(pid_t tgid, pid_t tid,
+					int sig, siginfo_t *uinfo)
+{
+	return syscall(SYS_rt_tgsigqueueinfo, tgid, tid, sig, uinfo);
+}
+
+static int sys_ptrace(int request, pid_t pid, void *addr, void *data)
+{
+	return syscall(SYS_ptrace, request, pid, addr, data);
+}
+
+#define SIGNR 10
+#define TEST_SICODE_PRIV	-1
+#define TEST_SICODE_SHARE	-2
+
+#ifndef PAGE_SIZE
+#define PAGE_SIZE sysconf(_SC_PAGESIZE)
+#endif
+
+#define err(fmt, ...)						\
+		fprintf(stderr,					\
+			"Error (%s:%d): " fmt,			\
+			__FILE__, __LINE__, ##__VA_ARGS__)
+
+static int check_error_paths(pid_t child)
+{
+	struct ptrace_peeksiginfo_args arg;
+	int ret, exit_code = -1;
+	void *addr_rw, *addr_ro;
+
+	/*
+	 * Allocate two contiguous pages. The first one is for read-write,
+	 * another is for read-only.
+	 */
+	addr_rw = mmap(NULL, 2 * PAGE_SIZE, PROT_READ | PROT_WRITE,
+				MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+	if (addr_rw == MAP_FAILED) {
+		err("mmap() failed: %m\n");
+		return 1;
+	}
+
+	addr_ro = mmap(addr_rw + PAGE_SIZE, PAGE_SIZE, PROT_READ,
+			MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
+	if (addr_ro == MAP_FAILED) {
+		err("mmap() failed: %m\n");
+		goto out;
+	}
+
+	arg.nr = SIGNR;
+	arg.off = 0;
+
+	/* Unsupported flags */
+	arg.flags = ~0;
+	ret = sys_ptrace(PTRACE_PEEKSIGINFO, child, &arg, addr_rw);
+	if (ret != -1 || errno != EINVAL) {
+		err("sys_ptrace() returns %d (expected -1),"
+				" errno %d (expected %d): %m\n",
+				ret, errno, EINVAL);
+		goto out;
+	}
+	arg.flags = 0;
+
+	/* A part of the buffer is read-only */
+	ret = sys_ptrace(PTRACE_PEEKSIGINFO, child, &arg,
+					addr_ro - sizeof(siginfo_t) * 2);
+	if (ret != 2) {
+		err("sys_ptrace() returns %d (expected 2): %m\n", ret);
+		goto out;
+	}
+
+	/* Read-only buffer */
+	ret = sys_ptrace(PTRACE_PEEKSIGINFO, child, &arg, addr_ro);
+	if (ret != -1 && errno != EFAULT) {
+		err("sys_ptrace() returns %d (expected -1),"
+				" errno %d (expected %d): %m\n",
+				ret, errno, EFAULT);
+		goto out;
+	}
+
+	exit_code = 0;
+out:
+	munmap(addr_rw, 2 * PAGE_SIZE);
+	return exit_code;
+}
+
+int check_direct_path(pid_t child, int shared, int nr)
+{
+	struct ptrace_peeksiginfo_args arg = {.flags = 0, .nr = nr, .off = 0};
+	int i, j, ret, exit_code = -1;
+	siginfo_t siginfo[SIGNR];
+	int si_code;
+
+	if (shared == 1) {
+		arg.flags = PTRACE_PEEKSIGINFO_SHARED;
+		si_code = TEST_SICODE_SHARE;
+	} else {
+		arg.flags = 0;
+		si_code = TEST_SICODE_PRIV;
+	}
+
+	for (i = 0; i < SIGNR; ) {
+		arg.off = i;
+		ret = sys_ptrace(PTRACE_PEEKSIGINFO, child, &arg, siginfo);
+		if (ret == -1) {
+			err("ptrace() failed: %m\n");
+			goto out;
+		}
+
+		if (ret == 0)
+			break;
+
+		for (j = 0; j < ret; j++, i++) {
+			if (siginfo[j].si_code == si_code &&
+			    siginfo[j].si_int == i)
+				continue;
+
+			err("%d: Wrong siginfo i=%d si_code=%d si_int=%d\n",
+			     shared, i, siginfo[j].si_code, siginfo[j].si_int);
+			goto out;
+		}
+	}
+
+	if (i != SIGNR) {
+		err("Only %d signals were read\n", i);
+		goto out;
+	}
+
+	exit_code = 0;
+out:
+	return exit_code;
+}
+
+int main(int argc, char *argv[])
+{
+	siginfo_t siginfo[SIGNR];
+	int i, exit_code = 1;
+	sigset_t blockmask;
+	pid_t child;
+
+	sigemptyset(&blockmask);
+	sigaddset(&blockmask, SIGRTMIN);
+	sigprocmask(SIG_BLOCK, &blockmask, NULL);
+
+	child = fork();
+	if (child == -1) {
+		err("fork() failed: %m");
+		return 1;
+	} else if (child == 0) {
+		pid_t ppid = getppid();
+		while (1) {
+			if (ppid != getppid())
+				break;
+			sleep(1);
+		}
+		return 1;
+	}
+
+	/* Send signals in process-wide and per-thread queues */
+	for (i = 0; i < SIGNR; i++) {
+		siginfo->si_code = TEST_SICODE_SHARE;
+		siginfo->si_int = i;
+		sys_rt_sigqueueinfo(child, SIGRTMIN, siginfo);
+
+		siginfo->si_code = TEST_SICODE_PRIV;
+		siginfo->si_int = i;
+		sys_rt_tgsigqueueinfo(child, child, SIGRTMIN, siginfo);
+	}
+
+	if (sys_ptrace(PTRACE_ATTACH, child, NULL, NULL) == -1)
+		return 1;
+
+	waitpid(child, NULL, 0);
+
+	/* Dump signals one by one*/
+	if (check_direct_path(child, 0, 1))
+		goto out;
+	/* Dump all signals for one call */
+	if (check_direct_path(child, 0, SIGNR))
+		goto out;
+
+	/*
+	 * Dump signal from the process-wide queue.
+	 * The number of signals is not multible to the buffer size
+	 */
+	if (check_direct_path(child, 1, 3))
+		goto out;
+
+	if (check_error_paths(child))
+		goto out;
+
+	printf("PASS\n");
+	exit_code = 0;
+out:
+	if (sys_ptrace(PTRACE_KILL, child, NULL, NULL) == -1)
+		return 1;
+
+	waitpid(child, NULL, 0);
+
+	return exit_code;
+}
diff --git a/tools/testing/selftests/rcutorture/.gitignore b/tools/testing/selftests/rcutorture/.gitignore
new file mode 100644
index 000000000..ccc240275
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/.gitignore
@@ -0,0 +1,4 @@
+initrd
+b[0-9]*
+res
+*.swp
diff --git a/tools/testing/selftests/rcutorture/bin/configNR_CPUS.sh b/tools/testing/selftests/rcutorture/bin/configNR_CPUS.sh
new file mode 100755
index 000000000..43540f182
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/bin/configNR_CPUS.sh
@@ -0,0 +1,45 @@
+#!/bin/bash
+#
+# Extract the number of CPUs expected from the specified Kconfig-file
+# fragment by checking CONFIG_SMP and CONFIG_NR_CPUS.  If the specified
+# file gives no clue, base the number on the number of idle CPUs on
+# the system.
+#
+# Usage: configNR_CPUS.sh config-frag
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# Copyright (C) IBM Corporation, 2013
+#
+# Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+
+cf=$1
+if test ! -r $cf
+then
+	echo Unreadable config fragment $cf 1>&2
+	exit -1
+fi
+if grep -q '^CONFIG_SMP=n$' $cf
+then
+	echo 1
+	exit 0
+fi
+if grep -q '^CONFIG_NR_CPUS=' $cf
+then
+	grep '^CONFIG_NR_CPUS=' $cf | 
+		sed -e 's/^CONFIG_NR_CPUS=\([0-9]*\).*$/\1/'
+	exit 0
+fi
+cpus2use.sh
diff --git a/tools/testing/selftests/rcutorture/bin/config_override.sh b/tools/testing/selftests/rcutorture/bin/config_override.sh
new file mode 100755
index 000000000..ef7fcbac3
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/bin/config_override.sh
@@ -0,0 +1,61 @@
+#!/bin/bash
+#
+# config_override.sh base override
+#
+# Combines base and override, removing any Kconfig options from base
+# that conflict with any in override, concatenating what remains and
+# sending the result to standard output.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# Copyright (C) IBM Corporation, 2017
+#
+# Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+
+base=$1
+if test -r $base
+then
+	:
+else
+	echo Base file $base unreadable!!!
+	exit 1
+fi
+
+override=$2
+if test -r $override
+then
+	:
+else
+	echo Override file $override unreadable!!!
+	exit 1
+fi
+
+T=${TMPDIR-/tmp}/config_override.sh.$$
+trap 'rm -rf $T' 0
+mkdir $T
+
+sed < $override -e 's/^/grep -v "/' -e 's/=.*$/="/' |
+	awk '
+	{
+		if (last)
+			print last " |";
+		last = $0;
+	}
+	END {
+		if (last)
+			print last;
+	}' > $T/script
+sh $T/script < $base
+cat $override
diff --git a/tools/testing/selftests/rcutorture/bin/configcheck.sh b/tools/testing/selftests/rcutorture/bin/configcheck.sh
new file mode 100755
index 000000000..197deece7
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/bin/configcheck.sh
@@ -0,0 +1,54 @@
+#!/bin/bash
+# Usage: configcheck.sh .config .config-template
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# Copyright (C) IBM Corporation, 2011
+#
+# Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+
+T=${TMPDIR-/tmp}/abat-chk-config.sh.$$
+trap 'rm -rf $T' 0
+mkdir $T
+
+cat $1 > $T/.config
+
+cat $2 | sed -e 's/\(.*\)=n/# \1 is not set/' -e 's/^#CHECK#//' |
+awk	'
+{
+		print "if grep -q \"" $0 "\" < '"$T/.config"'";
+		print "then";
+		print "\t:";
+		print "else";
+		if ($1 == "#") {
+			print "\tif grep -q \"" $2 "\" < '"$T/.config"'";
+			print "\tthen";
+			print "\t\tif test \"$firsttime\" = \"\""
+			print "\t\tthen"
+			print "\t\t\tfirsttime=1"
+			print "\t\tfi"
+			print "\t\techo \":" $2 ": improperly set\"";
+			print "\telse";
+			print "\t\t:";
+			print "\tfi";
+		} else {
+			print "\tif test \"$firsttime\" = \"\""
+			print "\tthen"
+			print "\t\tfirsttime=1"
+			print "\tfi"
+			print "\techo \":" $0 ": improperly set\"";
+		}
+		print "fi";
+	}' | sh
diff --git a/tools/testing/selftests/rcutorture/bin/configinit.sh b/tools/testing/selftests/rcutorture/bin/configinit.sh
new file mode 100755
index 000000000..65541c21a
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/bin/configinit.sh
@@ -0,0 +1,72 @@
+#!/bin/bash
+#
+# Usage: configinit.sh config-spec-file build-output-dir results-dir
+#
+# Create a .config file from the spec file.  Run from the kernel source tree.
+# Exits with 0 if all went well, with 1 if all went well but the config
+# did not match, and some other number for other failures.
+#
+# The first argument is the .config specification file, which contains
+# desired settings, for example, "CONFIG_NO_HZ=y".  For best results,
+# this should be a full pathname.
+#
+# The second argument is a optional path to a build output directory,
+# for example, "O=/tmp/foo".  If this argument is omitted, the .config
+# file will be generated directly in the current directory.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# Copyright (C) IBM Corporation, 2013
+#
+# Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+
+T=${TMPDIR-/tmp}/configinit.sh.$$
+trap 'rm -rf $T' 0
+mkdir $T
+
+# Capture config spec file.
+
+c=$1
+buildloc=$2
+resdir=$3
+builddir=
+if echo $buildloc | grep -q '^O='
+then
+	builddir=`echo $buildloc | sed -e 's/^O=//'`
+	if test ! -d $builddir
+	then
+		mkdir $builddir
+	fi
+else
+	echo Bad build directory: \"$buildloc\"
+	exit 2
+fi
+
+sed -e 's/^\(CONFIG[0-9A-Z_]*\)=.*$/grep -v "^# \1" |/' < $c > $T/u.sh
+sed -e 's/^\(CONFIG[0-9A-Z_]*=\).*$/grep -v \1 |/' < $c >> $T/u.sh
+grep '^grep' < $T/u.sh > $T/upd.sh
+echo "cat - $c" >> $T/upd.sh
+make mrproper
+make $buildloc distclean > $resdir/Make.distclean 2>&1
+make $buildloc $TORTURE_DEFCONFIG > $resdir/Make.defconfig.out 2>&1
+mv $builddir/.config $builddir/.config.sav
+sh $T/upd.sh < $builddir/.config.sav > $builddir/.config
+cp $builddir/.config $builddir/.config.new
+yes '' | make $buildloc oldconfig > $resdir/Make.oldconfig.out 2> $resdir/Make.oldconfig.err
+
+# verify new config matches specification.
+configcheck.sh $builddir/.config $c
+
+exit 0
diff --git a/tools/testing/selftests/rcutorture/bin/cpus2use.sh b/tools/testing/selftests/rcutorture/bin/cpus2use.sh
new file mode 100755
index 000000000..bb99cde3f
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/bin/cpus2use.sh
@@ -0,0 +1,41 @@
+#!/bin/bash
+#
+# Get an estimate of how CPU-hoggy to be.
+#
+# Usage: cpus2use.sh
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# Copyright (C) IBM Corporation, 2013
+#
+# Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+
+ncpus=`grep '^processor' /proc/cpuinfo | wc -l`
+idlecpus=`mpstat | tail -1 | \
+	awk -v ncpus=$ncpus '{ print ncpus * ($7 + $NF) / 100 }'`
+awk -v ncpus=$ncpus -v idlecpus=$idlecpus < /dev/null '
+BEGIN {
+	cpus2use = idlecpus;
+	if (cpus2use < 1)
+		cpus2use = 1;
+	if (cpus2use < ncpus / 10)
+		cpus2use = ncpus / 10;
+	if (cpus2use == int(cpus2use))
+		cpus2use = int(cpus2use)
+	else
+		cpus2use = int(cpus2use) + 1
+	print cpus2use;
+}'
+
diff --git a/tools/testing/selftests/rcutorture/bin/functions.sh b/tools/testing/selftests/rcutorture/bin/functions.sh
new file mode 100644
index 000000000..65f665502
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/bin/functions.sh
@@ -0,0 +1,279 @@
+#!/bin/bash
+#
+# Shell functions for the rest of the scripts.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# Copyright (C) IBM Corporation, 2013
+#
+# Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+
+# bootparam_hotplug_cpu bootparam-string
+#
+# Returns 1 if the specified boot-parameter string tells rcutorture to
+# test CPU-hotplug operations.
+bootparam_hotplug_cpu () {
+	echo "$1" | grep -q "rcutorture\.onoff_"
+}
+
+# checkarg --argname argtype $# arg mustmatch cannotmatch
+#
+# Checks the specified argument "arg" against the mustmatch and cannotmatch
+# patterns.
+checkarg () {
+	if test $3 -le 1
+	then
+		echo $1 needs argument $2 matching \"$5\"
+		usage
+	fi
+	if echo "$4" | grep -q -e "$5"
+	then
+		:
+	else
+		echo $1 $2 \"$4\" must match \"$5\"
+		usage
+	fi
+	if echo "$4" | grep -q -e "$6"
+	then
+		echo $1 $2 \"$4\" must not match \"$6\"
+		usage
+	fi
+}
+
+# configfrag_boot_params bootparam-string config-fragment-file
+#
+# Adds boot parameters from the .boot file, if any.
+configfrag_boot_params () {
+	if test -r "$2.boot"
+	then
+		echo $1 `grep -v '^#' "$2.boot" | tr '\012' ' '`
+	else
+		echo $1
+	fi
+}
+
+# configfrag_boot_cpus bootparam-string config-fragment-file config-cpus
+#
+# Decreases number of CPUs based on any nr_cpus= boot parameters specified.
+configfrag_boot_cpus () {
+	local bootargs="`configfrag_boot_params "$1" "$2"`"
+	local nr_cpus
+	if echo "${bootargs}" | grep -q 'nr_cpus=[0-9]'
+	then
+		nr_cpus="`echo "${bootargs}" | sed -e 's/^.*nr_cpus=\([0-9]*\).*$/\1/'`"
+		if test "$3" -gt "$nr_cpus"
+		then
+			echo $nr_cpus
+		else
+			echo $3
+		fi
+	else
+		echo $3
+	fi
+}
+
+# configfrag_boot_maxcpus bootparam-string config-fragment-file config-cpus
+#
+# Decreases number of CPUs based on any maxcpus= boot parameters specified.
+# This allows tests where additional CPUs come online later during the
+# test run.  However, the torture parameters will be set based on the
+# number of CPUs initially present, so the scripting should schedule
+# test runs based on the maxcpus= boot parameter controlling the initial
+# number of CPUs instead of on the ultimate number of CPUs.
+configfrag_boot_maxcpus () {
+	local bootargs="`configfrag_boot_params "$1" "$2"`"
+	local maxcpus
+	if echo "${bootargs}" | grep -q 'maxcpus=[0-9]'
+	then
+		maxcpus="`echo "${bootargs}" | sed -e 's/^.*maxcpus=\([0-9]*\).*$/\1/'`"
+		if test "$3" -gt "$maxcpus"
+		then
+			echo $maxcpus
+		else
+			echo $3
+		fi
+	else
+		echo $3
+	fi
+}
+
+# configfrag_hotplug_cpu config-fragment-file
+#
+# Returns 1 if the config fragment specifies hotplug CPU.
+configfrag_hotplug_cpu () {
+	if test ! -r "$1"
+	then
+		echo Unreadable config fragment "$1" 1>&2
+		exit -1
+	fi
+	grep -q '^CONFIG_HOTPLUG_CPU=y$' "$1"
+}
+
+# identify_boot_image qemu-cmd
+#
+# Returns the relative path to the kernel build image.  This will be
+# arch/<arch>/boot/bzImage or vmlinux if bzImage is not a target for the
+# architecture, unless overridden with the TORTURE_BOOT_IMAGE environment
+# variable.
+identify_boot_image () {
+	if test -n "$TORTURE_BOOT_IMAGE"
+	then
+		echo $TORTURE_BOOT_IMAGE
+	else
+		case "$1" in
+		qemu-system-x86_64|qemu-system-i386)
+			echo arch/x86/boot/bzImage
+			;;
+		qemu-system-aarch64)
+			echo arch/arm64/boot/Image
+			;;
+		*)
+			echo vmlinux
+			;;
+		esac
+	fi
+}
+
+# identify_qemu builddir
+#
+# Returns our best guess as to which qemu command is appropriate for
+# the kernel at hand.  Override with the TORTURE_QEMU_CMD environment variable.
+identify_qemu () {
+	local u="`file "$1"`"
+	if test -n "$TORTURE_QEMU_CMD"
+	then
+		echo $TORTURE_QEMU_CMD
+	elif echo $u | grep -q x86-64
+	then
+		echo qemu-system-x86_64
+	elif echo $u | grep -q "Intel 80386"
+	then
+		echo qemu-system-i386
+	elif echo $u | grep -q aarch64
+	then
+		echo qemu-system-aarch64
+	elif uname -a | grep -q ppc64
+	then
+		echo qemu-system-ppc64
+	else
+		echo Cannot figure out what qemu command to use! 1>&2
+		echo file $1 output: $u
+		# Usually this will be one of /usr/bin/qemu-system-*
+		# Use TORTURE_QEMU_CMD environment variable or appropriate
+		# argument to top-level script.
+		exit 1
+	fi
+}
+
+# identify_qemu_append qemu-cmd
+#
+# Output arguments for the qemu "-append" string based on CPU type
+# and the TORTURE_QEMU_INTERACTIVE environment variable.
+identify_qemu_append () {
+	local console=ttyS0
+	case "$1" in
+	qemu-system-x86_64|qemu-system-i386)
+		echo noapic selinux=0 initcall_debug debug
+		;;
+	qemu-system-aarch64)
+		console=ttyAMA0
+		;;
+	esac
+	if test -n "$TORTURE_QEMU_INTERACTIVE"
+	then
+		echo root=/dev/sda
+	else
+		echo console=$console
+	fi
+}
+
+# identify_qemu_args qemu-cmd serial-file
+#
+# Output arguments for qemu arguments based on the TORTURE_QEMU_MAC
+# and TORTURE_QEMU_INTERACTIVE environment variables.
+identify_qemu_args () {
+	case "$1" in
+	qemu-system-x86_64|qemu-system-i386)
+		;;
+	qemu-system-aarch64)
+		echo -machine virt,gic-version=host -cpu host
+		;;
+	qemu-system-ppc64)
+		echo -enable-kvm -M pseries -nodefaults
+		echo -device spapr-vscsi
+		if test -n "$TORTURE_QEMU_INTERACTIVE" -a -n "$TORTURE_QEMU_MAC"
+		then
+			echo -device spapr-vlan,netdev=net0,mac=$TORTURE_QEMU_MAC
+			echo -netdev bridge,br=br0,id=net0
+		elif test -n "$TORTURE_QEMU_INTERACTIVE"
+		then
+			echo -net nic -net user
+		fi
+		;;
+	esac
+	if test -n "$TORTURE_QEMU_INTERACTIVE"
+	then
+		echo -monitor stdio -serial pty -S
+	else
+		echo -serial file:$2
+	fi
+}
+
+# identify_qemu_vcpus
+#
+# Returns the number of virtual CPUs available to the aggregate of the
+# guest OSes.
+identify_qemu_vcpus () {
+	lscpu | grep '^CPU(s):' | sed -e 's/CPU(s)://'
+}
+
+# print_bug
+#
+# Prints "BUG: " in red followed by remaining arguments
+print_bug () {
+	printf '\033[031mBUG: \033[m'
+	echo $*
+}
+
+# print_warning
+#
+# Prints "WARNING: " in yellow followed by remaining arguments
+print_warning () {
+	printf '\033[033mWARNING: \033[m'
+	echo $*
+}
+
+# specify_qemu_cpus qemu-cmd qemu-args #cpus
+#
+# Appends a string containing "-smp XXX" to qemu-args, unless the incoming
+# qemu-args already contains "-smp".
+specify_qemu_cpus () {
+	local nt;
+
+	if echo $2 | grep -q -e -smp
+	then
+		echo $2
+	else
+		case "$1" in
+		qemu-system-x86_64|qemu-system-i386|qemu-system-aarch64)
+			echo $2 -smp $3
+			;;
+		qemu-system-ppc64)
+			nt="`lscpu | grep '^NUMA node0' | sed -e 's/^[^,]*,\([0-9]*\),.*$/\1/'`"
+			echo $2 -smp cores=`expr \( $3 + $nt - 1 \) / $nt`,threads=$nt
+			;;
+		esac
+	fi
+}
diff --git a/tools/testing/selftests/rcutorture/bin/jitter.sh b/tools/testing/selftests/rcutorture/bin/jitter.sh
new file mode 100755
index 000000000..363382837
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/bin/jitter.sh
@@ -0,0 +1,90 @@
+#!/bin/bash
+#
+# Alternate sleeping and spinning on randomly selected CPUs.  The purpose
+# of this script is to inflict random OS jitter on a concurrently running
+# test.
+#
+# Usage: jitter.sh me duration [ sleepmax [ spinmax ] ]
+#
+# me: Random-number-generator seed salt.
+# duration: Time to run in seconds.
+# sleepmax: Maximum microseconds to sleep, defaults to one second.
+# spinmax: Maximum microseconds to spin, defaults to one millisecond.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# Copyright (C) IBM Corporation, 2016
+#
+# Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+
+me=$(($1 * 1000))
+duration=$2
+sleepmax=${3-1000000}
+spinmax=${4-1000}
+
+n=1
+
+starttime=`awk 'BEGIN { print systime(); }' < /dev/null`
+
+while :
+do
+	# Check for done.
+	t=`awk -v s=$starttime 'BEGIN { print systime() - s; }' < /dev/null`
+	if test "$t" -gt "$duration"
+	then
+		exit 0;
+	fi
+
+	# Set affinity to randomly selected CPU
+	cpus=`ls /sys/devices/system/cpu/*/online |
+		sed -e 's,/[^/]*$,,' -e 's/^[^0-9]*//' |
+		grep -v '^0*$'`
+	cpumask=`awk -v cpus="$cpus" -v me=$me -v n=$n 'BEGIN {
+		srand(n + me + systime());
+		ncpus = split(cpus, ca);
+		curcpu = ca[int(rand() * ncpus + 1)];
+		mask = lshift(1, curcpu);
+		if (mask + 0 <= 0)
+			mask = 1;
+		printf("%#x\n", mask);
+	}' < /dev/null`
+	n=$(($n+1))
+	if ! taskset -p $cpumask $$ > /dev/null 2>&1
+	then
+		echo taskset failure: '"taskset -p ' $cpumask $$ '"'
+		exit 1
+	fi
+
+	# Sleep a random duration
+	sleeptime=`awk -v me=$me -v n=$n -v sleepmax=$sleepmax 'BEGIN {
+		srand(n + me + systime());
+		printf("%06d", int(rand() * sleepmax));
+	}' < /dev/null`
+	n=$(($n+1))
+	sleep .$sleeptime
+
+	# Spin a random duration
+	limit=`awk -v me=$me -v n=$n -v spinmax=$spinmax 'BEGIN {
+		srand(n + me + systime());
+		printf("%06d", int(rand() * spinmax));
+	}' < /dev/null`
+	n=$(($n+1))
+	for i in {1..$limit}
+	do
+		echo > /dev/null
+	done
+done
+
+exit 1
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-build.sh b/tools/testing/selftests/rcutorture/bin/kvm-build.sh
new file mode 100755
index 000000000..9115fcdb5
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/bin/kvm-build.sh
@@ -0,0 +1,60 @@
+#!/bin/bash
+#
+# Build a kvm-ready Linux kernel from the tree in the current directory.
+#
+# Usage: kvm-build.sh config-template build-dir resdir
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# Copyright (C) IBM Corporation, 2011
+#
+# Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+
+config_template=${1}
+if test -z "$config_template" -o ! -f "$config_template" -o ! -r "$config_template"
+then
+	echo "kvm-build.sh :$config_template: Not a readable file"
+	exit 1
+fi
+builddir=${2}
+resdir=${3}
+
+T=${TMPDIR-/tmp}/test-linux.sh.$$
+trap 'rm -rf $T' 0
+mkdir $T
+
+cp ${config_template} $T/config
+cat << ___EOF___ >> $T/config
+CONFIG_INITRAMFS_SOURCE="$TORTURE_INITRD"
+CONFIG_VIRTIO_PCI=y
+CONFIG_VIRTIO_CONSOLE=y
+___EOF___
+
+configinit.sh $T/config O=$builddir $resdir
+retval=$?
+if test $retval -gt 1
+then
+	exit 2
+fi
+ncpus=`cpus2use.sh`
+make O=$builddir -j$ncpus $TORTURE_KMAKE_ARG > $resdir/Make.out 2>&1
+retval=$?
+if test $retval -ne 0 || grep "rcu[^/]*": < $resdir/Make.out | egrep -q "Stop|Error|error:|warning:" || egrep -q "Stop|Error|error:" < $resdir/Make.out
+then
+	echo Kernel build error
+	egrep "Stop|Error|error:|warning:" < $resdir/Make.out
+	echo Run aborted.
+	exit 3
+fi
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh b/tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh
new file mode 100755
index 000000000..98f650c9b
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh
@@ -0,0 +1,56 @@
+#!/bin/sh
+#
+# Invoke a text editor on all console.log files for all runs with diagnostics,
+# that is, on all such files having a console.log.diags counterpart.
+# Note that both console.log.diags and console.log are passed to the
+# editor (currently defaulting to "vi"), allowing the user to get an
+# idea of what to search for in the console.log file.
+#
+# Usage: kvm-find-errors.sh directory
+#
+# The "directory" above should end with the date/time directory, for example,
+# "tools/testing/selftests/rcutorture/res/2018.02.25-14:27:27".
+
+rundir="${1}"
+if test -z "$rundir" -o ! -d "$rundir"
+then
+	echo Usage: $0 directory
+fi
+editor=${EDITOR-vi}
+
+# Find builds with errors
+files=
+for i in ${rundir}/*/Make.out
+do
+	if egrep -q "error:|warning:" < $i
+	then
+		egrep "error:|warning:" < $i > $i.diags
+		files="$files $i.diags $i"
+	fi
+done
+if test -n "$files"
+then
+	$editor $files
+else
+	echo No build errors.
+fi
+if grep -q -e "--buildonly" < ${rundir}/log
+then
+	echo Build-only run, no console logs to check.
+fi
+
+# Find console logs with errors
+files=
+for i in ${rundir}/*/console.log
+do
+	if test -r $i.diags
+	then
+		files="$files $i.diags $i"
+	fi
+done
+if test -n "$files"
+then
+	$editor $files
+else
+	echo No errors in console logs.
+fi
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck-lock.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck-lock.sh
new file mode 100755
index 000000000..2de92f43e
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck-lock.sh
@@ -0,0 +1,51 @@
+#!/bin/bash
+#
+# Analyze a given results directory for locktorture progress.
+#
+# Usage: kvm-recheck-lock.sh resdir
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# Copyright (C) IBM Corporation, 2014
+#
+# Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+
+i="$1"
+if test -d "$i" -a -r "$i"
+then
+	:
+else
+	echo Unreadable results directory: $i
+	exit 1
+fi
+
+configfile=`echo $i | sed -e 's/^.*\///'`
+ncs=`grep "Writes:  Total:" $i/console.log 2> /dev/null | tail -1 | sed -e 's/^.* Total: //' -e 's/ .*$//'`
+if test -z "$ncs"
+then
+	echo "$configfile -------"
+else
+	title="$configfile ------- $ncs acquisitions/releases"
+	dur=`sed -e 's/^.* locktorture.shutdown_secs=//' -e 's/ .*$//' < $i/qemu-cmd 2> /dev/null`
+	if test -z "$dur"
+	then
+		:
+	else
+		ncsps=`awk -v ncs=$ncs -v dur=$dur '
+			BEGIN { print ncs / dur }' < /dev/null`
+		title="$title ($ncsps per second)"
+	fi
+	echo $title
+fi
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh
new file mode 100755
index 000000000..0fa8a61cc
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh
@@ -0,0 +1,74 @@
+#!/bin/bash
+#
+# Analyze a given results directory for rcutorture progress.
+#
+# Usage: kvm-recheck-rcu.sh resdir
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# Copyright (C) IBM Corporation, 2014
+#
+# Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+
+i="$1"
+if test -d "$i" -a -r "$i"
+then
+	:
+else
+	echo Unreadable results directory: $i
+	exit 1
+fi
+. functions.sh
+
+configfile=`echo $i | sed -e 's/^.*\///'`
+ngps=`grep ver: $i/console.log 2> /dev/null | tail -1 | sed -e 's/^.* ver: //' -e 's/ .*$//'`
+stopstate="`grep 'End-test grace-period state: g' $i/console.log 2> /dev/null |
+	    tail -1 | sed -e 's/^\[[ 0-9.]*] //' |
+	    awk '{ print \"[\" $1 \" \" $5 \" \" $6 \" \" $7 \"]\"; }' |
+	    tr -d '\012\015'`"
+if test -z "$ngps"
+then
+	echo "$configfile ------- " $stopstate
+else
+	title="$configfile ------- $ngps GPs"
+	dur=`sed -e 's/^.* rcutorture.shutdown_secs=//' -e 's/ .*$//' < $i/qemu-cmd 2> /dev/null`
+	if test -z "$dur"
+	then
+		:
+	else
+		ngpsps=`awk -v ngps=$ngps -v dur=$dur '
+			BEGIN { print ngps / dur }' < /dev/null`
+		title="$title ($ngpsps/s)"
+	fi
+	echo $title $stopstate
+	nclosecalls=`grep --binary-files=text 'torture: Reader Batch' $i/console.log | tail -1 | awk '{for (i=NF-8;i<=NF;i++) sum+=$i; } END {print sum}'`
+	if test -z "$nclosecalls"
+	then
+		exit 0
+	fi
+	if test "$nclosecalls" -eq 0
+	then
+		exit 0
+	fi
+	# Compute number of close calls per tenth of an hour
+	nclosecalls10=`awk -v nclosecalls=$nclosecalls -v dur=$dur 'BEGIN { print int(nclosecalls * 36000 / dur) }' < /dev/null`
+	if test $nclosecalls10 -gt 5 -a $nclosecalls -gt 1
+	then
+		print_bug $nclosecalls "Reader Batch close calls in" $(($dur/60)) minute run: $i
+	else
+		print_warning $nclosecalls "Reader Batch close calls in" $(($dur/60)) minute run: $i
+	fi
+	echo $nclosecalls "Reader Batch close calls in" $(($dur/60)) minute run: $i > $i/console.log.rcu.diags
+fi
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuperf-ftrace.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuperf-ftrace.sh
new file mode 100755
index 000000000..8948f7926
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuperf-ftrace.sh
@@ -0,0 +1,122 @@
+#!/bin/bash
+#
+# Analyze a given results directory for rcuperf performance measurements,
+# looking for ftrace data.  Exits with 0 if data was found, analyzed, and
+# printed.  Intended to be invoked from kvm-recheck-rcuperf.sh after
+# argument checking.
+#
+# Usage: kvm-recheck-rcuperf-ftrace.sh resdir
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# Copyright (C) IBM Corporation, 2016
+#
+# Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+
+i="$1"
+. functions.sh
+
+if test "`grep -c 'rcu_exp_grace_period.*start' < $i/console.log`" -lt 100
+then
+	exit 10
+fi
+
+sed -e 's/^\[[^]]*]//' < $i/console.log |
+grep 'us : rcu_exp_grace_period' |
+sed -e 's/us : / : /' |
+tr -d '\015' |
+awk '
+$8 == "start" {
+	if (startseq != "")
+		nlost++;
+	starttask = $1;
+	starttime = $3;
+	startseq = $7;
+	seqtask[startseq] = starttask;
+}
+
+$8 == "end" {
+	if (startseq == $7) {
+		curgpdur = $3 - starttime;
+		gptimes[++n] = curgpdur;
+		gptaskcnt[starttask]++;
+		sum += curgpdur;
+		if (curgpdur > 1000)
+			print "Long GP " starttime "us to " $3 "us (" curgpdur "us)";
+		startseq = "";
+	} else {
+		# Lost a message or some such, reset.
+		startseq = "";
+		nlost++;
+	}
+}
+
+$8 == "done" && seqtask[$7] != $1 {
+	piggybackcnt[$1]++;
+}
+
+END {
+	newNR = asort(gptimes);
+	if (newNR <= 0) {
+		print "No ftrace records found???"
+		exit 10;
+	}
+	pct50 = int(newNR * 50 / 100);
+	if (pct50 < 1)
+		pct50 = 1;
+	pct90 = int(newNR * 90 / 100);
+	if (pct90 < 1)
+		pct90 = 1;
+	pct99 = int(newNR * 99 / 100);
+	if (pct99 < 1)
+		pct99 = 1;
+	div = 10 ** int(log(gptimes[pct90]) / log(10) + .5) / 100;
+	print "Histogram bucket size: " div;
+	last = gptimes[1] - 10;
+	count = 0;
+	for (i = 1; i <= newNR; i++) {
+		current = div * int(gptimes[i] / div);
+		if (last == current) {
+			count++;
+		} else {
+			if (count > 0)
+				print last, count;
+			count = 1;
+			last = current;
+		}
+	}
+	if (count > 0)
+		print last, count;
+	print "Distribution of grace periods across tasks:";
+	for (i in gptaskcnt) {
+		print "\t" i, gptaskcnt[i];
+		nbatches += gptaskcnt[i];
+	}
+	ngps = nbatches;
+	print "Distribution of piggybacking across tasks:";
+	for (i in piggybackcnt) {
+		print "\t" i, piggybackcnt[i];
+		ngps += piggybackcnt[i];
+	}
+	print "Average grace-period duration: " sum / newNR " microseconds";
+	print "Minimum grace-period duration: " gptimes[1];
+	print "50th percentile grace-period duration: " gptimes[pct50];
+	print "90th percentile grace-period duration: " gptimes[pct90];
+	print "99th percentile grace-period duration: " gptimes[pct99];
+	print "Maximum grace-period duration: " gptimes[newNR];
+	print "Grace periods: " ngps + 0 " Batches: " nbatches + 0 " Ratio: " ngps / nbatches " Lost: " nlost + 0;
+	print "Computed from ftrace data.";
+}'
+exit 0
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuperf.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuperf.sh
new file mode 100755
index 000000000..ccebf772f
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuperf.sh
@@ -0,0 +1,96 @@
+#!/bin/bash
+#
+# Analyze a given results directory for rcuperf performance measurements.
+#
+# Usage: kvm-recheck-rcuperf.sh resdir
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# Copyright (C) IBM Corporation, 2016
+#
+# Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+
+i="$1"
+if test -d "$i" -a -r "$i"
+then
+	:
+else
+	echo Unreadable results directory: $i
+	exit 1
+fi
+PATH=`pwd`/tools/testing/selftests/rcutorture/bin:$PATH; export PATH
+. functions.sh
+
+if kvm-recheck-rcuperf-ftrace.sh $i
+then
+	# ftrace data was successfully analyzed, call it good!
+	exit 0
+fi
+
+configfile=`echo $i | sed -e 's/^.*\///'`
+
+sed -e 's/^\[[^]]*]//' < $i/console.log |
+awk '
+/-perf: .* gps: .* batches:/ {
+	ngps = $9;
+	nbatches = $11;
+}
+
+/-perf: .*writer-duration/ {
+	gptimes[++n] = $5 / 1000.;
+	sum += $5 / 1000.;
+}
+
+END {
+	newNR = asort(gptimes);
+	if (newNR <= 0) {
+		print "No rcuperf records found???"
+		exit;
+	}
+	pct50 = int(newNR * 50 / 100);
+	if (pct50 < 1)
+		pct50 = 1;
+	pct90 = int(newNR * 90 / 100);
+	if (pct90 < 1)
+		pct90 = 1;
+	pct99 = int(newNR * 99 / 100);
+	if (pct99 < 1)
+		pct99 = 1;
+	div = 10 ** int(log(gptimes[pct90]) / log(10) + .5) / 100;
+	print "Histogram bucket size: " div;
+	last = gptimes[1] - 10;
+	count = 0;
+	for (i = 1; i <= newNR; i++) {
+		current = div * int(gptimes[i] / div);
+		if (last == current) {
+			count++;
+		} else {
+			if (count > 0)
+				print last, count;
+			count = 1;
+			last = current;
+		}
+	}
+	if (count > 0)
+		print last, count;
+	print "Average grace-period duration: " sum / newNR " microseconds";
+	print "Minimum grace-period duration: " gptimes[1];
+	print "50th percentile grace-period duration: " gptimes[pct50];
+	print "90th percentile grace-period duration: " gptimes[pct90];
+	print "99th percentile grace-period duration: " gptimes[pct99];
+	print "Maximum grace-period duration: " gptimes[newNR];
+	print "Grace periods: " ngps + 0 " Batches: " nbatches + 0 " Ratio: " ngps / nbatches;
+	print "Computed from rcuperf printk output.";
+}'
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh
new file mode 100755
index 000000000..c9bab57a7
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh
@@ -0,0 +1,73 @@
+#!/bin/bash
+#
+# Given the results directories for previous KVM-based torture runs,
+# check the build and console output for errors.  Given a directory
+# containing results directories, this recursively checks them all.
+#
+# Usage: kvm-recheck.sh resdir ...
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# Copyright (C) IBM Corporation, 2011
+#
+# Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+
+PATH=`pwd`/tools/testing/selftests/rcutorture/bin:$PATH; export PATH
+. functions.sh
+for rd in "$@"
+do
+	firsttime=1
+	dirs=`find $rd -name Make.defconfig.out -print | sort | sed -e 's,/[^/]*$,,' | sort -u`
+	for i in $dirs
+	do
+		if test -n "$firsttime"
+		then
+			firsttime=""
+			resdir=`echo $i | sed -e 's,/$,,' -e 's,/[^/]*$,,'`
+			head -1 $resdir/log
+		fi
+		TORTURE_SUITE="`cat $i/../TORTURE_SUITE`"
+		rm -f $i/console.log.*.diags
+		kvm-recheck-${TORTURE_SUITE}.sh $i
+		if test -f "$i/console.log"
+		then
+			configcheck.sh $i/.config $i/ConfigFragment
+			if test -r $i/Make.oldconfig.err
+			then
+				cat $i/Make.oldconfig.err
+			fi
+			parse-build.sh $i/Make.out $configfile
+			parse-console.sh $i/console.log $configfile
+			if test -r $i/Warnings
+			then
+				cat $i/Warnings
+			fi
+		else
+			if test -f "$i/qemu-cmd"
+			then
+				print_bug qemu failed
+				echo "   $i"
+			elif test -f "$i/buildonly"
+			then
+				echo Build-only run, no boot/test
+				configcheck.sh $i/.config $i/ConfigFragment
+				parse-build.sh $i/Make.out $configfile
+			else
+				print_bug Build failed
+				echo "   $i"
+			fi
+		fi
+	done
+done
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
new file mode 100755
index 000000000..f7247ee00
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
@@ -0,0 +1,271 @@
+#!/bin/bash
+#
+# Run a kvm-based test of the specified tree on the specified configs.
+# Fully automated run and error checking, no graphics console.
+#
+# Execute this in the source tree.  Do not run it as a background task
+# because qemu does not seem to like that much.
+#
+# Usage: kvm-test-1-run.sh config builddir resdir seconds qemu-args boot_args
+#
+# qemu-args defaults to "-enable-kvm -nographic", along with arguments
+#			specifying the number of CPUs and other options
+#			generated from the underlying CPU architecture.
+# boot_args defaults to value returned by the per_version_boot_params
+#			shell function.
+#
+# Anything you specify for either qemu-args or boot_args is appended to
+# the default values.  The "-smp" value is deduced from the contents of
+# the config fragment.
+#
+# More sophisticated argument parsing is clearly needed.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# Copyright (C) IBM Corporation, 2011
+#
+# Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+
+T=${TMPDIR-/tmp}/kvm-test-1-run.sh.$$
+trap 'rm -rf $T' 0
+mkdir $T
+
+. functions.sh
+. $CONFIGFRAG/ver_functions.sh
+
+config_template=${1}
+config_dir=`echo $config_template | sed -e 's,/[^/]*$,,'`
+title=`echo $config_template | sed -e 's/^.*\///'`
+builddir=${2}
+if test -z "$builddir" -o ! -d "$builddir" -o ! -w "$builddir"
+then
+	echo "kvm-test-1-run.sh :$builddir: Not a writable directory, cannot build into it"
+	exit 1
+fi
+resdir=${3}
+if test -z "$resdir" -o ! -d "$resdir" -o ! -w "$resdir"
+then
+	echo "kvm-test-1-run.sh :$resdir: Not a writable directory, cannot store results into it"
+	exit 1
+fi
+echo ' ---' `date`: Starting build
+echo ' ---' Kconfig fragment at: $config_template >> $resdir/log
+touch $resdir/ConfigFragment.input $resdir/ConfigFragment
+if test -r "$config_dir/CFcommon"
+then
+	echo " --- $config_dir/CFcommon" >> $resdir/ConfigFragment.input
+	cat < $config_dir/CFcommon >> $resdir/ConfigFragment.input
+	config_override.sh $config_dir/CFcommon $config_template > $T/Kc1
+	grep '#CHECK#' $config_dir/CFcommon >> $resdir/ConfigFragment
+else
+	cp $config_template $T/Kc1
+fi
+echo " --- $config_template" >> $resdir/ConfigFragment.input
+cat $config_template >> $resdir/ConfigFragment.input
+grep '#CHECK#' $config_template >> $resdir/ConfigFragment
+if test -n "$TORTURE_KCONFIG_ARG"
+then
+	echo $TORTURE_KCONFIG_ARG | tr -s " " "\012" > $T/cmdline
+	echo " --- --kconfig argument" >> $resdir/ConfigFragment.input
+	cat $T/cmdline >> $resdir/ConfigFragment.input
+	config_override.sh $T/Kc1 $T/cmdline > $T/Kc2
+	# Note that "#CHECK#" is not permitted on commandline.
+else
+	cp $T/Kc1 $T/Kc2
+fi
+cat $T/Kc2 >> $resdir/ConfigFragment
+
+base_resdir=`echo $resdir | sed -e 's/\.[0-9]\+$//'`
+if test "$base_resdir" != "$resdir" -a -f $base_resdir/bzImage -a -f $base_resdir/vmlinux
+then
+	# Rerunning previous test, so use that test's kernel.
+	QEMU="`identify_qemu $base_resdir/vmlinux`"
+	BOOT_IMAGE="`identify_boot_image $QEMU`"
+	KERNEL=$base_resdir/${BOOT_IMAGE##*/} # use the last component of ${BOOT_IMAGE}
+	ln -s $base_resdir/Make*.out $resdir  # for kvm-recheck.sh
+	ln -s $base_resdir/.config $resdir  # for kvm-recheck.sh
+	# Arch-independent indicator
+	touch $resdir/builtkernel
+elif kvm-build.sh $T/Kc2 $builddir $resdir
+then
+	# Had to build a kernel for this test.
+	QEMU="`identify_qemu $builddir/vmlinux`"
+	BOOT_IMAGE="`identify_boot_image $QEMU`"
+	cp $builddir/vmlinux $resdir
+	cp $builddir/.config $resdir
+	cp $builddir/Module.symvers $resdir > /dev/null || :
+	cp $builddir/System.map $resdir > /dev/null || :
+	if test -n "$BOOT_IMAGE"
+	then
+		cp $builddir/$BOOT_IMAGE $resdir
+		KERNEL=$resdir/${BOOT_IMAGE##*/}
+		# Arch-independent indicator
+		touch $resdir/builtkernel
+	else
+		echo No identifiable boot image, not running KVM, see $resdir.
+		echo Do the torture scripts know about your architecture?
+	fi
+	parse-build.sh $resdir/Make.out $title
+else
+	# Build failed.
+	cp $builddir/Make*.out $resdir
+	cp $builddir/.config $resdir || :
+	echo Build failed, not running KVM, see $resdir.
+	if test -f $builddir.wait
+	then
+		mv $builddir.wait $builddir.ready
+	fi
+	exit 1
+fi
+if test -f $builddir.wait
+then
+	mv $builddir.wait $builddir.ready
+fi
+while test -f $builddir.ready
+do
+	sleep 1
+done
+seconds=$4
+qemu_args=$5
+boot_args=$6
+
+cd $KVM
+kstarttime=`awk 'BEGIN { print systime() }' < /dev/null`
+if test -z "$TORTURE_BUILDONLY"
+then
+	echo ' ---' `date`: Starting kernel
+fi
+
+# Generate -smp qemu argument.
+qemu_args="-enable-kvm -nographic $qemu_args"
+cpu_count=`configNR_CPUS.sh $resdir/ConfigFragment`
+cpu_count=`configfrag_boot_cpus "$boot_args" "$config_template" "$cpu_count"`
+vcpus=`identify_qemu_vcpus`
+if test $cpu_count -gt $vcpus
+then
+	echo CPU count limited from $cpu_count to $vcpus | tee -a $resdir/Warnings
+	cpu_count=$vcpus
+fi
+qemu_args="`specify_qemu_cpus "$QEMU" "$qemu_args" "$cpu_count"`"
+
+# Generate architecture-specific and interaction-specific qemu arguments
+qemu_args="$qemu_args `identify_qemu_args "$QEMU" "$resdir/console.log"`"
+
+# Generate qemu -append arguments
+qemu_append="`identify_qemu_append "$QEMU"`"
+
+# Pull in Kconfig-fragment boot parameters
+boot_args="`configfrag_boot_params "$boot_args" "$config_template"`"
+# Generate kernel-version-specific boot parameters
+boot_args="`per_version_boot_params "$boot_args" $resdir/.config $seconds`"
+
+if test -n "$TORTURE_BUILDONLY"
+then
+	echo Build-only run specified, boot/test omitted.
+	touch $resdir/buildonly
+	exit 0
+fi
+echo "NOTE: $QEMU either did not run or was interactive" > $resdir/console.log
+echo $QEMU $qemu_args -m $TORTURE_QEMU_MEM -kernel $KERNEL -append \"$qemu_append $boot_args\" > $resdir/qemu-cmd
+( $QEMU $qemu_args -m $TORTURE_QEMU_MEM -kernel $KERNEL -append "$qemu_append $boot_args"& echo $! > $resdir/qemu_pid; wait `cat  $resdir/qemu_pid`; echo $? > $resdir/qemu-retval ) &
+commandcompleted=0
+sleep 10 # Give qemu's pid a chance to reach the file
+if test -s "$resdir/qemu_pid"
+then
+	qemu_pid=`cat "$resdir/qemu_pid"`
+	echo Monitoring qemu job at pid $qemu_pid
+else
+	qemu_pid=""
+	echo Monitoring qemu job at yet-as-unknown pid
+fi
+while :
+do
+	if test -z "$qemu_pid" -a -s "$resdir/qemu_pid"
+	then
+		qemu_pid=`cat "$resdir/qemu_pid"`
+	fi
+	kruntime=`awk 'BEGIN { print systime() - '"$kstarttime"' }' < /dev/null`
+	if test -z "$qemu_pid" || kill -0 "$qemu_pid" > /dev/null 2>&1
+	then
+		if test $kruntime -ge $seconds
+		then
+			break;
+		fi
+		sleep 1
+	else
+		commandcompleted=1
+		if test $kruntime -lt $seconds
+		then
+			echo Completed in $kruntime vs. $seconds >> $resdir/Warnings 2>&1
+			grep "^(qemu) qemu:" $resdir/kvm-test-1-run.sh.out >> $resdir/Warnings 2>&1
+			killpid="`sed -n "s/^(qemu) qemu: terminating on signal [0-9]* from pid \([0-9]*\).*$/\1/p" $resdir/Warnings`"
+			if test -n "$killpid"
+			then
+				echo "ps -fp $killpid" >> $resdir/Warnings 2>&1
+				ps -fp $killpid >> $resdir/Warnings 2>&1
+			fi
+		else
+			echo ' ---' `date`: "Kernel done"
+		fi
+		break
+	fi
+done
+if test -z "$qemu_pid" -a -s "$resdir/qemu_pid"
+then
+	qemu_pid=`cat "$resdir/qemu_pid"`
+fi
+if test $commandcompleted -eq 0 -a -n "$qemu_pid"
+then
+	echo Grace period for qemu job at pid $qemu_pid
+	oldline="`tail $resdir/console.log`"
+	while :
+	do
+		kruntime=`awk 'BEGIN { print systime() - '"$kstarttime"' }' < /dev/null`
+		if kill -0 $qemu_pid > /dev/null 2>&1
+		then
+			:
+		else
+			break
+		fi
+		must_continue=no
+		newline="`tail $resdir/console.log`"
+		if test "$newline" != "$oldline" && echo $newline | grep -q ' [0-9]\+us : '
+		then
+			must_continue=yes
+		fi
+		last_ts="`tail $resdir/console.log | grep '^\[ *[0-9]\+\.[0-9]\+]' | tail -1 | sed -e 's/^\[ *//' -e 's/\..*$//'`"
+		if test -z "last_ts"
+		then
+			last_ts=0
+		fi
+		if test "$newline" != "$oldline" -a "$last_ts" -lt $((seconds + $TORTURE_SHUTDOWN_GRACE))
+		then
+			must_continue=yes
+		fi
+		if test $must_continue = no -a $kruntime -ge $((seconds + $TORTURE_SHUTDOWN_GRACE))
+		then
+			echo "!!! PID $qemu_pid hung at $kruntime vs. $seconds seconds" >> $resdir/Warnings 2>&1
+			kill -KILL $qemu_pid
+			break
+		fi
+		oldline=$newline
+		sleep 10
+	done
+elif test -z "$qemu_pid"
+then
+	echo Unknown PID, cannot kill qemu command
+fi
+
+parse-console.sh $resdir/console.log $title
diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh
new file mode 100755
index 000000000..5a7a62d76
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/bin/kvm.sh
@@ -0,0 +1,471 @@
+#!/bin/bash
+#
+# Run a series of tests under KVM.  By default, this series is specified
+# by the relevant CFLIST file, but can be overridden by the --configs
+# command-line argument.
+#
+# Usage: kvm.sh [ options ]
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# Copyright (C) IBM Corporation, 2011
+#
+# Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+
+scriptname=$0
+args="$*"
+
+T=${TMPDIR-/tmp}/kvm.sh.$$
+trap 'rm -rf $T' 0
+mkdir $T
+
+cd `dirname $scriptname`/../../../../../
+
+dur=$((30*60))
+dryrun=""
+KVM="`pwd`/tools/testing/selftests/rcutorture"; export KVM
+PATH=${KVM}/bin:$PATH; export PATH
+TORTURE_DEFCONFIG=defconfig
+TORTURE_BOOT_IMAGE=""
+TORTURE_INITRD="$KVM/initrd"; export TORTURE_INITRD
+TORTURE_KCONFIG_ARG=""
+TORTURE_KMAKE_ARG=""
+TORTURE_QEMU_MEM=512
+TORTURE_SHUTDOWN_GRACE=180
+TORTURE_SUITE=rcu
+resdir=""
+configs=""
+cpus=0
+ds=`date +%Y.%m.%d-%H:%M:%S`
+jitter="-1"
+
+. functions.sh
+
+usage () {
+	echo "Usage: $scriptname optional arguments:"
+	echo "       --bootargs kernel-boot-arguments"
+	echo "       --bootimage relative-path-to-kernel-boot-image"
+	echo "       --buildonly"
+	echo "       --configs \"config-file list w/ repeat factor (3*TINY01)\""
+	echo "       --cpus N"
+	echo "       --datestamp string"
+	echo "       --defconfig string"
+	echo "       --dryrun sched|script"
+	echo "       --duration minutes"
+	echo "       --interactive"
+	echo "       --jitter N [ maxsleep (us) [ maxspin (us) ] ]"
+	echo "       --kconfig Kconfig-options"
+	echo "       --kmake-arg kernel-make-arguments"
+	echo "       --mac nn:nn:nn:nn:nn:nn"
+	echo "       --memory megabytes | nnnG"
+	echo "       --no-initrd"
+	echo "       --qemu-args qemu-arguments"
+	echo "       --qemu-cmd qemu-system-..."
+	echo "       --results absolute-pathname"
+	echo "       --torture rcu"
+	exit 1
+}
+
+while test $# -gt 0
+do
+	case "$1" in
+	--bootargs|--bootarg)
+		checkarg --bootargs "(list of kernel boot arguments)" "$#" "$2" '.*' '^--'
+		TORTURE_BOOTARGS="$2"
+		shift
+		;;
+	--bootimage)
+		checkarg --bootimage "(relative path to kernel boot image)" "$#" "$2" '[a-zA-Z0-9][a-zA-Z0-9_]*' '^--'
+		TORTURE_BOOT_IMAGE="$2"
+		shift
+		;;
+	--buildonly)
+		TORTURE_BUILDONLY=1
+		;;
+	--configs|--config)
+		checkarg --configs "(list of config files)" "$#" "$2" '^[^/]*$' '^--'
+		configs="$2"
+		shift
+		;;
+	--cpus)
+		checkarg --cpus "(number)" "$#" "$2" '^[0-9]*$' '^--'
+		cpus=$2
+		shift
+		;;
+	--datestamp)
+		checkarg --datestamp "(relative pathname)" "$#" "$2" '^[^/]*$' '^--'
+		ds=$2
+		shift
+		;;
+	--defconfig)
+		checkarg --defconfig "defconfigtype" "$#" "$2" '^[^/][^/]*$' '^--'
+		TORTURE_DEFCONFIG=$2
+		shift
+		;;
+	--dryrun)
+		checkarg --dryrun "sched|script" $# "$2" 'sched\|script' '^--'
+		dryrun=$2
+		shift
+		;;
+	--duration)
+		checkarg --duration "(minutes)" $# "$2" '^[0-9]*$' '^error'
+		dur=$(($2*60))
+		shift
+		;;
+	--interactive)
+		TORTURE_QEMU_INTERACTIVE=1; export TORTURE_QEMU_INTERACTIVE
+		;;
+	--jitter)
+		checkarg --jitter "(# threads [ sleep [ spin ] ])" $# "$2" '^-\{,1\}[0-9]\+\( \+[0-9]\+\)\{,2\} *$' '^error$'
+		jitter="$2"
+		shift
+		;;
+	--kconfig)
+		checkarg --kconfig "(Kconfig options)" $# "$2" '^CONFIG_[A-Z0-9_]\+=\([ynm]\|[0-9]\+\)\( CONFIG_[A-Z0-9_]\+=\([ynm]\|[0-9]\+\)\)*$' '^error$'
+		TORTURE_KCONFIG_ARG="$2"
+		shift
+		;;
+	--kmake-arg)
+		checkarg --kmake-arg "(kernel make arguments)" $# "$2" '.*' '^error$'
+		TORTURE_KMAKE_ARG="$2"
+		shift
+		;;
+	--mac)
+		checkarg --mac "(MAC address)" $# "$2" '^\([0-9a-fA-F]\{2\}:\)\{5\}[0-9a-fA-F]\{2\}$' error
+		TORTURE_QEMU_MAC=$2
+		shift
+		;;
+	--memory)
+		checkarg --memory "(memory size)" $# "$2" '^[0-9]\+[MG]\?$' error
+		TORTURE_QEMU_MEM=$2
+		shift
+		;;
+	--no-initrd)
+		TORTURE_INITRD=""; export TORTURE_INITRD
+		;;
+	--qemu-args|--qemu-arg)
+		checkarg --qemu-args "(qemu arguments)" $# "$2" '^-' '^error'
+		TORTURE_QEMU_ARG="$2"
+		shift
+		;;
+	--qemu-cmd)
+		checkarg --qemu-cmd "(qemu-system-...)" $# "$2" 'qemu-system-' '^--'
+		TORTURE_QEMU_CMD="$2"
+		shift
+		;;
+	--results)
+		checkarg --results "(absolute pathname)" "$#" "$2" '^/' '^error'
+		resdir=$2
+		shift
+		;;
+	--shutdown-grace)
+		checkarg --shutdown-grace "(seconds)" "$#" "$2" '^[0-9]*$' '^error'
+		TORTURE_SHUTDOWN_GRACE=$2
+		shift
+		;;
+	--torture)
+		checkarg --torture "(suite name)" "$#" "$2" '^\(lock\|rcu\|rcuperf\)$' '^--'
+		TORTURE_SUITE=$2
+		shift
+		if test "$TORTURE_SUITE" = rcuperf
+		then
+			# If you really want jitter for rcuperf, specify
+			# it after specifying rcuperf.  (But why?)
+			jitter=0
+		fi
+		;;
+	*)
+		echo Unknown argument $1
+		usage
+		;;
+	esac
+	shift
+done
+
+CONFIGFRAG=${KVM}/configs/${TORTURE_SUITE}; export CONFIGFRAG
+
+if test -z "$configs"
+then
+	configs="`cat $CONFIGFRAG/CFLIST`"
+fi
+
+if test -z "$resdir"
+then
+	resdir=$KVM/res
+fi
+
+# Create a file of test-name/#cpus pairs, sorted by decreasing #cpus.
+touch $T/cfgcpu
+for CF in $configs
+do
+	case $CF in
+	[0-9]\**|[0-9][0-9]\**|[0-9][0-9][0-9]\**)
+		config_reps=`echo $CF | sed -e 's/\*.*$//'`
+		CF1=`echo $CF | sed -e 's/^[^*]*\*//'`
+		;;
+	*)
+		config_reps=1
+		CF1=$CF
+		;;
+	esac
+	if test -f "$CONFIGFRAG/$CF1"
+	then
+		cpu_count=`configNR_CPUS.sh $CONFIGFRAG/$CF1`
+		cpu_count=`configfrag_boot_cpus "$TORTURE_BOOTARGS" "$CONFIGFRAG/$CF1" "$cpu_count"`
+		cpu_count=`configfrag_boot_maxcpus "$TORTURE_BOOTARGS" "$CONFIGFRAG/$CF1" "$cpu_count"`
+		for ((cur_rep=0;cur_rep<$config_reps;cur_rep++))
+		do
+			echo $CF1 $cpu_count >> $T/cfgcpu
+		done
+	else
+		echo "The --configs file $CF1 does not exist, terminating."
+		exit 1
+	fi
+done
+sort -k2nr $T/cfgcpu -T="$T" > $T/cfgcpu.sort
+
+# Use a greedy bin-packing algorithm, sorting the list accordingly.
+awk < $T/cfgcpu.sort > $T/cfgcpu.pack -v ncpus=$cpus '
+BEGIN {
+	njobs = 0;
+}
+
+{
+	# Read file of tests and corresponding required numbers of CPUs.
+	cf[njobs] = $1;
+	cpus[njobs] = $2;
+	njobs++;
+}
+
+END {
+	batch = 0;
+	nc = -1;
+
+	# Each pass through the following loop creates on test batch
+	# that can be executed concurrently given ncpus.  Note that a
+	# given test that requires more than the available CPUs will run in
+	# their own batch.  Such tests just have to make do with what
+	# is available.
+	while (nc != ncpus) {
+		batch++;
+		nc = ncpus;
+
+		# Each pass through the following loop considers one
+		# test for inclusion in the current batch.
+		for (i = 0; i < njobs; i++) {
+			if (done[i])
+				continue; # Already part of a batch.
+			if (nc >= cpus[i] || nc == ncpus) {
+
+				# This test fits into the current batch.
+				done[i] = batch;
+				nc -= cpus[i];
+				if (nc <= 0)
+					break; # Too-big test in its own batch.
+			}
+		}
+	}
+
+	# Dump out the tests in batch order.
+	for (b = 1; b <= batch; b++)
+		for (i = 0; i < njobs; i++)
+			if (done[i] == b)
+				print cf[i], cpus[i];
+}'
+
+# Generate a script to execute the tests in appropriate batches.
+cat << ___EOF___ > $T/script
+CONFIGFRAG="$CONFIGFRAG"; export CONFIGFRAG
+KVM="$KVM"; export KVM
+PATH="$PATH"; export PATH
+TORTURE_BOOT_IMAGE="$TORTURE_BOOT_IMAGE"; export TORTURE_BOOT_IMAGE
+TORTURE_BUILDONLY="$TORTURE_BUILDONLY"; export TORTURE_BUILDONLY
+TORTURE_DEFCONFIG="$TORTURE_DEFCONFIG"; export TORTURE_DEFCONFIG
+TORTURE_INITRD="$TORTURE_INITRD"; export TORTURE_INITRD
+TORTURE_KCONFIG_ARG="$TORTURE_KCONFIG_ARG"; export TORTURE_KCONFIG_ARG
+TORTURE_KMAKE_ARG="$TORTURE_KMAKE_ARG"; export TORTURE_KMAKE_ARG
+TORTURE_QEMU_CMD="$TORTURE_QEMU_CMD"; export TORTURE_QEMU_CMD
+TORTURE_QEMU_INTERACTIVE="$TORTURE_QEMU_INTERACTIVE"; export TORTURE_QEMU_INTERACTIVE
+TORTURE_QEMU_MAC="$TORTURE_QEMU_MAC"; export TORTURE_QEMU_MAC
+TORTURE_QEMU_MEM="$TORTURE_QEMU_MEM"; export TORTURE_QEMU_MEM
+TORTURE_SHUTDOWN_GRACE="$TORTURE_SHUTDOWN_GRACE"; export TORTURE_SHUTDOWN_GRACE
+TORTURE_SUITE="$TORTURE_SUITE"; export TORTURE_SUITE
+if ! test -e $resdir
+then
+	mkdir -p "$resdir" || :
+fi
+mkdir $resdir/$ds
+echo Results directory: $resdir/$ds
+echo $scriptname $args
+touch $resdir/$ds/log
+echo $scriptname $args >> $resdir/$ds/log
+echo ${TORTURE_SUITE} > $resdir/$ds/TORTURE_SUITE
+pwd > $resdir/$ds/testid.txt
+if test -d .git
+then
+	git status >> $resdir/$ds/testid.txt
+	git rev-parse HEAD >> $resdir/$ds/testid.txt
+	git diff HEAD >> $resdir/$ds/testid.txt
+fi
+___EOF___
+awk < $T/cfgcpu.pack \
+	-v TORTURE_BUILDONLY="$TORTURE_BUILDONLY" \
+	-v CONFIGDIR="$CONFIGFRAG/" \
+	-v KVM="$KVM" \
+	-v ncpus=$cpus \
+	-v jitter="$jitter" \
+	-v rd=$resdir/$ds/ \
+	-v dur=$dur \
+	-v TORTURE_QEMU_ARG="$TORTURE_QEMU_ARG" \
+	-v TORTURE_BOOTARGS="$TORTURE_BOOTARGS" \
+'BEGIN {
+	i = 0;
+}
+
+{
+	cf[i] = $1;
+	cpus[i] = $2;
+	i++;
+}
+
+# Dump out the scripting required to run one test batch.
+function dump(first, pastlast, batchnum)
+{
+	print "echo ----Start batch " batchnum ": `date` | tee -a " rd "log";
+	print "needqemurun="
+	jn=1
+	for (j = first; j < pastlast; j++) {
+		builddir=KVM "/b1"
+		cpusr[jn] = cpus[j];
+		if (cfrep[cf[j]] == "") {
+			cfr[jn] = cf[j];
+			cfrep[cf[j]] = 1;
+		} else {
+			cfrep[cf[j]]++;
+			cfr[jn] = cf[j] "." cfrep[cf[j]];
+		}
+		if (cpusr[jn] > ncpus && ncpus != 0)
+			ovf = "-ovf";
+		else
+			ovf = "";
+		print "echo ", cfr[jn], cpusr[jn] ovf ": Starting build. `date` | tee -a " rd "log";
+		print "rm -f " builddir ".*";
+		print "touch " builddir ".wait";
+		print "mkdir " builddir " > /dev/null 2>&1 || :";
+		print "mkdir " rd cfr[jn] " || :";
+		print "kvm-test-1-run.sh " CONFIGDIR cf[j], builddir, rd cfr[jn], dur " \"" TORTURE_QEMU_ARG "\" \"" TORTURE_BOOTARGS "\" > " rd cfr[jn]  "/kvm-test-1-run.sh.out 2>&1 &"
+		print "echo ", cfr[jn], cpusr[jn] ovf ": Waiting for build to complete. `date` | tee -a " rd "log";
+		print "while test -f " builddir ".wait"
+		print "do"
+		print "\tsleep 1"
+		print "done"
+		print "echo ", cfr[jn], cpusr[jn] ovf ": Build complete. `date` | tee -a " rd "log";
+		jn++;
+	}
+	for (j = 1; j < jn; j++) {
+		builddir=KVM "/b" j
+		print "rm -f " builddir ".ready"
+		print "if test -f \"" rd cfr[j] "/builtkernel\""
+		print "then"
+		print "\techo ----", cfr[j], cpusr[j] ovf ": Kernel present. `date` | tee -a " rd "log";
+		print "\tneedqemurun=1"
+		print "fi"
+	}
+	njitter = 0;
+	split(jitter, ja);
+	if (ja[1] == -1 && ncpus == 0)
+		njitter = 1;
+	else if (ja[1] == -1)
+		njitter = ncpus;
+	else
+		njitter = ja[1];
+	if (TORTURE_BUILDONLY && njitter != 0) {
+		njitter = 0;
+		print "echo Build-only run, so suppressing jitter | tee -a " rd "log"
+	}
+	if (TORTURE_BUILDONLY) {
+		print "needqemurun="
+	}
+	print "if test -n \"$needqemurun\""
+	print "then"
+	print "\techo ---- Starting kernels. `date` | tee -a " rd "log";
+	for (j = 0; j < njitter; j++)
+		print "\tjitter.sh " j " " dur " " ja[2] " " ja[3] "&"
+	print "\twait"
+	print "\techo ---- All kernel runs complete. `date` | tee -a " rd "log";
+	print "else"
+	print "\twait"
+	print "\techo ---- No kernel runs. `date` | tee -a " rd "log";
+	print "fi"
+	for (j = 1; j < jn; j++) {
+		builddir=KVM "/b" j
+		print "echo ----", cfr[j], cpusr[j] ovf ": Build/run results: | tee -a " rd "log";
+		print "cat " rd cfr[j]  "/kvm-test-1-run.sh.out | tee -a " rd "log";
+	}
+}
+
+END {
+	njobs = i;
+	nc = ncpus;
+	first = 0;
+	batchnum = 1;
+
+	# Each pass through the following loop considers one test.
+	for (i = 0; i < njobs; i++) {
+		if (ncpus == 0) {
+			# Sequential test specified, each test its own batch.
+			dump(i, i + 1, batchnum);
+			first = i;
+			batchnum++;
+		} else if (nc < cpus[i] && i != 0) {
+			# Out of CPUs, dump out a batch.
+			dump(first, i, batchnum);
+			first = i;
+			nc = ncpus;
+			batchnum++;
+		}
+		# Account for the CPUs needed by the current test.
+		nc -= cpus[i];
+	}
+	# Dump the last batch.
+	if (ncpus != 0)
+		dump(first, i, batchnum);
+}' >> $T/script
+
+cat << ___EOF___ >> $T/script
+echo
+echo
+echo " --- `date` Test summary:"
+echo Results directory: $resdir/$ds
+kvm-recheck.sh $resdir/$ds
+___EOF___
+
+if test "$dryrun" = script
+then
+	cat $T/script
+	exit 0
+elif test "$dryrun" = sched
+then
+	# Extract the test run schedule from the script.
+	egrep 'Start batch|Starting build\.' $T/script |
+		grep -v ">>" |
+		sed -e 's/:.*$//' -e 's/^echo //'
+	exit 0
+else
+	# Not a dryrun, so run the script.
+	sh $T/script
+fi
+
+# Tracing: trace_event=rcu:rcu_grace_period,rcu:rcu_future_grace_period,rcu:rcu_grace_period_init,rcu:rcu_nocb_wake,rcu:rcu_preempt_task,rcu:rcu_unlock_preempted_task,rcu:rcu_quiescent_state_report,rcu:rcu_fqs,rcu:rcu_callback,rcu:rcu_kfree_callback,rcu:rcu_batch_start,rcu:rcu_invoke_callback,rcu:rcu_invoke_kfree_callback,rcu:rcu_batch_end,rcu:rcu_torture_read,rcu:rcu_barrier
diff --git a/tools/testing/selftests/rcutorture/bin/parse-build.sh b/tools/testing/selftests/rcutorture/bin/parse-build.sh
new file mode 100755
index 000000000..24fe5f822
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/bin/parse-build.sh
@@ -0,0 +1,62 @@
+#!/bin/bash
+#
+# Check the build output from an rcutorture run for goodness.
+# The "file" is a pathname on the local system, and "title" is
+# a text string for error-message purposes.
+#
+# The file must contain kernel build output.
+#
+# Usage: parse-build.sh file title
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# Copyright (C) IBM Corporation, 2011
+#
+# Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+
+F=$1
+title=$2
+T=${TMPDIR-/tmp}/parse-build.sh.$$
+trap 'rm -rf $T' 0
+mkdir $T
+
+. functions.sh
+
+if grep -q CC < $F
+then
+	:
+else
+	print_bug $title no build
+	exit 1
+fi
+
+if grep -q "error:" < $F
+then
+	print_bug $title build errors:
+	grep "error:" < $F
+	exit 2
+fi
+
+grep warning: < $F > $T/warnings
+grep "include/linux/*rcu*\.h:" $T/warnings > $T/hwarnings
+grep "kernel/rcu/[^/]*:" $T/warnings > $T/cwarnings
+cat $T/hwarnings $T/cwarnings > $T/rcuwarnings
+if test -s $T/rcuwarnings
+then
+	print_warning $title build errors:
+	cat $T/rcuwarnings
+	exit 2
+fi
+exit 0
diff --git a/tools/testing/selftests/rcutorture/bin/parse-console.sh b/tools/testing/selftests/rcutorture/bin/parse-console.sh
new file mode 100755
index 000000000..84933f6ae
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/bin/parse-console.sh
@@ -0,0 +1,176 @@
+#!/bin/bash
+#
+# Check the console output from an rcutorture run for oopses.
+# The "file" is a pathname on the local system, and "title" is
+# a text string for error-message purposes.
+#
+# Usage: parse-console.sh file title
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# Copyright (C) IBM Corporation, 2011
+#
+# Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+
+T=${TMPDIR-/tmp}/parse-console.sh.$$
+file="$1"
+title="$2"
+
+trap 'rm -f $T.seq $T.diags' 0
+
+. functions.sh
+
+# Check for presence and readability of console output file
+if test -f "$file" -a -r "$file"
+then
+	:
+else
+	echo $title unreadable console output file: $file
+	exit 1
+fi
+if grep -Pq '\x00' < $file
+then
+	print_warning Console output contains nul bytes, old qemu still running?
+fi
+cat /dev/null > $file.diags
+
+# Check for proper termination, except that rcuperf runs don't indicate this.
+if test "$TORTURE_SUITE" != rcuperf
+then
+	# check for abject failure
+
+	if grep -q FAILURE $file || grep -q -e '-torture.*!!!' $file
+	then
+		nerrs=`grep --binary-files=text '!!!' $file |
+		tail -1 |
+		awk '
+		{
+			for (i=NF-8;i<=NF;i++)
+				sum+=$i;
+		}
+		END { print sum }'`
+		print_bug $title FAILURE, $nerrs instances
+		exit
+	fi
+
+	grep --binary-files=text 'torture:.*ver:' $file |
+	egrep --binary-files=text -v '\(null\)|rtc: 000000000* ' |
+	sed -e 's/^(initramfs)[^]]*] //' -e 's/^\[[^]]*] //' |
+	awk '
+	BEGIN	{
+		ver = 0;
+		badseq = 0;
+		}
+
+		{
+		if (!badseq && ($5 + 0 != $5 || $5 <= ver)) {
+			badseqno1 = ver;
+			badseqno2 = $5;
+			badseqnr = NR;
+			badseq = 1;
+		}
+		ver = $5
+		}
+
+	END	{
+		if (badseq) {
+			if (badseqno1 == badseqno2 && badseqno2 == ver)
+				print "GP HANG at " ver " torture stat " badseqnr;
+			else
+				print "BAD SEQ " badseqno1 ":" badseqno2 " last:" ver " version " badseqnr;
+		}
+		}' > $T.seq
+
+	if grep -q SUCCESS $file
+	then
+		if test -s $T.seq
+		then
+			print_warning $title `cat $T.seq`
+			echo "   " $file
+			exit 2
+		fi
+	else
+		if grep -q "_HOTPLUG:" $file
+		then
+			print_warning HOTPLUG FAILURES $title `cat $T.seq`
+			echo "   " $file
+			exit 3
+		fi
+		echo $title no success message, `grep --binary-files=text 'ver:' $file | wc -l` successful version messages
+		if test -s $T.seq
+		then
+			print_warning $title `cat $T.seq`
+		fi
+		exit 2
+	fi
+fi | tee -a $file.diags
+
+egrep 'Badness|WARNING:|Warn|BUG|===========|Call Trace:|Oops:|detected stalls on CPUs/tasks:|self-detected stall on CPU|Stall ended before state dump start|\?\?\? Writer stall state|rcu_.*kthread starved for' < $file |
+grep -v 'ODEBUG: ' |
+grep -v 'Warning: unable to open an initial console' > $T.diags
+if test -s $T.diags
+then
+	print_warning "Assertion failure in $file $title"
+	# cat $T.diags
+	summary=""
+	n_badness=`grep -c Badness $file`
+	if test "$n_badness" -ne 0
+	then
+		summary="$summary  Badness: $n_badness"
+	fi
+	n_warn=`grep -v 'Warning: unable to open an initial console' $file | egrep -c 'WARNING:|Warn'`
+	if test "$n_warn" -ne 0
+	then
+		summary="$summary  Warnings: $n_warn"
+	fi
+	n_bugs=`egrep -c 'BUG|Oops:' $file`
+	if test "$n_bugs" -ne 0
+	then
+		summary="$summary  Bugs: $n_bugs"
+	fi
+	n_calltrace=`grep -c 'Call Trace:' $file`
+	if test "$n_calltrace" -ne 0
+	then
+		summary="$summary  Call Traces: $n_calltrace"
+	fi
+	n_lockdep=`grep -c =========== $file`
+	if test "$n_badness" -ne 0
+	then
+		summary="$summary  lockdep: $n_badness"
+	fi
+	n_stalls=`egrep -c 'detected stalls on CPUs/tasks:|self-detected stall on CPU|Stall ended before state dump start|\?\?\? Writer stall state' $file`
+	if test "$n_stalls" -ne 0
+	then
+		summary="$summary  Stalls: $n_stalls"
+	fi
+	n_starves=`grep -c 'rcu_.*kthread starved for' $file`
+	if test "$n_starves" -ne 0
+	then
+		summary="$summary  Starves: $n_starves"
+	fi
+	print_warning Summary: $summary
+	cat $T.diags >> $file.diags
+fi
+for i in $file.*.diags
+do
+	if test -f "$i"
+	then
+		cat $i >> $file.diags
+	fi
+done
+if ! test -s $file.diags
+then
+	rm -f $file.diags
+fi
diff --git a/tools/testing/selftests/rcutorture/configs/lock/BUSTED b/tools/testing/selftests/rcutorture/configs/lock/BUSTED
new file mode 100644
index 000000000..1d1da1477
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/lock/BUSTED
@@ -0,0 +1,6 @@
+CONFIG_SMP=y
+CONFIG_NR_CPUS=4
+CONFIG_HOTPLUG_CPU=y
+CONFIG_PREEMPT_NONE=n
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=y
diff --git a/tools/testing/selftests/rcutorture/configs/lock/BUSTED.boot b/tools/testing/selftests/rcutorture/configs/lock/BUSTED.boot
new file mode 100644
index 000000000..6386c15e9
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/lock/BUSTED.boot
@@ -0,0 +1 @@
+locktorture.torture_type=lock_busted
diff --git a/tools/testing/selftests/rcutorture/configs/lock/CFLIST b/tools/testing/selftests/rcutorture/configs/lock/CFLIST
new file mode 100644
index 000000000..41bae5824
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/lock/CFLIST
@@ -0,0 +1,7 @@
+LOCK01
+LOCK02
+LOCK03
+LOCK04
+LOCK05
+LOCK06
+LOCK07
diff --git a/tools/testing/selftests/rcutorture/configs/lock/CFcommon b/tools/testing/selftests/rcutorture/configs/lock/CFcommon
new file mode 100644
index 000000000..e372dc269
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/lock/CFcommon
@@ -0,0 +1,2 @@
+CONFIG_LOCK_TORTURE_TEST=y
+CONFIG_PRINTK_TIME=y
diff --git a/tools/testing/selftests/rcutorture/configs/lock/LOCK01 b/tools/testing/selftests/rcutorture/configs/lock/LOCK01
new file mode 100644
index 000000000..a9625e3d6
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/lock/LOCK01
@@ -0,0 +1,6 @@
+CONFIG_SMP=y
+CONFIG_NR_CPUS=8
+CONFIG_HOTPLUG_CPU=y
+CONFIG_PREEMPT_NONE=n
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=y
diff --git a/tools/testing/selftests/rcutorture/configs/lock/LOCK02 b/tools/testing/selftests/rcutorture/configs/lock/LOCK02
new file mode 100644
index 000000000..1d1da1477
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/lock/LOCK02
@@ -0,0 +1,6 @@
+CONFIG_SMP=y
+CONFIG_NR_CPUS=4
+CONFIG_HOTPLUG_CPU=y
+CONFIG_PREEMPT_NONE=n
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=y
diff --git a/tools/testing/selftests/rcutorture/configs/lock/LOCK02.boot b/tools/testing/selftests/rcutorture/configs/lock/LOCK02.boot
new file mode 100644
index 000000000..5aa44b4f1
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/lock/LOCK02.boot
@@ -0,0 +1 @@
+locktorture.torture_type=mutex_lock
diff --git a/tools/testing/selftests/rcutorture/configs/lock/LOCK03 b/tools/testing/selftests/rcutorture/configs/lock/LOCK03
new file mode 100644
index 000000000..1d1da1477
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/lock/LOCK03
@@ -0,0 +1,6 @@
+CONFIG_SMP=y
+CONFIG_NR_CPUS=4
+CONFIG_HOTPLUG_CPU=y
+CONFIG_PREEMPT_NONE=n
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=y
diff --git a/tools/testing/selftests/rcutorture/configs/lock/LOCK03.boot b/tools/testing/selftests/rcutorture/configs/lock/LOCK03.boot
new file mode 100644
index 000000000..a67bbe024
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/lock/LOCK03.boot
@@ -0,0 +1 @@
+locktorture.torture_type=rwsem_lock
diff --git a/tools/testing/selftests/rcutorture/configs/lock/LOCK04 b/tools/testing/selftests/rcutorture/configs/lock/LOCK04
new file mode 100644
index 000000000..1d1da1477
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/lock/LOCK04
@@ -0,0 +1,6 @@
+CONFIG_SMP=y
+CONFIG_NR_CPUS=4
+CONFIG_HOTPLUG_CPU=y
+CONFIG_PREEMPT_NONE=n
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=y
diff --git a/tools/testing/selftests/rcutorture/configs/lock/LOCK04.boot b/tools/testing/selftests/rcutorture/configs/lock/LOCK04.boot
new file mode 100644
index 000000000..48c04fe47
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/lock/LOCK04.boot
@@ -0,0 +1 @@
+locktorture.torture_type=rw_lock
diff --git a/tools/testing/selftests/rcutorture/configs/lock/LOCK05 b/tools/testing/selftests/rcutorture/configs/lock/LOCK05
new file mode 100644
index 000000000..1d1da1477
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/lock/LOCK05
@@ -0,0 +1,6 @@
+CONFIG_SMP=y
+CONFIG_NR_CPUS=4
+CONFIG_HOTPLUG_CPU=y
+CONFIG_PREEMPT_NONE=n
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=y
diff --git a/tools/testing/selftests/rcutorture/configs/lock/LOCK05.boot b/tools/testing/selftests/rcutorture/configs/lock/LOCK05.boot
new file mode 100644
index 000000000..8ac37307c
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/lock/LOCK05.boot
@@ -0,0 +1 @@
+locktorture.torture_type=rtmutex_lock
diff --git a/tools/testing/selftests/rcutorture/configs/lock/LOCK06 b/tools/testing/selftests/rcutorture/configs/lock/LOCK06
new file mode 100644
index 000000000..1d1da1477
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/lock/LOCK06
@@ -0,0 +1,6 @@
+CONFIG_SMP=y
+CONFIG_NR_CPUS=4
+CONFIG_HOTPLUG_CPU=y
+CONFIG_PREEMPT_NONE=n
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=y
diff --git a/tools/testing/selftests/rcutorture/configs/lock/LOCK06.boot b/tools/testing/selftests/rcutorture/configs/lock/LOCK06.boot
new file mode 100644
index 000000000..f92219cd4
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/lock/LOCK06.boot
@@ -0,0 +1 @@
+locktorture.torture_type=percpu_rwsem_lock
diff --git a/tools/testing/selftests/rcutorture/configs/lock/LOCK07 b/tools/testing/selftests/rcutorture/configs/lock/LOCK07
new file mode 100644
index 000000000..1d1da1477
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/lock/LOCK07
@@ -0,0 +1,6 @@
+CONFIG_SMP=y
+CONFIG_NR_CPUS=4
+CONFIG_HOTPLUG_CPU=y
+CONFIG_PREEMPT_NONE=n
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=y
diff --git a/tools/testing/selftests/rcutorture/configs/lock/LOCK07.boot b/tools/testing/selftests/rcutorture/configs/lock/LOCK07.boot
new file mode 100644
index 000000000..97dadd1a9
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/lock/LOCK07.boot
@@ -0,0 +1 @@
+locktorture.torture_type=ww_mutex_lock
diff --git a/tools/testing/selftests/rcutorture/configs/lock/ver_functions.sh b/tools/testing/selftests/rcutorture/configs/lock/ver_functions.sh
new file mode 100644
index 000000000..80eb646e1
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/lock/ver_functions.sh
@@ -0,0 +1,42 @@
+#!/bin/bash
+#
+# Kernel-version-dependent shell functions for the rest of the scripts.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# Copyright (C) IBM Corporation, 2014
+#
+# Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+
+# locktorture_param_onoff bootparam-string config-file
+#
+# Adds onoff locktorture module parameters to kernels having it.
+locktorture_param_onoff () {
+	if ! bootparam_hotplug_cpu "$1" && configfrag_hotplug_cpu "$2"
+	then
+		echo CPU-hotplug kernel, adding locktorture onoff. 1>&2
+		echo locktorture.onoff_interval=3 locktorture.onoff_holdoff=30
+	fi
+}
+
+# per_version_boot_params bootparam-string config-file seconds
+#
+# Adds per-version torture-module parameters to kernels supporting them.
+per_version_boot_params () {
+	echo $1 `locktorture_param_onoff "$1" "$2"` \
+		locktorture.stat_interval=15 \
+		locktorture.shutdown_secs=$3 \
+		locktorture.verbose=1
+}
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/BUSTED b/tools/testing/selftests/rcutorture/configs/rcu/BUSTED
new file mode 100644
index 000000000..48d8a245c
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/BUSTED
@@ -0,0 +1,7 @@
+CONFIG_RCU_TRACE=n
+CONFIG_SMP=y
+CONFIG_NR_CPUS=4
+CONFIG_HOTPLUG_CPU=y
+CONFIG_PREEMPT_NONE=n
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/BUSTED.boot b/tools/testing/selftests/rcutorture/configs/rcu/BUSTED.boot
new file mode 100644
index 000000000..be7728db4
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/BUSTED.boot
@@ -0,0 +1 @@
+rcutorture.torture_type=busted
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/CFLIST b/tools/testing/selftests/rcutorture/configs/rcu/CFLIST
new file mode 100644
index 000000000..6a0b9f69f
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/CFLIST
@@ -0,0 +1,18 @@
+TREE01
+TREE02
+TREE03
+TREE04
+TREE05
+TREE06
+TREE07
+TREE08
+TREE09
+SRCU-N
+SRCU-P
+SRCU-t
+SRCU-u
+TINY01
+TINY02
+TASKS01
+TASKS02
+TASKS03
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/CFcommon b/tools/testing/selftests/rcutorture/configs/rcu/CFcommon
new file mode 100644
index 000000000..d2d2a8613
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/CFcommon
@@ -0,0 +1,2 @@
+CONFIG_RCU_TORTURE_TEST=y
+CONFIG_PRINTK_TIME=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-N b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-N
new file mode 100644
index 000000000..2da8b4958
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-N
@@ -0,0 +1,8 @@
+CONFIG_RCU_TRACE=n
+CONFIG_SMP=y
+CONFIG_NR_CPUS=4
+CONFIG_HOTPLUG_CPU=y
+CONFIG_PREEMPT_NONE=y
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=n
+#CHECK#CONFIG_RCU_EXPERT=n
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-N.boot b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-N.boot
new file mode 100644
index 000000000..238bfe3bd
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-N.boot
@@ -0,0 +1 @@
+rcutorture.torture_type=srcu
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P
new file mode 100644
index 000000000..ab7ccd382
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P
@@ -0,0 +1,12 @@
+CONFIG_RCU_TRACE=n
+CONFIG_SMP=y
+CONFIG_NR_CPUS=8
+CONFIG_HOTPLUG_CPU=y
+CONFIG_RCU_EXPERT=y
+CONFIG_RCU_FANOUT=2
+CONFIG_RCU_FANOUT_LEAF=2
+CONFIG_PREEMPT_NONE=n
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=y
+CONFIG_DEBUG_LOCK_ALLOC=y
+CONFIG_PROVE_LOCKING=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P.boot b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P.boot
new file mode 100644
index 000000000..84a7d51b7
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P.boot
@@ -0,0 +1 @@
+rcutorture.torture_type=srcud
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-t b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-t
new file mode 100644
index 000000000..6c78022c8
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-t
@@ -0,0 +1,10 @@
+CONFIG_SMP=n
+CONFIG_PREEMPT_NONE=y
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=n
+#CHECK#CONFIG_TINY_SRCU=y
+CONFIG_RCU_TRACE=n
+CONFIG_DEBUG_LOCK_ALLOC=n
+CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
+CONFIG_DEBUG_ATOMIC_SLEEP=y
+#CHECK#CONFIG_PREEMPT_COUNT=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-t.boot b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-t.boot
new file mode 100644
index 000000000..238bfe3bd
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-t.boot
@@ -0,0 +1 @@
+rcutorture.torture_type=srcu
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-u b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-u
new file mode 100644
index 000000000..c15ada821
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-u
@@ -0,0 +1,10 @@
+CONFIG_SMP=n
+CONFIG_PREEMPT_NONE=y
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=n
+#CHECK#CONFIG_TINY_SRCU=y
+CONFIG_RCU_TRACE=n
+CONFIG_DEBUG_LOCK_ALLOC=y
+CONFIG_PROVE_LOCKING=y
+CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
+CONFIG_PREEMPT_COUNT=n
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-u.boot b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-u.boot
new file mode 100644
index 000000000..84a7d51b7
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-u.boot
@@ -0,0 +1 @@
+rcutorture.torture_type=srcud
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TASKS01 b/tools/testing/selftests/rcutorture/configs/rcu/TASKS01
new file mode 100644
index 000000000..bafe94cbd
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TASKS01
@@ -0,0 +1,10 @@
+CONFIG_SMP=y
+CONFIG_NR_CPUS=2
+CONFIG_HOTPLUG_CPU=y
+CONFIG_PREEMPT_NONE=n
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=y
+CONFIG_DEBUG_LOCK_ALLOC=y
+CONFIG_PROVE_LOCKING=y
+#CHECK#CONFIG_PROVE_RCU=y
+CONFIG_RCU_EXPERT=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TASKS01.boot b/tools/testing/selftests/rcutorture/configs/rcu/TASKS01.boot
new file mode 100644
index 000000000..cd2a188ee
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TASKS01.boot
@@ -0,0 +1 @@
+rcutorture.torture_type=tasks
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TASKS02 b/tools/testing/selftests/rcutorture/configs/rcu/TASKS02
new file mode 100644
index 000000000..ad2be91e5
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TASKS02
@@ -0,0 +1,4 @@
+CONFIG_SMP=n
+CONFIG_PREEMPT_NONE=y
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=n
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TASKS02.boot b/tools/testing/selftests/rcutorture/configs/rcu/TASKS02.boot
new file mode 100644
index 000000000..cd2a188ee
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TASKS02.boot
@@ -0,0 +1 @@
+rcutorture.torture_type=tasks
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TASKS03 b/tools/testing/selftests/rcutorture/configs/rcu/TASKS03
new file mode 100644
index 000000000..28568b72a
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TASKS03
@@ -0,0 +1,12 @@
+CONFIG_SMP=y
+CONFIG_NR_CPUS=2
+CONFIG_HOTPLUG_CPU=n
+CONFIG_SUSPEND=n
+CONFIG_HIBERNATION=n
+CONFIG_PREEMPT_NONE=n
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=y
+CONFIG_HZ_PERIODIC=n
+CONFIG_NO_HZ_IDLE=n
+CONFIG_NO_HZ_FULL=y
+#CHECK#CONFIG_RCU_EXPERT=n
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TASKS03.boot b/tools/testing/selftests/rcutorture/configs/rcu/TASKS03.boot
new file mode 100644
index 000000000..838297c58
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TASKS03.boot
@@ -0,0 +1 @@
+rcutorture.torture_type=tasks nohz_full=1
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TINY01 b/tools/testing/selftests/rcutorture/configs/rcu/TINY01
new file mode 100644
index 000000000..6db705e55
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TINY01
@@ -0,0 +1,13 @@
+CONFIG_SMP=n
+CONFIG_PREEMPT_NONE=y
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=n
+#CHECK#CONFIG_TINY_RCU=y
+CONFIG_HZ_PERIODIC=n
+CONFIG_NO_HZ_IDLE=y
+CONFIG_NO_HZ_FULL=n
+CONFIG_RCU_TRACE=n
+#CHECK#CONFIG_RCU_STALL_COMMON=n
+CONFIG_DEBUG_LOCK_ALLOC=n
+CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
+CONFIG_PREEMPT_COUNT=n
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TINY02 b/tools/testing/selftests/rcutorture/configs/rcu/TINY02
new file mode 100644
index 000000000..d86742643
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TINY02
@@ -0,0 +1,14 @@
+CONFIG_SMP=n
+CONFIG_PREEMPT_NONE=y
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=n
+#CHECK#CONFIG_TINY_RCU=y
+CONFIG_HZ_PERIODIC=y
+CONFIG_NO_HZ_IDLE=n
+CONFIG_NO_HZ_FULL=n
+CONFIG_PROVE_LOCKING=y
+#CHECK#CONFIG_PROVE_RCU=y
+CONFIG_DEBUG_LOCK_ALLOC=y
+CONFIG_DEBUG_OBJECTS=y
+CONFIG_DEBUG_OBJECTS_RCU_HEAD=y
+CONFIG_DEBUG_ATOMIC_SLEEP=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TINY02.boot b/tools/testing/selftests/rcutorture/configs/rcu/TINY02.boot
new file mode 100644
index 000000000..6c1a292a6
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TINY02.boot
@@ -0,0 +1,3 @@
+rcupdate.rcu_self_test=1
+rcupdate.rcu_self_test_bh=1
+rcutorture.torture_type=rcu_bh
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE01 b/tools/testing/selftests/rcutorture/configs/rcu/TREE01
new file mode 100644
index 000000000..b5b53973c
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE01
@@ -0,0 +1,18 @@
+CONFIG_SMP=y
+CONFIG_PREEMPT_NONE=n
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=y
+#CHECK#CONFIG_PREEMPT_RCU=y
+CONFIG_HZ_PERIODIC=n
+CONFIG_NO_HZ_IDLE=y
+CONFIG_NO_HZ_FULL=n
+CONFIG_RCU_FAST_NO_HZ=y
+CONFIG_RCU_TRACE=y
+CONFIG_HOTPLUG_CPU=y
+CONFIG_MAXSMP=y
+CONFIG_CPUMASK_OFFSTACK=y
+CONFIG_RCU_NOCB_CPU=y
+CONFIG_DEBUG_LOCK_ALLOC=n
+CONFIG_RCU_BOOST=n
+CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
+CONFIG_RCU_EXPERT=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE01.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE01.boot
new file mode 100644
index 000000000..9f3a4d28e
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE01.boot
@@ -0,0 +1,5 @@
+rcutorture.torture_type=rcu_bh maxcpus=8 nr_cpus=43
+rcutree.gp_preinit_delay=3
+rcutree.gp_init_delay=3
+rcutree.gp_cleanup_delay=3
+rcu_nocbs=0
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE02 b/tools/testing/selftests/rcutorture/configs/rcu/TREE02
new file mode 100644
index 000000000..35e639e39
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE02
@@ -0,0 +1,23 @@
+CONFIG_SMP=y
+CONFIG_NR_CPUS=8
+CONFIG_PREEMPT_NONE=n
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=y
+#CHECK#CONFIG_PREEMPT_RCU=y
+CONFIG_HZ_PERIODIC=n
+CONFIG_NO_HZ_IDLE=y
+CONFIG_NO_HZ_FULL=n
+CONFIG_RCU_FAST_NO_HZ=n
+CONFIG_RCU_TRACE=n
+CONFIG_HOTPLUG_CPU=n
+CONFIG_SUSPEND=n
+CONFIG_HIBERNATION=n
+CONFIG_RCU_FANOUT=3
+CONFIG_RCU_FANOUT_LEAF=3
+CONFIG_RCU_NOCB_CPU=n
+CONFIG_DEBUG_LOCK_ALLOC=y
+CONFIG_PROVE_LOCKING=n
+CONFIG_RCU_BOOST=n
+CONFIG_RCU_EXPERT=y
+CONFIG_DEBUG_OBJECTS=y
+CONFIG_DEBUG_OBJECTS_RCU_HEAD=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE03 b/tools/testing/selftests/rcutorture/configs/rcu/TREE03
new file mode 100644
index 000000000..2dc31b16e
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE03
@@ -0,0 +1,18 @@
+CONFIG_SMP=y
+CONFIG_NR_CPUS=16
+CONFIG_PREEMPT_NONE=n
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=y
+#CHECK#CONFIG_PREEMPT_RCU=y
+CONFIG_HZ_PERIODIC=y
+CONFIG_NO_HZ_IDLE=n
+CONFIG_NO_HZ_FULL=n
+CONFIG_RCU_TRACE=y
+CONFIG_HOTPLUG_CPU=y
+CONFIG_RCU_FANOUT=2
+CONFIG_RCU_FANOUT_LEAF=2
+CONFIG_RCU_NOCB_CPU=n
+CONFIG_DEBUG_LOCK_ALLOC=n
+CONFIG_RCU_BOOST=y
+CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
+CONFIG_RCU_EXPERT=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot
new file mode 100644
index 000000000..5c3213cc3
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot
@@ -0,0 +1,5 @@
+rcutorture.onoff_interval=200 rcutorture.onoff_holdoff=30
+rcutree.gp_preinit_delay=12
+rcutree.gp_init_delay=3
+rcutree.gp_cleanup_delay=3
+rcutree.kthread_prio=2
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE04 b/tools/testing/selftests/rcutorture/configs/rcu/TREE04
new file mode 100644
index 000000000..24c9f6012
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE04
@@ -0,0 +1,20 @@
+CONFIG_SMP=y
+CONFIG_NR_CPUS=8
+CONFIG_PREEMPT_NONE=y
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=n
+#CHECK#CONFIG_TREE_RCU=y
+CONFIG_HZ_PERIODIC=n
+CONFIG_NO_HZ_IDLE=n
+CONFIG_NO_HZ_FULL=y
+CONFIG_RCU_FAST_NO_HZ=y
+CONFIG_RCU_TRACE=y
+CONFIG_HOTPLUG_CPU=n
+CONFIG_SUSPEND=n
+CONFIG_HIBERNATION=n
+CONFIG_RCU_FANOUT=4
+CONFIG_RCU_FANOUT_LEAF=3
+CONFIG_DEBUG_LOCK_ALLOC=n
+CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
+CONFIG_RCU_EXPERT=y
+CONFIG_RCU_EQS_DEBUG=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE04.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE04.boot
new file mode 100644
index 000000000..e6071bb96
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE04.boot
@@ -0,0 +1 @@
+rcutorture.torture_type=rcu_bh rcutree.rcu_fanout_leaf=4 nohz_full=1-7
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE05 b/tools/testing/selftests/rcutorture/configs/rcu/TREE05
new file mode 100644
index 000000000..2dde0d996
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE05
@@ -0,0 +1,20 @@
+CONFIG_SMP=y
+CONFIG_NR_CPUS=8
+CONFIG_PREEMPT_NONE=y
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=n
+#CHECK#CONFIG_TREE_RCU=y
+CONFIG_HZ_PERIODIC=n
+CONFIG_NO_HZ_IDLE=y
+CONFIG_NO_HZ_FULL=n
+CONFIG_RCU_FAST_NO_HZ=n
+CONFIG_RCU_TRACE=n
+CONFIG_HOTPLUG_CPU=y
+CONFIG_RCU_FANOUT=6
+CONFIG_RCU_FANOUT_LEAF=6
+CONFIG_RCU_NOCB_CPU=y
+CONFIG_DEBUG_LOCK_ALLOC=y
+CONFIG_PROVE_LOCKING=y
+#CHECK#CONFIG_PROVE_RCU=y
+CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
+CONFIG_RCU_EXPERT=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE05.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE05.boot
new file mode 100644
index 000000000..c7fd050df
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE05.boot
@@ -0,0 +1,5 @@
+rcutorture.torture_type=sched
+rcupdate.rcu_self_test_sched=1
+rcutree.gp_preinit_delay=3
+rcutree.gp_init_delay=3
+rcutree.gp_cleanup_delay=3
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE06 b/tools/testing/selftests/rcutorture/configs/rcu/TREE06
new file mode 100644
index 000000000..05a4eec3f
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE06
@@ -0,0 +1,23 @@
+CONFIG_SMP=y
+CONFIG_NR_CPUS=8
+CONFIG_PREEMPT_NONE=y
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=n
+#CHECK#CONFIG_TREE_RCU=y
+CONFIG_HZ_PERIODIC=n
+CONFIG_NO_HZ_IDLE=y
+CONFIG_NO_HZ_FULL=n
+CONFIG_RCU_FAST_NO_HZ=n
+CONFIG_RCU_TRACE=n
+CONFIG_HOTPLUG_CPU=n
+CONFIG_SUSPEND=n
+CONFIG_HIBERNATION=n
+CONFIG_RCU_FANOUT=6
+CONFIG_RCU_FANOUT_LEAF=6
+CONFIG_RCU_NOCB_CPU=n
+CONFIG_DEBUG_LOCK_ALLOC=y
+CONFIG_PROVE_LOCKING=y
+#CHECK#CONFIG_PROVE_RCU=y
+CONFIG_DEBUG_OBJECTS=y
+CONFIG_DEBUG_OBJECTS_RCU_HEAD=y
+CONFIG_RCU_EXPERT=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE06.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE06.boot
new file mode 100644
index 000000000..ad18b52a2
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE06.boot
@@ -0,0 +1,7 @@
+rcupdate.rcu_self_test=1
+rcupdate.rcu_self_test_bh=1
+rcupdate.rcu_self_test_sched=1
+rcutree.rcu_fanout_exact=1
+rcutree.gp_preinit_delay=3
+rcutree.gp_init_delay=3
+rcutree.gp_cleanup_delay=3
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE07 b/tools/testing/selftests/rcutorture/configs/rcu/TREE07
new file mode 100644
index 000000000..d7afb271a
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE07
@@ -0,0 +1,17 @@
+CONFIG_SMP=y
+CONFIG_NR_CPUS=16
+CONFIG_PREEMPT_NONE=y
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=n
+#CHECK#CONFIG_TREE_RCU=y
+CONFIG_HZ_PERIODIC=n
+CONFIG_NO_HZ_IDLE=n
+CONFIG_NO_HZ_FULL=y
+CONFIG_RCU_FAST_NO_HZ=n
+CONFIG_RCU_TRACE=y
+CONFIG_HOTPLUG_CPU=y
+CONFIG_RCU_FANOUT=2
+CONFIG_RCU_FANOUT_LEAF=2
+CONFIG_DEBUG_LOCK_ALLOC=n
+CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
+CONFIG_RCU_EXPERT=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE07.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE07.boot
new file mode 100644
index 000000000..d44609937
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE07.boot
@@ -0,0 +1 @@
+nohz_full=2-9
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE08 b/tools/testing/selftests/rcutorture/configs/rcu/TREE08
new file mode 100644
index 000000000..fb1c763c1
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE08
@@ -0,0 +1,23 @@
+CONFIG_SMP=y
+CONFIG_NR_CPUS=8
+CONFIG_PREEMPT_NONE=n
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=y
+#CHECK#CONFIG_PREEMPT_RCU=y
+CONFIG_HZ_PERIODIC=n
+CONFIG_NO_HZ_IDLE=y
+CONFIG_NO_HZ_FULL=n
+CONFIG_RCU_FAST_NO_HZ=n
+CONFIG_RCU_TRACE=n
+CONFIG_HOTPLUG_CPU=n
+CONFIG_SUSPEND=n
+CONFIG_HIBERNATION=n
+CONFIG_RCU_FANOUT=3
+CONFIG_RCU_FANOUT_LEAF=2
+CONFIG_RCU_NOCB_CPU=y
+CONFIG_DEBUG_LOCK_ALLOC=n
+CONFIG_PROVE_LOCKING=n
+CONFIG_RCU_BOOST=n
+CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
+CONFIG_RCU_EXPERT=y
+CONFIG_RCU_EQS_DEBUG=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE08.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE08.boot
new file mode 100644
index 000000000..1bd8efc41
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE08.boot
@@ -0,0 +1,5 @@
+rcutorture.torture_type=sched
+rcupdate.rcu_self_test=1
+rcupdate.rcu_self_test_sched=1
+rcutree.rcu_fanout_exact=1
+rcu_nocbs=0-7
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE09 b/tools/testing/selftests/rcutorture/configs/rcu/TREE09
new file mode 100644
index 000000000..6710e749d
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE09
@@ -0,0 +1,18 @@
+CONFIG_SMP=n
+CONFIG_NR_CPUS=1
+CONFIG_PREEMPT_NONE=n
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=y
+#CHECK#CONFIG_PREEMPT_RCU=y
+CONFIG_HZ_PERIODIC=n
+CONFIG_NO_HZ_IDLE=y
+CONFIG_NO_HZ_FULL=n
+CONFIG_RCU_TRACE=n
+CONFIG_HOTPLUG_CPU=n
+CONFIG_SUSPEND=n
+CONFIG_HIBERNATION=n
+CONFIG_RCU_NOCB_CPU=n
+CONFIG_DEBUG_LOCK_ALLOC=n
+CONFIG_RCU_BOOST=n
+CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
+#CHECK#CONFIG_RCU_EXPERT=n
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/ver_functions.sh b/tools/testing/selftests/rcutorture/configs/rcu/ver_functions.sh
new file mode 100644
index 000000000..7bab82463
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/ver_functions.sh
@@ -0,0 +1,56 @@
+#!/bin/bash
+#
+# Kernel-version-dependent shell functions for the rest of the scripts.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# Copyright (C) IBM Corporation, 2013
+#
+# Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+
+# rcutorture_param_n_barrier_cbs bootparam-string
+#
+# Adds n_barrier_cbs rcutorture module parameter to kernels having it.
+rcutorture_param_n_barrier_cbs () {
+	if echo $1 | grep -q "rcutorture\.n_barrier_cbs"
+	then
+		:
+	else
+		echo rcutorture.n_barrier_cbs=4
+	fi
+}
+
+# rcutorture_param_onoff bootparam-string config-file
+#
+# Adds onoff rcutorture module parameters to kernels having it.
+rcutorture_param_onoff () {
+	if ! bootparam_hotplug_cpu "$1" && configfrag_hotplug_cpu "$2"
+	then
+		echo CPU-hotplug kernel, adding rcutorture onoff. 1>&2
+		echo rcutorture.onoff_interval=1000 rcutorture.onoff_holdoff=30
+	fi
+}
+
+# per_version_boot_params bootparam-string config-file seconds
+#
+# Adds per-version torture-module parameters to kernels supporting them.
+per_version_boot_params () {
+	echo $1 `rcutorture_param_onoff "$1" "$2"` \
+		`rcutorture_param_n_barrier_cbs "$1"` \
+		rcutorture.stat_interval=15 \
+		rcutorture.shutdown_secs=$3 \
+		rcutorture.test_no_idle_hz=1 \
+		rcutorture.verbose=1
+}
diff --git a/tools/testing/selftests/rcutorture/configs/rcuperf/CFLIST b/tools/testing/selftests/rcutorture/configs/rcuperf/CFLIST
new file mode 100644
index 000000000..c9f56cf20
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcuperf/CFLIST
@@ -0,0 +1 @@
+TREE
diff --git a/tools/testing/selftests/rcutorture/configs/rcuperf/CFcommon b/tools/testing/selftests/rcutorture/configs/rcuperf/CFcommon
new file mode 100644
index 000000000..a09816b8c
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcuperf/CFcommon
@@ -0,0 +1,2 @@
+CONFIG_RCU_PERF_TEST=y
+CONFIG_PRINTK_TIME=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcuperf/TINY b/tools/testing/selftests/rcutorture/configs/rcuperf/TINY
new file mode 100644
index 000000000..fb05ef527
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcuperf/TINY
@@ -0,0 +1,16 @@
+CONFIG_SMP=n
+CONFIG_PREEMPT_NONE=y
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=n
+#CHECK#CONFIG_TINY_RCU=y
+CONFIG_HZ_PERIODIC=n
+CONFIG_NO_HZ_IDLE=y
+CONFIG_NO_HZ_FULL=n
+CONFIG_RCU_FAST_NO_HZ=n
+CONFIG_RCU_NOCB_CPU=n
+CONFIG_DEBUG_LOCK_ALLOC=n
+CONFIG_PROVE_LOCKING=n
+CONFIG_RCU_BOOST=n
+CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
+CONFIG_RCU_EXPERT=y
+CONFIG_RCU_TRACE=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcuperf/TREE b/tools/testing/selftests/rcutorture/configs/rcuperf/TREE
new file mode 100644
index 000000000..721cfda76
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcuperf/TREE
@@ -0,0 +1,19 @@
+CONFIG_SMP=y
+CONFIG_PREEMPT_NONE=n
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=y
+#CHECK#CONFIG_PREEMPT_RCU=y
+CONFIG_HZ_PERIODIC=n
+CONFIG_NO_HZ_IDLE=y
+CONFIG_NO_HZ_FULL=n
+CONFIG_RCU_FAST_NO_HZ=n
+CONFIG_HOTPLUG_CPU=n
+CONFIG_SUSPEND=n
+CONFIG_HIBERNATION=n
+CONFIG_RCU_NOCB_CPU=n
+CONFIG_DEBUG_LOCK_ALLOC=n
+CONFIG_PROVE_LOCKING=n
+CONFIG_RCU_BOOST=n
+CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
+CONFIG_RCU_EXPERT=y
+CONFIG_RCU_TRACE=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcuperf/TREE54 b/tools/testing/selftests/rcutorture/configs/rcuperf/TREE54
new file mode 100644
index 000000000..7629f5dd7
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcuperf/TREE54
@@ -0,0 +1,22 @@
+CONFIG_SMP=y
+CONFIG_NR_CPUS=54
+CONFIG_PREEMPT_NONE=n
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=y
+#CHECK#CONFIG_PREEMPT_RCU=y
+CONFIG_HZ_PERIODIC=n
+CONFIG_NO_HZ_IDLE=y
+CONFIG_NO_HZ_FULL=n
+CONFIG_RCU_FAST_NO_HZ=n
+CONFIG_HOTPLUG_CPU=n
+CONFIG_SUSPEND=n
+CONFIG_HIBERNATION=n
+CONFIG_RCU_FANOUT=3
+CONFIG_RCU_FANOUT_LEAF=2
+CONFIG_RCU_NOCB_CPU=n
+CONFIG_DEBUG_LOCK_ALLOC=n
+CONFIG_PROVE_LOCKING=n
+CONFIG_RCU_BOOST=n
+CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
+CONFIG_RCU_EXPERT=y
+CONFIG_RCU_TRACE=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcuperf/ver_functions.sh b/tools/testing/selftests/rcutorture/configs/rcuperf/ver_functions.sh
new file mode 100644
index 000000000..d36b8fd6f
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcuperf/ver_functions.sh
@@ -0,0 +1,29 @@
+#!/bin/bash
+#
+# Torture-suite-dependent shell functions for the rest of the scripts.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# Copyright (C) IBM Corporation, 2015
+#
+# Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+
+# per_version_boot_params bootparam-string config-file seconds
+#
+# Adds per-version torture-module parameters to kernels supporting them.
+per_version_boot_params () {
+	echo $1 rcuperf.shutdown=1 \
+		rcuperf.verbose=1
+}
diff --git a/tools/testing/selftests/rcutorture/doc/TINY_RCU.txt b/tools/testing/selftests/rcutorture/doc/TINY_RCU.txt
new file mode 100644
index 000000000..a75b16991
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/doc/TINY_RCU.txt
@@ -0,0 +1,38 @@
+This document gives a brief rationale for the TINY_RCU test cases.
+
+
+Kconfig Parameters:
+
+CONFIG_DEBUG_LOCK_ALLOC -- Do all three and none of the three.
+CONFIG_PREEMPT_COUNT
+CONFIG_RCU_TRACE
+
+The theory here is that randconfig testing will hit the other six possible
+combinations of these parameters.
+
+
+Kconfig Parameters Ignored:
+
+CONFIG_DEBUG_OBJECTS_RCU_HEAD
+CONFIG_PROVE_RCU
+
+	In common code tested by TREE_RCU test cases.
+
+CONFIG_RCU_NOCB_CPU
+
+	Meaningless for TINY_RCU.
+
+CONFIG_RCU_STALL_COMMON
+CONFIG_RCU_TORTURE_TEST
+
+	Redundant with CONFIG_RCU_TRACE.
+
+CONFIG_HOTPLUG_CPU
+CONFIG_PREEMPT
+CONFIG_PREEMPT_RCU
+CONFIG_SMP
+CONFIG_TINY_RCU
+CONFIG_PREEMPT_RCU
+CONFIG_TREE_RCU
+
+	All forced by CONFIG_TINY_RCU.
diff --git a/tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt b/tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt
new file mode 100644
index 000000000..af6fca036
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt
@@ -0,0 +1,81 @@
+This document gives a brief rationale for the TREE_RCU-related test
+cases, a group that includes PREEMPT_RCU.
+
+
+Kconfig Parameters:
+
+CONFIG_DEBUG_LOCK_ALLOC -- Do three, covering CONFIG_PROVE_LOCKING & not.
+CONFIG_DEBUG_OBJECTS_RCU_HEAD -- Do one.
+CONFIG_HOTPLUG_CPU -- Do half.  (Every second.)
+CONFIG_HZ_PERIODIC -- Do one.
+CONFIG_NO_HZ_IDLE -- Do those not otherwise specified. (Groups of two.)
+CONFIG_NO_HZ_FULL -- Do two, one with partial CPU enablement.
+CONFIG_PREEMPT -- Do half.  (First three and #8.)
+CONFIG_PROVE_LOCKING -- Do several, covering CONFIG_DEBUG_LOCK_ALLOC=y and not.
+CONFIG_PROVE_RCU -- Hardwired to CONFIG_PROVE_LOCKING.
+CONFIG_RCU_BOOST -- one of PREEMPT_RCU.
+CONFIG_RCU_FANOUT -- Cover hierarchy, but overlap with others.
+CONFIG_RCU_FANOUT_LEAF -- Do one non-default.
+CONFIG_RCU_FAST_NO_HZ -- Do one, but not with all nohz_full CPUs.
+CONFIG_RCU_NOCB_CPU -- Do three, one with no rcu_nocbs CPUs, one with
+	rcu_nocbs=0, and one with all rcu_nocbs CPUs.
+CONFIG_RCU_TRACE -- Do half.
+CONFIG_SMP -- Need one !SMP for PREEMPT_RCU.
+CONFIG_RCU_EXPERT=n -- Do a few, but these have to be vanilla configurations.
+CONFIG_RCU_EQS_DEBUG -- Do at least one for CONFIG_NO_HZ_FULL and not.
+
+RCU-bh: Do one with PREEMPT and one with !PREEMPT.
+RCU-sched: Do one with PREEMPT but not BOOST.
+
+
+Boot parameters:
+
+nohz_full - do at least one.
+maxcpu -- do at least one.
+rcupdate.rcu_self_test_bh -- Do at least one each, offloaded and not.
+rcupdate.rcu_self_test_sched -- Do at least one each, offloaded and not.
+rcupdate.rcu_self_test -- Do at least one each, offloaded and not.
+rcutree.rcu_fanout_exact -- Do at least one.
+
+
+Kconfig Parameters Ignored:
+
+CONFIG_64BIT
+
+	Used only to check CONFIG_RCU_FANOUT value, inspection suffices.
+
+CONFIG_PREEMPT_COUNT
+CONFIG_PREEMPT_RCU
+
+	Redundant with CONFIG_PREEMPT, ignore.
+
+CONFIG_RCU_BOOST_DELAY
+
+	Inspection suffices, ignore.
+
+CONFIG_RCU_CPU_STALL_TIMEOUT
+
+	Inspection suffices, ignore.
+
+CONFIG_RCU_STALL_COMMON
+
+	Implied by TREE_RCU and PREEMPT_RCU.
+
+CONFIG_RCU_TORTURE_TEST
+CONFIG_RCU_TORTURE_TEST_RUNNABLE
+
+	Always used in KVM testing.
+
+CONFIG_PREEMPT_RCU
+CONFIG_TREE_RCU
+CONFIG_TINY_RCU
+CONFIG_TASKS_RCU
+
+	These are controlled by CONFIG_PREEMPT and/or CONFIG_SMP.
+
+CONFIG_SRCU
+
+	Selected by CONFIG_RCU_TORTURE_TEST, so cannot disable.
+
+
+boot parameters ignored: TBD
diff --git a/tools/testing/selftests/rcutorture/doc/initrd.txt b/tools/testing/selftests/rcutorture/doc/initrd.txt
new file mode 100644
index 000000000..833f826d6
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/doc/initrd.txt
@@ -0,0 +1,113 @@
+This document describes one way to create the initrd directory hierarchy
+in order to allow an initrd to be built into your kernel.  The trick
+here is to steal the initrd file used on your Linux laptop, Ubuntu in
+this case.  There are probably much better ways of doing this.
+
+That said, here are the commands:
+
+------------------------------------------------------------------------
+cd tools/testing/selftests/rcutorture
+zcat /initrd.img > /tmp/initrd.img.zcat
+mkdir initrd
+cd initrd
+cpio -id < /tmp/initrd.img.zcat
+------------------------------------------------------------------------
+
+Another way to create an initramfs image is using "dracut"[1], which is
+available on many distros, however the initramfs dracut generates is a cpio
+archive with another cpio archive in it, so an extra step is needed to create
+the initrd directory hierarchy.
+
+Here are the commands to create a initrd directory for rcutorture using
+dracut:
+
+------------------------------------------------------------------------
+dracut --no-hostonly --no-hostonly-cmdline --module "base bash shutdown" /tmp/initramfs.img
+cd tools/testing/selftests/rcutorture
+mkdir initrd
+cd initrd
+/usr/lib/dracut/skipcpio /tmp/initramfs.img | zcat | cpio -id < /tmp/initramfs.img
+------------------------------------------------------------------------
+
+Interestingly enough, if you are running rcutorture, you don't really
+need userspace in many cases.  Running without userspace has the
+advantage of allowing you to test your kernel independently of the
+distro in place, the root-filesystem layout, and so on.  To make this
+happen, put the following script in the initrd's tree's "/init" file,
+with 0755 mode.
+
+------------------------------------------------------------------------
+#!/bin/sh
+
+[ -d /dev ] || mkdir -m 0755 /dev
+[ -d /root ] || mkdir -m 0700 /root
+[ -d /sys ] || mkdir /sys
+[ -d /proc ] || mkdir /proc
+[ -d /tmp ] || mkdir /tmp
+mkdir -p /var/lock
+mount -t sysfs -o nodev,noexec,nosuid sysfs /sys
+mount -t proc -o nodev,noexec,nosuid proc /proc
+# Some things don't work properly without /etc/mtab.
+ln -sf /proc/mounts /etc/mtab
+
+# Note that this only becomes /dev on the real filesystem if udev's scripts
+# are used; which they will be, but it's worth pointing out
+if ! mount -t devtmpfs -o mode=0755 udev /dev; then
+	echo "W: devtmpfs not available, falling back to tmpfs for /dev"
+	mount -t tmpfs -o mode=0755 udev /dev
+	[ -e /dev/console ] || mknod --mode=600 /dev/console c 5 1
+	[ -e /dev/kmsg ] || mknod --mode=644 /dev/kmsg c 1 11
+	[ -e /dev/null ] || mknod --mode=666 /dev/null c 1 3
+fi
+
+mkdir /dev/pts
+mount -t devpts -o noexec,nosuid,gid=5,mode=0620 devpts /dev/pts || true
+mount -t tmpfs -o "nosuid,size=20%,mode=0755" tmpfs /run
+mkdir /run/initramfs
+# compatibility symlink for the pre-oneiric locations
+ln -s /run/initramfs /dev/.initramfs
+
+# Export relevant variables
+export ROOT=
+export ROOTDELAY=
+export ROOTFLAGS=
+export ROOTFSTYPE=
+export IP=
+export BOOT=
+export BOOTIF=
+export UBIMTD=
+export break=
+export init=/sbin/init
+export quiet=n
+export readonly=y
+export rootmnt=/root
+export debug=
+export panic=
+export blacklist=
+export resume=
+export resume_offset=
+export recovery=
+
+for i in /sys/devices/system/cpu/cpu*/online
+do
+	case $i in
+	'/sys/devices/system/cpu/cpu0/online')
+		;;
+	'/sys/devices/system/cpu/cpu*/online')
+		;;
+	*)
+		echo 1 > $i
+		;;
+	esac
+done
+
+while :
+do
+	sleep 10
+done
+------------------------------------------------------------------------
+
+References:
+[1]: https://dracut.wiki.kernel.org/index.php/Main_Page
+[2]: http://blog.elastocloud.org/2015/06/rapid-linux-kernel-devtest-with-qemu.html
+[3]: https://www.centos.org/forums/viewtopic.php?t=51621
diff --git a/tools/testing/selftests/rcutorture/doc/rcu-test-image.txt b/tools/testing/selftests/rcutorture/doc/rcu-test-image.txt
new file mode 100644
index 000000000..449cf579d
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/doc/rcu-test-image.txt
@@ -0,0 +1,42 @@
+This document describes one way to create the rcu-test-image file
+that contains the filesystem used by the guest-OS kernel.  There are
+probably much better ways of doing this, and this filesystem could no
+doubt be smaller.  It is probably also possible to simply download
+an appropriate image from any number of places.
+
+That said, here are the commands:
+
+------------------------------------------------------------------------
+dd if=/dev/zero of=rcu-test-image bs=400M count=1
+mkfs.ext3 ./rcu-test-image
+sudo mount -o loop ./rcu-test-image /mnt
+
+# Replace "precise" below with your favorite Ubuntu release.
+# Empirical evidence says this image will work for 64-bit, but...
+# Note that debootstrap does take a few minutes to run.  Or longer.
+sudo debootstrap --verbose --arch i386 precise /mnt http://archive.ubuntu.com/ubuntu
+cat << '___EOF___' | sudo dd of=/mnt/etc/fstab
+# UNCONFIGURED FSTAB FOR BASE SYSTEM
+#
+/dev/vda        /               ext3    defaults        1 1
+dev             /dev            tmpfs   rw              0 0
+tmpfs           /dev/shm        tmpfs   defaults        0 0
+devpts          /dev/pts        devpts  gid=5,mode=620  0 0
+sysfs           /sys            sysfs   defaults        0 0
+proc            /proc           proc    defaults        0 0
+___EOF___
+sudo umount /mnt
+------------------------------------------------------------------------
+
+
+References:
+
+	http://sripathikodi.blogspot.com/2010/02/creating-kvm-bootable-fedora-system.html
+	https://help.ubuntu.com/community/KVM/CreateGuests
+	https://help.ubuntu.com/community/JeOSVMBuilder
+	http://wiki.libvirt.org/page/UbuntuKVMWalkthrough
+	http://www.moe.co.uk/2011/01/07/pci_add_option_rom-failed-to-find-romfile-pxe-rtl8139-bin/ -- "apt-get install kvm-pxe"
+	http://www.landley.net/writing/rootfs-howto.html
+	http://en.wikipedia.org/wiki/Initrd
+	http://en.wikipedia.org/wiki/Cpio
+	http://wiki.libvirt.org/page/UbuntuKVMWalkthrough
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/.gitignore b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/.gitignore
new file mode 100644
index 000000000..712a3d41a
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/.gitignore
@@ -0,0 +1 @@
+srcu.c
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/Makefile b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/Makefile
new file mode 100644
index 000000000..4bed0b678
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/Makefile
@@ -0,0 +1,17 @@
+# SPDX-License-Identifier: GPL-2.0
+all: srcu.c store_buffering
+
+LINUX_SOURCE = ../../../../../..
+
+modified_srcu_input = $(LINUX_SOURCE)/include/linux/srcu.h \
+		      $(LINUX_SOURCE)/kernel/rcu/srcu.c
+
+modified_srcu_output = include/linux/srcu.h srcu.c
+
+include/linux/srcu.h: srcu.c
+
+srcu.c: modify_srcu.awk Makefile $(modified_srcu_input)
+	awk -f modify_srcu.awk $(modified_srcu_input) $(modified_srcu_output)
+
+store_buffering:
+	@cd tests/store_buffering; make
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/delay.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/delay.h
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/delay.h
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/export.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/export.h
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/export.h
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/mutex.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/mutex.h
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/mutex.h
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/percpu.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/percpu.h
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/percpu.h
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/preempt.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/preempt.h
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/preempt.h
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/rcupdate.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/rcupdate.h
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/rcupdate.h
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/sched.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/sched.h
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/sched.h
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/smp.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/smp.h
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/smp.h
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/workqueue.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/workqueue.h
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/workqueue.h
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/uapi/linux/types.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/uapi/linux/types.h
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/uapi/linux/types.h
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/include/linux/.gitignore b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/include/linux/.gitignore
new file mode 100644
index 000000000..1d016e669
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/include/linux/.gitignore
@@ -0,0 +1 @@
+srcu.h
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/include/linux/kconfig.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/include/linux/kconfig.h
new file mode 100644
index 000000000..f2860dd1b
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/include/linux/kconfig.h
@@ -0,0 +1 @@
+#include <LINUX_SOURCE/linux/kconfig.h>
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/include/linux/types.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/include/linux/types.h
new file mode 100644
index 000000000..891ad13e9
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/include/linux/types.h
@@ -0,0 +1,156 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * This header has been modifies to remove definitions of types that
+ * are defined in standard userspace headers or are problematic for some
+ * other reason.
+ */
+
+#ifndef _LINUX_TYPES_H
+#define _LINUX_TYPES_H
+
+#define __EXPORTED_HEADERS__
+#include <uapi/linux/types.h>
+
+#ifndef __ASSEMBLY__
+
+#define DECLARE_BITMAP(name, bits) \
+	unsigned long name[BITS_TO_LONGS(bits)]
+
+typedef __u32 __kernel_dev_t;
+
+/* bsd */
+typedef unsigned char		u_char;
+typedef unsigned short		u_short;
+typedef unsigned int		u_int;
+typedef unsigned long		u_long;
+
+/* sysv */
+typedef unsigned char		unchar;
+typedef unsigned short		ushort;
+typedef unsigned int		uint;
+typedef unsigned long		ulong;
+
+#ifndef __BIT_TYPES_DEFINED__
+#define __BIT_TYPES_DEFINED__
+
+typedef		__u8		u_int8_t;
+typedef		__s8		int8_t;
+typedef		__u16		u_int16_t;
+typedef		__s16		int16_t;
+typedef		__u32		u_int32_t;
+typedef		__s32		int32_t;
+
+#endif /* !(__BIT_TYPES_DEFINED__) */
+
+typedef		__u8		uint8_t;
+typedef		__u16		uint16_t;
+typedef		__u32		uint32_t;
+
+/* this is a special 64bit data type that is 8-byte aligned */
+#define aligned_u64 __u64 __attribute__((aligned(8)))
+#define aligned_be64 __be64 __attribute__((aligned(8)))
+#define aligned_le64 __le64 __attribute__((aligned(8)))
+
+/**
+ * The type used for indexing onto a disc or disc partition.
+ *
+ * Linux always considers sectors to be 512 bytes long independently
+ * of the devices real block size.
+ *
+ * blkcnt_t is the type of the inode's block count.
+ */
+#ifdef CONFIG_LBDAF
+typedef u64 sector_t;
+#else
+typedef unsigned long sector_t;
+#endif
+
+/*
+ * The type of an index into the pagecache.
+ */
+#define pgoff_t unsigned long
+
+/*
+ * A dma_addr_t can hold any valid DMA address, i.e., any address returned
+ * by the DMA API.
+ *
+ * If the DMA API only uses 32-bit addresses, dma_addr_t need only be 32
+ * bits wide.  Bus addresses, e.g., PCI BARs, may be wider than 32 bits,
+ * but drivers do memory-mapped I/O to ioremapped kernel virtual addresses,
+ * so they don't care about the size of the actual bus addresses.
+ */
+#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
+typedef u64 dma_addr_t;
+#else
+typedef u32 dma_addr_t;
+#endif
+
+#ifdef CONFIG_PHYS_ADDR_T_64BIT
+typedef u64 phys_addr_t;
+#else
+typedef u32 phys_addr_t;
+#endif
+
+typedef phys_addr_t resource_size_t;
+
+/*
+ * This type is the placeholder for a hardware interrupt number. It has to be
+ * big enough to enclose whatever representation is used by a given platform.
+ */
+typedef unsigned long irq_hw_number_t;
+
+typedef struct {
+	int counter;
+} atomic_t;
+
+#ifdef CONFIG_64BIT
+typedef struct {
+	long counter;
+} atomic64_t;
+#endif
+
+struct list_head {
+	struct list_head *next, *prev;
+};
+
+struct hlist_head {
+	struct hlist_node *first;
+};
+
+struct hlist_node {
+	struct hlist_node *next, **pprev;
+};
+
+/**
+ * struct callback_head - callback structure for use with RCU and task_work
+ * @next: next update requests in a list
+ * @func: actual update function to call after the grace period.
+ *
+ * The struct is aligned to size of pointer. On most architectures it happens
+ * naturally due ABI requirements, but some architectures (like CRIS) have
+ * weird ABI and we need to ask it explicitly.
+ *
+ * The alignment is required to guarantee that bits 0 and 1 of @next will be
+ * clear under normal conditions -- as long as we use call_rcu(),
+ * call_rcu_bh(), call_rcu_sched(), or call_srcu() to queue callback.
+ *
+ * This guarantee is important for few reasons:
+ *  - future call_rcu_lazy() will make use of lower bits in the pointer;
+ *  - the structure shares storage spacer in struct page with @compound_head,
+ *    which encode PageTail() in bit 0. The guarantee is needed to avoid
+ *    false-positive PageTail().
+ */
+struct callback_head {
+	struct callback_head *next;
+	void (*func)(struct callback_head *head);
+} __attribute__((aligned(sizeof(void *))));
+#define rcu_head callback_head
+
+typedef void (*rcu_callback_t)(struct rcu_head *head);
+typedef void (*call_rcu_func_t)(struct rcu_head *head, rcu_callback_t func);
+
+/* clocksource cycle base type */
+typedef u64 cycle_t;
+
+#endif /*  __ASSEMBLY__ */
+#endif /* _LINUX_TYPES_H */
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/modify_srcu.awk b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/modify_srcu.awk
new file mode 100755
index 000000000..e05182d3e
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/modify_srcu.awk
@@ -0,0 +1,376 @@
+#!/usr/bin/awk -f
+# SPDX-License-Identifier: GPL-2.0
+
+# Modify SRCU for formal verification. The first argument should be srcu.h and
+# the second should be srcu.c. Outputs modified srcu.h and srcu.c into the
+# current directory.
+
+BEGIN {
+	if (ARGC != 5) {
+		print "Usange: input.h input.c output.h output.c" > "/dev/stderr";
+		exit 1;
+	}
+	h_output = ARGV[3];
+	c_output = ARGV[4];
+	ARGC = 3;
+
+	# Tokenize using FS and not RS as FS supports regular expressions. Each
+	# record is one line of source, except that backslashed lines are
+	# combined. Comments are treated as field separators, as are quotes.
+	quote_regexp="\"([^\\\\\"]|\\\\.)*\"";
+	comment_regexp="\\/\\*([^*]|\\*+[^*/])*\\*\\/|\\/\\/.*(\n|$)";
+	FS="([ \\\\\t\n\v\f;,.=(){}+*/<>&|^-]|\\[|\\]|" comment_regexp "|" quote_regexp ")+";
+
+	inside_srcu_struct = 0;
+	inside_srcu_init_def = 0;
+	srcu_init_param_name = "";
+	in_macro = 0;
+	brace_nesting = 0;
+	paren_nesting = 0;
+
+	# Allow the manipulation of the last field separator after has been
+	# seen.
+	last_fs = "";
+	# Whether the last field separator was intended to be output.
+	last_fs_print = 0;
+
+	# rcu_batches stores the initialization for each instance of struct
+	# rcu_batch
+
+	in_comment = 0;
+
+	outputfile = "";
+}
+
+{
+	prev_outputfile = outputfile;
+	if (FILENAME ~ /\.h$/) {
+		outputfile = h_output;
+		if (FNR != NR) {
+			print "Incorrect file order" > "/dev/stderr";
+			exit 1;
+		}
+	}
+	else
+		outputfile = c_output;
+
+	if (prev_outputfile && outputfile != prev_outputfile) {
+		new_outputfile = outputfile;
+		outputfile = prev_outputfile;
+		update_fieldsep("", 0);
+		outputfile = new_outputfile;
+	}
+}
+
+# Combine the next line into $0.
+function combine_line() {
+	ret = getline next_line;
+	if (ret == 0) {
+		# Don't allow two consecutive getlines at the end of the file
+		if (eof_found) {
+			print "Error: expected more input." > "/dev/stderr";
+			exit 1;
+		} else {
+			eof_found = 1;
+		}
+	} else if (ret == -1) {
+		print "Error reading next line of file" FILENAME > "/dev/stderr";
+		exit 1;
+	}
+	$0 = $0 "\n" next_line;
+}
+
+# Combine backslashed lines and multiline comments.
+function combine_backslashes() {
+	while (/\\$|\/\*([^*]|\*+[^*\/])*\**$/) {
+		combine_line();
+	}
+}
+
+function read_line() {
+	combine_line();
+	combine_backslashes();
+}
+
+# Print out field separators and update variables that depend on them. Only
+# print if p is true. Call with sep="" and p=0 to print out the last field
+# separator.
+function update_fieldsep(sep, p) {
+	# Count braces
+	sep_tmp = sep;
+	gsub(quote_regexp "|" comment_regexp, "", sep_tmp);
+	while (1)
+	{
+		if (sub("[^{}()]*\\{", "", sep_tmp)) {
+			brace_nesting++;
+			continue;
+		}
+		if (sub("[^{}()]*\\}", "", sep_tmp)) {
+			brace_nesting--;
+			if (brace_nesting < 0) {
+				print "Unbalanced braces!" > "/dev/stderr";
+				exit 1;
+			}
+			continue;
+		}
+		if (sub("[^{}()]*\\(", "", sep_tmp)) {
+			paren_nesting++;
+			continue;
+		}
+		if (sub("[^{}()]*\\)", "", sep_tmp)) {
+			paren_nesting--;
+			if (paren_nesting < 0) {
+				print "Unbalanced parenthesis!" > "/dev/stderr";
+				exit 1;
+			}
+			continue;
+		}
+
+		break;
+	}
+
+	if (last_fs_print)
+		printf("%s", last_fs) > outputfile;
+	last_fs = sep;
+	last_fs_print = p;
+}
+
+# Shifts the fields down by n positions. Calls next if there are no more. If p
+# is true then print out field separators.
+function shift_fields(n, p) {
+	do {
+		if (match($0, FS) > 0) {
+			update_fieldsep(substr($0, RSTART, RLENGTH), p);
+			if (RSTART + RLENGTH <= length())
+				$0 = substr($0, RSTART + RLENGTH);
+			else
+				$0 = "";
+		} else {
+			update_fieldsep("", 0);
+			print "" > outputfile;
+			next;
+		}
+	} while (--n > 0);
+}
+
+# Shifts and prints the first n fields.
+function print_fields(n) {
+	do {
+		update_fieldsep("", 0);
+		printf("%s", $1) > outputfile;
+		shift_fields(1, 1);
+	} while (--n > 0);
+}
+
+{
+	combine_backslashes();
+}
+
+# Print leading FS
+{
+	if (match($0, "^(" FS ")+") > 0) {
+		update_fieldsep(substr($0, RSTART, RLENGTH), 1);
+		if (RSTART + RLENGTH <= length())
+			$0 = substr($0, RSTART + RLENGTH);
+		else
+			$0 = "";
+	}
+}
+
+# Parse the line.
+{
+	while (NF > 0) {
+		if ($1 == "struct" && NF < 3) {
+			read_line();
+			continue;
+		}
+
+		if (FILENAME ~ /\.h$/ && !inside_srcu_struct &&
+		    brace_nesting == 0 && paren_nesting == 0 &&
+		    $1 == "struct" && $2 == "srcu_struct" &&
+		    $0 ~ "^struct(" FS ")+srcu_struct(" FS ")+\\{") {
+			inside_srcu_struct = 1;
+			print_fields(2);
+			continue;
+		}
+		if (inside_srcu_struct && brace_nesting == 0 &&
+		    paren_nesting == 0) {
+			inside_srcu_struct = 0;
+			update_fieldsep("", 0);
+			for (name in rcu_batches)
+				print "extern struct rcu_batch " name ";" > outputfile;
+		}
+
+		if (inside_srcu_struct && $1 == "struct" && $2 == "rcu_batch") {
+			# Move rcu_batches outside of the struct.
+			rcu_batches[$3] = "";
+			shift_fields(3, 1);
+			sub(/;[[:space:]]*$/, "", last_fs);
+			continue;
+		}
+
+		if (FILENAME ~ /\.h$/ && !inside_srcu_init_def &&
+		    $1 == "#define" && $2 == "__SRCU_STRUCT_INIT") {
+			inside_srcu_init_def = 1;
+			srcu_init_param_name = $3;
+			in_macro = 1;
+			print_fields(3);
+			continue;
+		}
+		if (inside_srcu_init_def && brace_nesting == 0 &&
+		    paren_nesting == 0) {
+			inside_srcu_init_def = 0;
+			in_macro = 0;
+			continue;
+		}
+
+		if (inside_srcu_init_def && brace_nesting == 1 &&
+		    paren_nesting == 0 && last_fs ~ /\.[[:space:]]*$/ &&
+		    $1 ~ /^[[:alnum:]_]+$/) {
+			name = $1;
+			if (name in rcu_batches) {
+				# Remove the dot.
+				sub(/\.[[:space:]]*$/, "", last_fs);
+
+				old_record = $0;
+				do
+					shift_fields(1, 0);
+				while (last_fs !~ /,/ || paren_nesting > 0);
+				end_loc = length(old_record) - length($0);
+				end_loc += index(last_fs, ",") - length(last_fs);
+
+				last_fs = substr(last_fs, index(last_fs, ",") + 1);
+				last_fs_print = 1;
+
+				match(old_record, "^"name"("FS")+=");
+				start_loc = RSTART + RLENGTH;
+
+				len = end_loc - start_loc;
+				initializer = substr(old_record, start_loc, len);
+				gsub(srcu_init_param_name "\\.", "", initializer);
+				rcu_batches[name] = initializer;
+				continue;
+			}
+		}
+
+		# Don't include a nonexistent file
+		if (!in_macro && $1 == "#include" && /^#include[[:space:]]+"rcu\.h"/) {
+			update_fieldsep("", 0);
+			next;
+		}
+
+		# Ignore most preprocessor stuff.
+		if (!in_macro && $1 ~ /#/) {
+			break;
+		}
+
+		if (brace_nesting > 0 && $1 ~ "^[[:alnum:]_]+$" && NF < 2) {
+			read_line();
+			continue;
+		}
+		if (brace_nesting > 0 &&
+		    $0 ~ "^[[:alnum:]_]+[[:space:]]*(\\.|->)[[:space:]]*[[:alnum:]_]+" &&
+		    $2 in rcu_batches) {
+			# Make uses of rcu_batches global. Somewhat unreliable.
+			shift_fields(1, 0);
+			print_fields(1);
+			continue;
+		}
+
+		if ($1 == "static" && NF < 3) {
+			read_line();
+			continue;
+		}
+		if ($1 == "static" && ($2 == "bool" && $3 == "try_check_zero" ||
+		                       $2 == "void" && $3 == "srcu_flip")) {
+			shift_fields(1, 1);
+			print_fields(2);
+			continue;
+		}
+
+		# Distinguish between read-side and write-side memory barriers.
+		if ($1 == "smp_mb" && NF < 2) {
+			read_line();
+			continue;
+		}
+		if (match($0, /^smp_mb[[:space:]();\/*]*[[:alnum:]]/)) {
+			barrier_letter = substr($0, RLENGTH, 1);
+			if (barrier_letter ~ /A|D/)
+				new_barrier_name = "sync_smp_mb";
+			else if (barrier_letter ~ /B|C/)
+				new_barrier_name = "rs_smp_mb";
+			else {
+				print "Unrecognized memory barrier." > "/dev/null";
+				exit 1;
+			}
+
+			shift_fields(1, 1);
+			printf("%s", new_barrier_name) > outputfile;
+			continue;
+		}
+
+		# Skip definition of rcu_synchronize, since it is already
+		# defined in misc.h. Only present in old versions of srcu.
+		if (brace_nesting == 0 && paren_nesting == 0 &&
+		    $1 == "struct" && $2 == "rcu_synchronize" &&
+		    $0 ~ "^struct(" FS ")+rcu_synchronize(" FS ")+\\{") {
+			shift_fields(2, 0);
+			while (brace_nesting) {
+				if (NF < 2)
+					read_line();
+				shift_fields(1, 0);
+			}
+		}
+
+		# Skip definition of wakeme_after_rcu for the same reason
+		if (brace_nesting == 0 && $1 == "static" && $2 == "void" &&
+		    $3 == "wakeme_after_rcu") {
+			while (NF < 5)
+				read_line();
+			shift_fields(3, 0);
+			do {
+				while (NF < 3)
+					read_line();
+				shift_fields(1, 0);
+			} while (paren_nesting || brace_nesting);
+		}
+
+		if ($1 ~ /^(unsigned|long)$/ && NF < 3) {
+			read_line();
+			continue;
+		}
+
+		# Give srcu_batches_completed the correct type for old SRCU.
+		if (brace_nesting == 0 && $1 == "long" &&
+		    $2 == "srcu_batches_completed") {
+			update_fieldsep("", 0);
+			printf("unsigned ") > outputfile;
+			print_fields(2);
+			continue;
+		}
+		if (brace_nesting == 0 && $1 == "unsigned" && $2 == "long" &&
+		    $3 == "srcu_batches_completed") {
+			print_fields(3);
+			continue;
+		}
+
+		# Just print out the input code by default.
+		print_fields(1);
+	}
+	update_fieldsep("", 0);
+	print > outputfile;
+	next;
+}
+
+END {
+	update_fieldsep("", 0);
+
+	if (brace_nesting != 0) {
+		print "Unbalanced braces!" > "/dev/stderr";
+		exit 1;
+	}
+
+	# Define the rcu_batches
+	for (name in rcu_batches)
+		print "struct rcu_batch " name " = " rcu_batches[name] ";" > c_output;
+}
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/assume.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/assume.h
new file mode 100644
index 000000000..570a49d9d
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/assume.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef ASSUME_H
+#define ASSUME_H
+
+/* Provide an assumption macro that can be disabled for gcc. */
+#ifdef RUN
+#define assume(x) \
+	do { \
+		/* Evaluate x to suppress warnings. */ \
+		(void) (x); \
+	} while (0)
+
+#else
+#define assume(x) __CPROVER_assume(x)
+#endif
+
+#endif
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/barriers.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/barriers.h
new file mode 100644
index 000000000..3f95a768a
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/barriers.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef BARRIERS_H
+#define BARRIERS_H
+
+#define barrier() __asm__ __volatile__("" : : : "memory")
+
+#ifdef RUN
+#define smp_mb() __sync_synchronize()
+#define smp_mb__after_unlock_lock() __sync_synchronize()
+#else
+/*
+ * Copied from CBMC's implementation of __sync_synchronize(), which
+ * seems to be disabled by default.
+ */
+#define smp_mb() __CPROVER_fence("WWfence", "RRfence", "RWfence", "WRfence", \
+				 "WWcumul", "RRcumul", "RWcumul", "WRcumul")
+#define smp_mb__after_unlock_lock() __CPROVER_fence("WWfence", "RRfence", "RWfence", "WRfence", \
+				    "WWcumul", "RRcumul", "RWcumul", "WRcumul")
+#endif
+
+/*
+ * Allow memory barriers to be disabled in either the read or write side
+ * of SRCU individually.
+ */
+
+#ifndef NO_SYNC_SMP_MB
+#define sync_smp_mb() smp_mb()
+#else
+#define sync_smp_mb() do {} while (0)
+#endif
+
+#ifndef NO_READ_SIDE_SMP_MB
+#define rs_smp_mb() smp_mb()
+#else
+#define rs_smp_mb() do {} while (0)
+#endif
+
+#define READ_ONCE(x) (*(volatile typeof(x) *) &(x))
+#define WRITE_ONCE(x) ((*(volatile typeof(x) *) &(x)) = (val))
+
+#endif
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/bug_on.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/bug_on.h
new file mode 100644
index 000000000..5e7912c6a
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/bug_on.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef BUG_ON_H
+#define BUG_ON_H
+
+#include <assert.h>
+
+#define BUG() assert(0)
+#define BUG_ON(x) assert(!(x))
+
+/* Does it make sense to treat warnings as errors? */
+#define WARN() BUG()
+#define WARN_ON(x) (BUG_ON(x), false)
+
+#endif
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/combined_source.c b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/combined_source.c
new file mode 100644
index 000000000..e67ee5b3d
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/combined_source.c
@@ -0,0 +1,14 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <config.h>
+
+/* Include all source files. */
+
+#include "include_srcu.c"
+
+#include "preempt.c"
+#include "misc.c"
+
+/* Used by test.c files */
+#include <pthread.h>
+#include <stdlib.h>
+#include <linux/srcu.h>
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/config.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/config.h
new file mode 100644
index 000000000..283d71033
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/config.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* "Cheater" definitions based on restricted Kconfig choices. */
+
+#undef CONFIG_TINY_RCU
+#undef __CHECKER__
+#undef CONFIG_DEBUG_LOCK_ALLOC
+#undef CONFIG_DEBUG_OBJECTS_RCU_HEAD
+#undef CONFIG_HOTPLUG_CPU
+#undef CONFIG_MODULES
+#undef CONFIG_NO_HZ_FULL_SYSIDLE
+#undef CONFIG_PREEMPT_COUNT
+#undef CONFIG_PREEMPT_RCU
+#undef CONFIG_PROVE_RCU
+#undef CONFIG_RCU_NOCB_CPU
+#undef CONFIG_RCU_NOCB_CPU_ALL
+#undef CONFIG_RCU_STALL_COMMON
+#undef CONFIG_RCU_TRACE
+#undef CONFIG_RCU_USER_QS
+#undef CONFIG_TASKS_RCU
+#define CONFIG_TREE_RCU
+
+#define CONFIG_GENERIC_ATOMIC64
+
+#if NR_CPUS > 1
+#define CONFIG_SMP
+#else
+#undef CONFIG_SMP
+#endif
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/include_srcu.c b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/include_srcu.c
new file mode 100644
index 000000000..e5202d4cf
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/include_srcu.c
@@ -0,0 +1,32 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <config.h>
+
+#include <assert.h>
+#include <errno.h>
+#include <inttypes.h>
+#include <pthread.h>
+#include <stddef.h>
+#include <string.h>
+#include <sys/types.h>
+
+#include "int_typedefs.h"
+
+#include "barriers.h"
+#include "bug_on.h"
+#include "locks.h"
+#include "misc.h"
+#include "preempt.h"
+#include "percpu.h"
+#include "workqueues.h"
+
+#ifdef USE_SIMPLE_SYNC_SRCU
+#define synchronize_srcu(sp) synchronize_srcu_original(sp)
+#endif
+
+#include <srcu.c>
+
+#ifdef USE_SIMPLE_SYNC_SRCU
+#undef synchronize_srcu
+
+#include "simple_sync_srcu.c"
+#endif
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/int_typedefs.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/int_typedefs.h
new file mode 100644
index 000000000..0dd27aa51
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/int_typedefs.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef INT_TYPEDEFS_H
+#define INT_TYPEDEFS_H
+
+#include <inttypes.h>
+
+typedef int8_t s8;
+typedef uint8_t u8;
+typedef int16_t s16;
+typedef uint16_t u16;
+typedef int32_t s32;
+typedef uint32_t u32;
+typedef int64_t s64;
+typedef uint64_t u64;
+
+typedef int8_t __s8;
+typedef uint8_t __u8;
+typedef int16_t __s16;
+typedef uint16_t __u16;
+typedef int32_t __s32;
+typedef uint32_t __u32;
+typedef int64_t __s64;
+typedef uint64_t __u64;
+
+#define S8_C(x) INT8_C(x)
+#define U8_C(x) UINT8_C(x)
+#define S16_C(x) INT16_C(x)
+#define U16_C(x) UINT16_C(x)
+#define S32_C(x) INT32_C(x)
+#define U32_C(x) UINT32_C(x)
+#define S64_C(x) INT64_C(x)
+#define U64_C(x) UINT64_C(x)
+
+#endif
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/locks.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/locks.h
new file mode 100644
index 000000000..cf6938d67
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/locks.h
@@ -0,0 +1,221 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef LOCKS_H
+#define LOCKS_H
+
+#include <limits.h>
+#include <pthread.h>
+#include <stdbool.h>
+
+#include "assume.h"
+#include "bug_on.h"
+#include "preempt.h"
+
+int nondet_int(void);
+
+#define __acquire(x)
+#define __acquires(x)
+#define __release(x)
+#define __releases(x)
+
+/* Only use one lock mechanism. Select which one. */
+#ifdef PTHREAD_LOCK
+struct lock_impl {
+	pthread_mutex_t mutex;
+};
+
+static inline void lock_impl_lock(struct lock_impl *lock)
+{
+	BUG_ON(pthread_mutex_lock(&lock->mutex));
+}
+
+static inline void lock_impl_unlock(struct lock_impl *lock)
+{
+	BUG_ON(pthread_mutex_unlock(&lock->mutex));
+}
+
+static inline bool lock_impl_trylock(struct lock_impl *lock)
+{
+	int err = pthread_mutex_trylock(&lock->mutex);
+
+	if (!err)
+		return true;
+	else if (err == EBUSY)
+		return false;
+	BUG();
+}
+
+static inline void lock_impl_init(struct lock_impl *lock)
+{
+	pthread_mutex_init(&lock->mutex, NULL);
+}
+
+#define LOCK_IMPL_INITIALIZER {.mutex = PTHREAD_MUTEX_INITIALIZER}
+
+#else /* !defined(PTHREAD_LOCK) */
+/* Spinlock that assumes that it always gets the lock immediately. */
+
+struct lock_impl {
+	bool locked;
+};
+
+static inline bool lock_impl_trylock(struct lock_impl *lock)
+{
+#ifdef RUN
+	/* TODO: Should this be a test and set? */
+	return __sync_bool_compare_and_swap(&lock->locked, false, true);
+#else
+	__CPROVER_atomic_begin();
+	bool old_locked = lock->locked;
+	lock->locked = true;
+	__CPROVER_atomic_end();
+
+	/* Minimal barrier to prevent accesses leaking out of lock. */
+	__CPROVER_fence("RRfence", "RWfence");
+
+	return !old_locked;
+#endif
+}
+
+static inline void lock_impl_lock(struct lock_impl *lock)
+{
+	/*
+	 * CBMC doesn't support busy waiting, so just assume that the
+	 * lock is available.
+	 */
+	assume(lock_impl_trylock(lock));
+
+	/*
+	 * If the lock was already held by this thread then the assumption
+	 * is unsatisfiable (deadlock).
+	 */
+}
+
+static inline void lock_impl_unlock(struct lock_impl *lock)
+{
+#ifdef RUN
+	BUG_ON(!__sync_bool_compare_and_swap(&lock->locked, true, false));
+#else
+	/* Minimal barrier to prevent accesses leaking out of lock. */
+	__CPROVER_fence("RWfence", "WWfence");
+
+	__CPROVER_atomic_begin();
+	bool old_locked = lock->locked;
+	lock->locked = false;
+	__CPROVER_atomic_end();
+
+	BUG_ON(!old_locked);
+#endif
+}
+
+static inline void lock_impl_init(struct lock_impl *lock)
+{
+	lock->locked = false;
+}
+
+#define LOCK_IMPL_INITIALIZER {.locked = false}
+
+#endif /* !defined(PTHREAD_LOCK) */
+
+/*
+ * Implement spinlocks using the lock mechanism. Wrap the lock to prevent mixing
+ * locks of different types.
+ */
+typedef struct {
+	struct lock_impl internal_lock;
+} spinlock_t;
+
+#define SPIN_LOCK_UNLOCKED {.internal_lock = LOCK_IMPL_INITIALIZER}
+#define __SPIN_LOCK_UNLOCKED(x) SPIN_LOCK_UNLOCKED
+#define DEFINE_SPINLOCK(x) spinlock_t x = SPIN_LOCK_UNLOCKED
+
+static inline void spin_lock_init(spinlock_t *lock)
+{
+	lock_impl_init(&lock->internal_lock);
+}
+
+static inline void spin_lock(spinlock_t *lock)
+{
+	/*
+	 * Spin locks also need to be removed in order to eliminate all
+	 * memory barriers. They are only used by the write side anyway.
+	 */
+#ifndef NO_SYNC_SMP_MB
+	preempt_disable();
+	lock_impl_lock(&lock->internal_lock);
+#endif
+}
+
+static inline void spin_unlock(spinlock_t *lock)
+{
+#ifndef NO_SYNC_SMP_MB
+	lock_impl_unlock(&lock->internal_lock);
+	preempt_enable();
+#endif
+}
+
+/* Don't bother with interrupts */
+#define spin_lock_irq(lock) spin_lock(lock)
+#define spin_unlock_irq(lock) spin_unlock(lock)
+#define spin_lock_irqsave(lock, flags) spin_lock(lock)
+#define spin_unlock_irqrestore(lock, flags) spin_unlock(lock)
+
+/*
+ * This is supposed to return an int, but I think that a bool should work as
+ * well.
+ */
+static inline bool spin_trylock(spinlock_t *lock)
+{
+#ifndef NO_SYNC_SMP_MB
+	preempt_disable();
+	return lock_impl_trylock(&lock->internal_lock);
+#else
+	return true;
+#endif
+}
+
+struct completion {
+	/* Hopefuly this won't overflow. */
+	unsigned int count;
+};
+
+#define COMPLETION_INITIALIZER(x) {.count = 0}
+#define DECLARE_COMPLETION(x) struct completion x = COMPLETION_INITIALIZER(x)
+#define DECLARE_COMPLETION_ONSTACK(x) DECLARE_COMPLETION(x)
+
+static inline void init_completion(struct completion *c)
+{
+	c->count = 0;
+}
+
+static inline void wait_for_completion(struct completion *c)
+{
+	unsigned int prev_count = __sync_fetch_and_sub(&c->count, 1);
+
+	assume(prev_count);
+}
+
+static inline void complete(struct completion *c)
+{
+	unsigned int prev_count = __sync_fetch_and_add(&c->count, 1);
+
+	BUG_ON(prev_count == UINT_MAX);
+}
+
+/* This function probably isn't very useful for CBMC. */
+static inline bool try_wait_for_completion(struct completion *c)
+{
+	BUG();
+}
+
+static inline bool completion_done(struct completion *c)
+{
+	return c->count;
+}
+
+/* TODO: Implement complete_all */
+static inline void complete_all(struct completion *c)
+{
+	BUG();
+}
+
+#endif
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/misc.c b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/misc.c
new file mode 100644
index 000000000..9440cc39e
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/misc.c
@@ -0,0 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <config.h>
+
+#include "misc.h"
+#include "bug_on.h"
+
+struct rcu_head;
+
+void wakeme_after_rcu(struct rcu_head *head)
+{
+	BUG();
+}
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/misc.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/misc.h
new file mode 100644
index 000000000..aca50030f
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/misc.h
@@ -0,0 +1,58 @@
+#ifndef MISC_H
+#define MISC_H
+
+#include "assume.h"
+#include "int_typedefs.h"
+#include "locks.h"
+
+#include <linux/types.h>
+
+/* Probably won't need to deal with bottom halves. */
+static inline void local_bh_disable(void) {}
+static inline void local_bh_enable(void) {}
+
+#define MODULE_ALIAS(X)
+#define module_param(...)
+#define EXPORT_SYMBOL_GPL(x)
+
+#define container_of(ptr, type, member) ({			\
+	const typeof(((type *)0)->member) *__mptr = (ptr);	\
+	(type *)((char *)__mptr - offsetof(type, member));	\
+})
+
+#ifndef USE_SIMPLE_SYNC_SRCU
+/* Abuse udelay to make sure that busy loops terminate. */
+#define udelay(x) assume(0)
+
+#else
+
+/* The simple custom synchronize_srcu is ok with try_check_zero failing. */
+#define udelay(x) do { } while (0)
+#endif
+
+#define trace_rcu_torture_read(rcutorturename, rhp, secs, c_old, c) \
+	do { } while (0)
+
+#define notrace
+
+/* Avoid including rcupdate.h */
+struct rcu_synchronize {
+	struct rcu_head head;
+	struct completion completion;
+};
+
+void wakeme_after_rcu(struct rcu_head *head);
+
+#define rcu_lock_acquire(a) do { } while (0)
+#define rcu_lock_release(a) do { } while (0)
+#define rcu_lockdep_assert(c, s) do { } while (0)
+#define RCU_LOCKDEP_WARN(c, s) do { } while (0)
+
+/* Let CBMC non-deterministically choose switch between normal and expedited. */
+bool rcu_gp_is_normal(void);
+bool rcu_gp_is_expedited(void);
+
+/* Do the same for old versions of rcu. */
+#define rcu_expedited (rcu_gp_is_expedited())
+
+#endif
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/percpu.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/percpu.h
new file mode 100644
index 000000000..27e67a3f2
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/percpu.h
@@ -0,0 +1,93 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef PERCPU_H
+#define PERCPU_H
+
+#include <stddef.h>
+#include "bug_on.h"
+#include "preempt.h"
+
+#define __percpu
+
+/* Maximum size of any percpu data. */
+#define PERCPU_OFFSET (4 * sizeof(long))
+
+/* Ignore alignment, as CBMC doesn't care about false sharing. */
+#define alloc_percpu(type) __alloc_percpu(sizeof(type), 1)
+
+static inline void *__alloc_percpu(size_t size, size_t align)
+{
+	BUG();
+	return NULL;
+}
+
+static inline void free_percpu(void *ptr)
+{
+	BUG();
+}
+
+#define per_cpu_ptr(ptr, cpu) \
+	((typeof(ptr)) ((char *) (ptr) + PERCPU_OFFSET * cpu))
+
+#define __this_cpu_inc(pcp) __this_cpu_add(pcp, 1)
+#define __this_cpu_dec(pcp) __this_cpu_sub(pcp, 1)
+#define __this_cpu_sub(pcp, n) __this_cpu_add(pcp, -(typeof(pcp)) (n))
+
+#define this_cpu_inc(pcp) this_cpu_add(pcp, 1)
+#define this_cpu_dec(pcp) this_cpu_sub(pcp, 1)
+#define this_cpu_sub(pcp, n) this_cpu_add(pcp, -(typeof(pcp)) (n))
+
+/* Make CBMC use atomics to work around bug. */
+#ifdef RUN
+#define THIS_CPU_ADD_HELPER(ptr, x) (*(ptr) += (x))
+#else
+/*
+ * Split the atomic into a read and a write so that it has the least
+ * possible ordering.
+ */
+#define THIS_CPU_ADD_HELPER(ptr, x) \
+	do { \
+		typeof(ptr) this_cpu_add_helper_ptr = (ptr); \
+		typeof(ptr) this_cpu_add_helper_x = (x); \
+		typeof(*ptr) this_cpu_add_helper_temp; \
+		__CPROVER_atomic_begin(); \
+		this_cpu_add_helper_temp = *(this_cpu_add_helper_ptr); \
+		__CPROVER_atomic_end(); \
+		this_cpu_add_helper_temp += this_cpu_add_helper_x; \
+		__CPROVER_atomic_begin(); \
+		*(this_cpu_add_helper_ptr) = this_cpu_add_helper_temp; \
+		__CPROVER_atomic_end(); \
+	} while (0)
+#endif
+
+/*
+ * For some reason CBMC needs an atomic operation even though this is percpu
+ * data.
+ */
+#define __this_cpu_add(pcp, n) \
+	do { \
+		BUG_ON(preemptible()); \
+		THIS_CPU_ADD_HELPER(per_cpu_ptr(&(pcp), thread_cpu_id), \
+				    (typeof(pcp)) (n)); \
+	} while (0)
+
+#define this_cpu_add(pcp, n) \
+	do { \
+		int this_cpu_add_impl_cpu = get_cpu(); \
+		THIS_CPU_ADD_HELPER(per_cpu_ptr(&(pcp), this_cpu_add_impl_cpu), \
+				    (typeof(pcp)) (n)); \
+		put_cpu(); \
+	} while (0)
+
+/*
+ * This will cause a compiler warning because of the cast from char[][] to
+ * type*. This will cause a compile time error if type is too big.
+ */
+#define DEFINE_PER_CPU(type, name) \
+	char name[NR_CPUS][PERCPU_OFFSET]; \
+	typedef char percpu_too_big_##name \
+		[sizeof(type) > PERCPU_OFFSET ? -1 : 1]
+
+#define for_each_possible_cpu(cpu) \
+	for ((cpu) = 0; (cpu) < NR_CPUS; ++(cpu))
+
+#endif
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/preempt.c b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/preempt.c
new file mode 100644
index 000000000..b4083ae34
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/preempt.c
@@ -0,0 +1,79 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <config.h>
+
+#include "preempt.h"
+
+#include "assume.h"
+#include "locks.h"
+
+/* Support NR_CPUS of at most 64 */
+#define CPU_PREEMPTION_LOCKS_INIT0 LOCK_IMPL_INITIALIZER
+#define CPU_PREEMPTION_LOCKS_INIT1 \
+	CPU_PREEMPTION_LOCKS_INIT0, CPU_PREEMPTION_LOCKS_INIT0
+#define CPU_PREEMPTION_LOCKS_INIT2 \
+	CPU_PREEMPTION_LOCKS_INIT1, CPU_PREEMPTION_LOCKS_INIT1
+#define CPU_PREEMPTION_LOCKS_INIT3 \
+	CPU_PREEMPTION_LOCKS_INIT2, CPU_PREEMPTION_LOCKS_INIT2
+#define CPU_PREEMPTION_LOCKS_INIT4 \
+	CPU_PREEMPTION_LOCKS_INIT3, CPU_PREEMPTION_LOCKS_INIT3
+#define CPU_PREEMPTION_LOCKS_INIT5 \
+	CPU_PREEMPTION_LOCKS_INIT4, CPU_PREEMPTION_LOCKS_INIT4
+
+/*
+ * Simulate disabling preemption by locking a particular cpu. NR_CPUS
+ * should be the actual number of cpus, not just the maximum.
+ */
+struct lock_impl cpu_preemption_locks[NR_CPUS] = {
+	CPU_PREEMPTION_LOCKS_INIT0
+#if (NR_CPUS - 1) & 1
+	, CPU_PREEMPTION_LOCKS_INIT0
+#endif
+#if (NR_CPUS - 1) & 2
+	, CPU_PREEMPTION_LOCKS_INIT1
+#endif
+#if (NR_CPUS - 1) & 4
+	, CPU_PREEMPTION_LOCKS_INIT2
+#endif
+#if (NR_CPUS - 1) & 8
+	, CPU_PREEMPTION_LOCKS_INIT3
+#endif
+#if (NR_CPUS - 1) & 16
+	, CPU_PREEMPTION_LOCKS_INIT4
+#endif
+#if (NR_CPUS - 1) & 32
+	, CPU_PREEMPTION_LOCKS_INIT5
+#endif
+};
+
+#undef CPU_PREEMPTION_LOCKS_INIT0
+#undef CPU_PREEMPTION_LOCKS_INIT1
+#undef CPU_PREEMPTION_LOCKS_INIT2
+#undef CPU_PREEMPTION_LOCKS_INIT3
+#undef CPU_PREEMPTION_LOCKS_INIT4
+#undef CPU_PREEMPTION_LOCKS_INIT5
+
+__thread int thread_cpu_id;
+__thread int preempt_disable_count;
+
+void preempt_disable(void)
+{
+	BUG_ON(preempt_disable_count < 0 || preempt_disable_count == INT_MAX);
+
+	if (preempt_disable_count++)
+		return;
+
+	thread_cpu_id = nondet_int();
+	assume(thread_cpu_id >= 0);
+	assume(thread_cpu_id < NR_CPUS);
+	lock_impl_lock(&cpu_preemption_locks[thread_cpu_id]);
+}
+
+void preempt_enable(void)
+{
+	BUG_ON(preempt_disable_count < 1);
+
+	if (--preempt_disable_count)
+		return;
+
+	lock_impl_unlock(&cpu_preemption_locks[thread_cpu_id]);
+}
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/preempt.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/preempt.h
new file mode 100644
index 000000000..f8b762cd2
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/preempt.h
@@ -0,0 +1,59 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef PREEMPT_H
+#define PREEMPT_H
+
+#include <stdbool.h>
+
+#include "bug_on.h"
+
+/* This flag contains garbage if preempt_disable_count is 0. */
+extern __thread int thread_cpu_id;
+
+/* Support recursive preemption disabling. */
+extern __thread int preempt_disable_count;
+
+void preempt_disable(void);
+void preempt_enable(void);
+
+static inline void preempt_disable_notrace(void)
+{
+	preempt_disable();
+}
+
+static inline void preempt_enable_no_resched(void)
+{
+	preempt_enable();
+}
+
+static inline void preempt_enable_notrace(void)
+{
+	preempt_enable();
+}
+
+static inline int preempt_count(void)
+{
+	return preempt_disable_count;
+}
+
+static inline bool preemptible(void)
+{
+	return !preempt_count();
+}
+
+static inline int get_cpu(void)
+{
+	preempt_disable();
+	return thread_cpu_id;
+}
+
+static inline void put_cpu(void)
+{
+	preempt_enable();
+}
+
+static inline void might_sleep(void)
+{
+	BUG_ON(preempt_disable_count);
+}
+
+#endif
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/simple_sync_srcu.c b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/simple_sync_srcu.c
new file mode 100644
index 000000000..97f592048
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/simple_sync_srcu.c
@@ -0,0 +1,51 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <config.h>
+
+#include <assert.h>
+#include <errno.h>
+#include <inttypes.h>
+#include <pthread.h>
+#include <stddef.h>
+#include <string.h>
+#include <sys/types.h>
+
+#include "int_typedefs.h"
+
+#include "barriers.h"
+#include "bug_on.h"
+#include "locks.h"
+#include "misc.h"
+#include "preempt.h"
+#include "percpu.h"
+#include "workqueues.h"
+
+#include <linux/srcu.h>
+
+/* Functions needed from modify_srcu.c */
+bool try_check_zero(struct srcu_struct *sp, int idx, int trycount);
+void srcu_flip(struct srcu_struct *sp);
+
+/* Simpler implementation of synchronize_srcu that ignores batching. */
+void synchronize_srcu(struct srcu_struct *sp)
+{
+	int idx;
+	/*
+	 * This code assumes that try_check_zero will succeed anyway,
+	 * so there is no point in multiple tries.
+	 */
+	const int trycount = 1;
+
+	might_sleep();
+
+	/* Ignore the lock, as multiple writers aren't working yet anyway. */
+
+	idx = 1 ^ (sp->completed & 1);
+
+	/* For comments see srcu_advance_batches. */
+
+	assume(try_check_zero(sp, idx, trycount));
+
+	srcu_flip(sp);
+
+	assume(try_check_zero(sp, idx^1, trycount));
+}
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/workqueues.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/workqueues.h
new file mode 100644
index 000000000..28b960300
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/workqueues.h
@@ -0,0 +1,103 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef WORKQUEUES_H
+#define WORKQUEUES_H
+
+#include <stdbool.h>
+
+#include "barriers.h"
+#include "bug_on.h"
+#include "int_typedefs.h"
+
+#include <linux/types.h>
+
+/* Stub workqueue implementation. */
+
+struct work_struct;
+typedef void (*work_func_t)(struct work_struct *work);
+void delayed_work_timer_fn(unsigned long __data);
+
+struct work_struct {
+/*	atomic_long_t data; */
+	unsigned long data;
+
+	struct list_head entry;
+	work_func_t func;
+#ifdef CONFIG_LOCKDEP
+	struct lockdep_map lockdep_map;
+#endif
+};
+
+struct timer_list {
+	struct hlist_node	entry;
+	unsigned long		expires;
+	void			(*function)(unsigned long);
+	unsigned long		data;
+	u32			flags;
+	int			slack;
+};
+
+struct delayed_work {
+	struct work_struct work;
+	struct timer_list timer;
+
+	/* target workqueue and CPU ->timer uses to queue ->work */
+	struct workqueue_struct *wq;
+	int cpu;
+};
+
+
+static inline bool schedule_work(struct work_struct *work)
+{
+	BUG();
+	return true;
+}
+
+static inline bool schedule_work_on(int cpu, struct work_struct *work)
+{
+	BUG();
+	return true;
+}
+
+static inline bool queue_work(struct workqueue_struct *wq,
+			      struct work_struct *work)
+{
+	BUG();
+	return true;
+}
+
+static inline bool queue_delayed_work(struct workqueue_struct *wq,
+				      struct delayed_work *dwork,
+				      unsigned long delay)
+{
+	BUG();
+	return true;
+}
+
+#define INIT_WORK(w, f) \
+	do { \
+		(w)->data = 0; \
+		(w)->func = (f); \
+	} while (0)
+
+#define INIT_DELAYED_WORK(w, f) INIT_WORK(&(w)->work, (f))
+
+#define __WORK_INITIALIZER(n, f) { \
+		.data = 0, \
+		.entry = { &(n).entry, &(n).entry }, \
+		.func = f \
+	}
+
+/* Don't bother initializing timer. */
+#define __DELAYED_WORK_INITIALIZER(n, f, tflags) { \
+	.work = __WORK_INITIALIZER((n).work, (f)), \
+	}
+
+#define DECLARE_WORK(n, f) \
+	struct workqueue_struct n = __WORK_INITIALIZER
+
+#define DECLARE_DELAYED_WORK(n, f) \
+	struct delayed_work n = __DELAYED_WORK_INITIALIZER(n, f, 0)
+
+#define system_power_efficient_wq ((struct workqueue_struct *) NULL)
+
+#endif
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/.gitignore b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/.gitignore
new file mode 100644
index 000000000..f47cb2045
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/.gitignore
@@ -0,0 +1 @@
+*.out
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/Makefile b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/Makefile
new file mode 100644
index 000000000..ad21b925f
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/Makefile
@@ -0,0 +1,12 @@
+# SPDX-License-Identifier: GPL-2.0
+CBMC_FLAGS = -I../.. -I../../src -I../../include -I../../empty_includes -32 -pointer-check -mm pso
+
+all:
+	for i in ./*.pass; do \
+		echo $$i ; \
+		CBMC_FLAGS="$(CBMC_FLAGS)" sh ../test_script.sh --should-pass $$i > $$i.out 2>&1 ; \
+	done
+	for i in ./*.fail; do \
+		echo $$i ; \
+		CBMC_FLAGS="$(CBMC_FLAGS)" sh ../test_script.sh --should-fail $$i > $$i.out 2>&1 ; \
+	done
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/assert_end.fail b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/assert_end.fail
new file mode 100644
index 000000000..40c807591
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/assert_end.fail
@@ -0,0 +1 @@
+test_cbmc_options="-DASSERT_END"
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/force.fail b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/force.fail
new file mode 100644
index 000000000..ada5baf0b
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/force.fail
@@ -0,0 +1 @@
+test_cbmc_options="-DFORCE_FAILURE"
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/force2.fail b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/force2.fail
new file mode 100644
index 000000000..8fe00c8db
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/force2.fail
@@ -0,0 +1 @@
+test_cbmc_options="-DFORCE_FAILURE_2"
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/force3.fail b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/force3.fail
new file mode 100644
index 000000000..612ed6772
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/force3.fail
@@ -0,0 +1 @@
+test_cbmc_options="-DFORCE_FAILURE_3"
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/main.pass b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/main.pass
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/main.pass
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/test.c b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/test.c
new file mode 100644
index 000000000..2ce2016f7
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/test.c
@@ -0,0 +1,73 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <src/combined_source.c>
+
+int x;
+int y;
+
+int __unbuffered_tpr_x;
+int __unbuffered_tpr_y;
+
+DEFINE_SRCU(ss);
+
+void rcu_reader(void)
+{
+	int idx;
+
+#ifndef FORCE_FAILURE_3
+	idx = srcu_read_lock(&ss);
+#endif
+	might_sleep();
+
+	__unbuffered_tpr_y = READ_ONCE(y);
+#ifdef FORCE_FAILURE
+	srcu_read_unlock(&ss, idx);
+	idx = srcu_read_lock(&ss);
+#endif
+	WRITE_ONCE(x, 1);
+
+#ifndef FORCE_FAILURE_3
+	srcu_read_unlock(&ss, idx);
+#endif
+	might_sleep();
+}
+
+void *thread_update(void *arg)
+{
+	WRITE_ONCE(y, 1);
+#ifndef FORCE_FAILURE_2
+	synchronize_srcu(&ss);
+#endif
+	might_sleep();
+	__unbuffered_tpr_x = READ_ONCE(x);
+
+	return NULL;
+}
+
+void *thread_process_reader(void *arg)
+{
+	rcu_reader();
+
+	return NULL;
+}
+
+int main(int argc, char *argv[])
+{
+	pthread_t tu;
+	pthread_t tpr;
+
+	if (pthread_create(&tu, NULL, thread_update, NULL))
+		abort();
+	if (pthread_create(&tpr, NULL, thread_process_reader, NULL))
+		abort();
+	if (pthread_join(tu, NULL))
+		abort();
+	if (pthread_join(tpr, NULL))
+		abort();
+	assert(__unbuffered_tpr_y != 0 || __unbuffered_tpr_x != 0);
+
+#ifdef ASSERT_END
+	assert(0);
+#endif
+
+	return 0;
+}
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/test_script.sh b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/test_script.sh
new file mode 100755
index 000000000..2fe1f0339
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/test_script.sh
@@ -0,0 +1,103 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+# This script expects a mode (either --should-pass or --should-fail) followed by
+# an input file. The script uses the following environment variables. The test C
+# source file is expected to be named test.c in the directory containing the
+# input file.
+#
+# CBMC: The command to run CBMC. Default: cbmc
+# CBMC_FLAGS: Additional flags to pass to CBMC
+# NR_CPUS: Number of cpus to run tests with. Default specified by the test
+# SYNC_SRCU_MODE: Choose implementation of synchronize_srcu. Defaults to simple.
+#                 kernel: Version included in the linux kernel source.
+#                 simple: Use try_check_zero directly.
+#
+# The input file is a script that is sourced by this file. It can define any of
+# the following variables to configure the test.
+#
+# test_cbmc_options: Extra options to pass to CBMC.
+# min_cpus_fail: Minimum number of CPUs (NR_CPUS) for verification to fail.
+#                The test is expected to pass if it is run with fewer. (Only
+#                useful for .fail files)
+# default_cpus: Quantity of CPUs to use for the test, if not specified on the
+#               command line. Default: Larger of 2 and MIN_CPUS_FAIL.
+
+set -e
+
+if test "$#" -ne 2; then
+	echo "Expected one option followed by an input file" 1>&2
+	exit 99
+fi
+
+if test "x$1" = "x--should-pass"; then
+	should_pass="yes"
+elif test "x$1" = "x--should-fail"; then
+	should_pass="no"
+else
+	echo "Unrecognized argument '$1'" 1>&2
+
+	# Exit code 99 indicates a hard error.
+	exit 99
+fi
+
+CBMC=${CBMC:-cbmc}
+
+SYNC_SRCU_MODE=${SYNC_SRCU_MODE:-simple}
+
+case ${SYNC_SRCU_MODE} in
+kernel) sync_srcu_mode_flags="" ;;
+simple) sync_srcu_mode_flags="-DUSE_SIMPLE_SYNC_SRCU" ;;
+
+*)
+	echo "Unrecognized argument '${SYNC_SRCU_MODE}'" 1>&2
+	exit 99
+	;;
+esac
+
+min_cpus_fail=1
+
+c_file=`dirname "$2"`/test.c
+
+# Source the input file.
+. $2
+
+if test ${min_cpus_fail} -gt 2; then
+	default_default_cpus=${min_cpus_fail}
+else
+	default_default_cpus=2
+fi
+default_cpus=${default_cpus:-${default_default_cpus}}
+cpus=${NR_CPUS:-${default_cpus}}
+
+# Check if there are two few cpus to make the test fail.
+if test $cpus -lt ${min_cpus_fail:-0}; then
+	should_pass="yes"
+fi
+
+cbmc_opts="-DNR_CPUS=${cpus} ${sync_srcu_mode_flags} ${test_cbmc_options} ${CBMC_FLAGS}"
+
+echo "Running CBMC: ${CBMC} ${cbmc_opts} ${c_file}"
+if ${CBMC} ${cbmc_opts} "${c_file}"; then
+	# Verification successful. Make sure that it was supposed to verify.
+	test "x${should_pass}" = xyes
+else
+	cbmc_exit_status=$?
+
+	# An exit status of 10 indicates a failed verification.
+	# (see cbmc_parse_optionst::do_bmc in the CBMC source code)
+	if test ${cbmc_exit_status} -eq 10 && test "x${should_pass}" = xno; then
+		:
+	else
+		echo "CBMC returned ${cbmc_exit_status} exit status" 1>&2
+
+		# Parse errors have exit status 6. Any other type of error
+		# should be considered a hard error.
+		if test ${cbmc_exit_status} -ne 6 && \
+		   test ${cbmc_exit_status} -ne 10; then
+			exit 99
+		else
+			exit 1
+		fi
+	fi
+fi
diff --git a/tools/testing/selftests/rseq/.gitignore b/tools/testing/selftests/rseq/.gitignore
new file mode 100644
index 000000000..cc610da7e
--- /dev/null
+++ b/tools/testing/selftests/rseq/.gitignore
@@ -0,0 +1,6 @@
+basic_percpu_ops_test
+basic_test
+basic_rseq_op_test
+param_test
+param_test_benchmark
+param_test_compare_twice
diff --git a/tools/testing/selftests/rseq/Makefile b/tools/testing/selftests/rseq/Makefile
new file mode 100644
index 000000000..c30c52e1d
--- /dev/null
+++ b/tools/testing/selftests/rseq/Makefile
@@ -0,0 +1,30 @@
+# SPDX-License-Identifier: GPL-2.0+ OR MIT
+CFLAGS += -O2 -Wall -g -I./ -I../../../../usr/include/ -L./ -Wl,-rpath=./
+LDLIBS += -lpthread
+
+# Own dependencies because we only want to build against 1st prerequisite, but
+# still track changes to header files and depend on shared object.
+OVERRIDE_TARGETS = 1
+
+TEST_GEN_PROGS = basic_test basic_percpu_ops_test param_test \
+		param_test_benchmark param_test_compare_twice
+
+TEST_GEN_PROGS_EXTENDED = librseq.so
+
+TEST_PROGS = run_param_test.sh
+
+include ../lib.mk
+
+$(OUTPUT)/librseq.so: rseq.c rseq.h rseq-*.h
+	$(CC) $(CFLAGS) -shared -fPIC $< $(LDLIBS) -o $@
+
+$(OUTPUT)/%: %.c $(TEST_GEN_PROGS_EXTENDED) rseq.h rseq-*.h
+	$(CC) $(CFLAGS) $< $(LDLIBS) -lrseq -o $@
+
+$(OUTPUT)/param_test_benchmark: param_test.c $(TEST_GEN_PROGS_EXTENDED) \
+					rseq.h rseq-*.h
+	$(CC) $(CFLAGS) -DBENCHMARK $< $(LDLIBS) -lrseq -o $@
+
+$(OUTPUT)/param_test_compare_twice: param_test.c $(TEST_GEN_PROGS_EXTENDED) \
+					rseq.h rseq-*.h
+	$(CC) $(CFLAGS) -DRSEQ_COMPARE_TWICE $< $(LDLIBS) -lrseq -o $@
diff --git a/tools/testing/selftests/rseq/basic_percpu_ops_test.c b/tools/testing/selftests/rseq/basic_percpu_ops_test.c
new file mode 100644
index 000000000..eb3f6db36
--- /dev/null
+++ b/tools/testing/selftests/rseq/basic_percpu_ops_test.c
@@ -0,0 +1,312 @@
+// SPDX-License-Identifier: LGPL-2.1
+#define _GNU_SOURCE
+#include <assert.h>
+#include <pthread.h>
+#include <sched.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stddef.h>
+
+#include "rseq.h"
+
+#define ARRAY_SIZE(arr)	(sizeof(arr) / sizeof((arr)[0]))
+
+struct percpu_lock_entry {
+	intptr_t v;
+} __attribute__((aligned(128)));
+
+struct percpu_lock {
+	struct percpu_lock_entry c[CPU_SETSIZE];
+};
+
+struct test_data_entry {
+	intptr_t count;
+} __attribute__((aligned(128)));
+
+struct spinlock_test_data {
+	struct percpu_lock lock;
+	struct test_data_entry c[CPU_SETSIZE];
+	int reps;
+};
+
+struct percpu_list_node {
+	intptr_t data;
+	struct percpu_list_node *next;
+};
+
+struct percpu_list_entry {
+	struct percpu_list_node *head;
+} __attribute__((aligned(128)));
+
+struct percpu_list {
+	struct percpu_list_entry c[CPU_SETSIZE];
+};
+
+/* A simple percpu spinlock.  Returns the cpu lock was acquired on. */
+int rseq_this_cpu_lock(struct percpu_lock *lock)
+{
+	int cpu;
+
+	for (;;) {
+		int ret;
+
+		cpu = rseq_cpu_start();
+		ret = rseq_cmpeqv_storev(&lock->c[cpu].v,
+					 0, 1, cpu);
+		if (rseq_likely(!ret))
+			break;
+		/* Retry if comparison fails or rseq aborts. */
+	}
+	/*
+	 * Acquire semantic when taking lock after control dependency.
+	 * Matches rseq_smp_store_release().
+	 */
+	rseq_smp_acquire__after_ctrl_dep();
+	return cpu;
+}
+
+void rseq_percpu_unlock(struct percpu_lock *lock, int cpu)
+{
+	assert(lock->c[cpu].v == 1);
+	/*
+	 * Release lock, with release semantic. Matches
+	 * rseq_smp_acquire__after_ctrl_dep().
+	 */
+	rseq_smp_store_release(&lock->c[cpu].v, 0);
+}
+
+void *test_percpu_spinlock_thread(void *arg)
+{
+	struct spinlock_test_data *data = arg;
+	int i, cpu;
+
+	if (rseq_register_current_thread()) {
+		fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
+			errno, strerror(errno));
+		abort();
+	}
+	for (i = 0; i < data->reps; i++) {
+		cpu = rseq_this_cpu_lock(&data->lock);
+		data->c[cpu].count++;
+		rseq_percpu_unlock(&data->lock, cpu);
+	}
+	if (rseq_unregister_current_thread()) {
+		fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
+			errno, strerror(errno));
+		abort();
+	}
+
+	return NULL;
+}
+
+/*
+ * A simple test which implements a sharded counter using a per-cpu
+ * lock.  Obviously real applications might prefer to simply use a
+ * per-cpu increment; however, this is reasonable for a test and the
+ * lock can be extended to synchronize more complicated operations.
+ */
+void test_percpu_spinlock(void)
+{
+	const int num_threads = 200;
+	int i;
+	uint64_t sum;
+	pthread_t test_threads[num_threads];
+	struct spinlock_test_data data;
+
+	memset(&data, 0, sizeof(data));
+	data.reps = 5000;
+
+	for (i = 0; i < num_threads; i++)
+		pthread_create(&test_threads[i], NULL,
+			       test_percpu_spinlock_thread, &data);
+
+	for (i = 0; i < num_threads; i++)
+		pthread_join(test_threads[i], NULL);
+
+	sum = 0;
+	for (i = 0; i < CPU_SETSIZE; i++)
+		sum += data.c[i].count;
+
+	assert(sum == (uint64_t)data.reps * num_threads);
+}
+
+void this_cpu_list_push(struct percpu_list *list,
+			struct percpu_list_node *node,
+			int *_cpu)
+{
+	int cpu;
+
+	for (;;) {
+		intptr_t *targetptr, newval, expect;
+		int ret;
+
+		cpu = rseq_cpu_start();
+		/* Load list->c[cpu].head with single-copy atomicity. */
+		expect = (intptr_t)RSEQ_READ_ONCE(list->c[cpu].head);
+		newval = (intptr_t)node;
+		targetptr = (intptr_t *)&list->c[cpu].head;
+		node->next = (struct percpu_list_node *)expect;
+		ret = rseq_cmpeqv_storev(targetptr, expect, newval, cpu);
+		if (rseq_likely(!ret))
+			break;
+		/* Retry if comparison fails or rseq aborts. */
+	}
+	if (_cpu)
+		*_cpu = cpu;
+}
+
+/*
+ * Unlike a traditional lock-less linked list; the availability of a
+ * rseq primitive allows us to implement pop without concerns over
+ * ABA-type races.
+ */
+struct percpu_list_node *this_cpu_list_pop(struct percpu_list *list,
+					   int *_cpu)
+{
+	for (;;) {
+		struct percpu_list_node *head;
+		intptr_t *targetptr, expectnot, *load;
+		off_t offset;
+		int ret, cpu;
+
+		cpu = rseq_cpu_start();
+		targetptr = (intptr_t *)&list->c[cpu].head;
+		expectnot = (intptr_t)NULL;
+		offset = offsetof(struct percpu_list_node, next);
+		load = (intptr_t *)&head;
+		ret = rseq_cmpnev_storeoffp_load(targetptr, expectnot,
+						 offset, load, cpu);
+		if (rseq_likely(!ret)) {
+			if (_cpu)
+				*_cpu = cpu;
+			return head;
+		}
+		if (ret > 0)
+			return NULL;
+		/* Retry if rseq aborts. */
+	}
+}
+
+/*
+ * __percpu_list_pop is not safe against concurrent accesses. Should
+ * only be used on lists that are not concurrently modified.
+ */
+struct percpu_list_node *__percpu_list_pop(struct percpu_list *list, int cpu)
+{
+	struct percpu_list_node *node;
+
+	node = list->c[cpu].head;
+	if (!node)
+		return NULL;
+	list->c[cpu].head = node->next;
+	return node;
+}
+
+void *test_percpu_list_thread(void *arg)
+{
+	int i;
+	struct percpu_list *list = (struct percpu_list *)arg;
+
+	if (rseq_register_current_thread()) {
+		fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
+			errno, strerror(errno));
+		abort();
+	}
+
+	for (i = 0; i < 100000; i++) {
+		struct percpu_list_node *node;
+
+		node = this_cpu_list_pop(list, NULL);
+		sched_yield();  /* encourage shuffling */
+		if (node)
+			this_cpu_list_push(list, node, NULL);
+	}
+
+	if (rseq_unregister_current_thread()) {
+		fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
+			errno, strerror(errno));
+		abort();
+	}
+
+	return NULL;
+}
+
+/* Simultaneous modification to a per-cpu linked list from many threads.  */
+void test_percpu_list(void)
+{
+	int i, j;
+	uint64_t sum = 0, expected_sum = 0;
+	struct percpu_list list;
+	pthread_t test_threads[200];
+	cpu_set_t allowed_cpus;
+
+	memset(&list, 0, sizeof(list));
+
+	/* Generate list entries for every usable cpu. */
+	sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
+	for (i = 0; i < CPU_SETSIZE; i++) {
+		if (!CPU_ISSET(i, &allowed_cpus))
+			continue;
+		for (j = 1; j <= 100; j++) {
+			struct percpu_list_node *node;
+
+			expected_sum += j;
+
+			node = malloc(sizeof(*node));
+			assert(node);
+			node->data = j;
+			node->next = list.c[i].head;
+			list.c[i].head = node;
+		}
+	}
+
+	for (i = 0; i < 200; i++)
+		pthread_create(&test_threads[i], NULL,
+		       test_percpu_list_thread, &list);
+
+	for (i = 0; i < 200; i++)
+		pthread_join(test_threads[i], NULL);
+
+	for (i = 0; i < CPU_SETSIZE; i++) {
+		struct percpu_list_node *node;
+
+		if (!CPU_ISSET(i, &allowed_cpus))
+			continue;
+
+		while ((node = __percpu_list_pop(&list, i))) {
+			sum += node->data;
+			free(node);
+		}
+	}
+
+	/*
+	 * All entries should now be accounted for (unless some external
+	 * actor is interfering with our allowed affinity while this
+	 * test is running).
+	 */
+	assert(sum == expected_sum);
+}
+
+int main(int argc, char **argv)
+{
+	if (rseq_register_current_thread()) {
+		fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
+			errno, strerror(errno));
+		goto error;
+	}
+	printf("spinlock\n");
+	test_percpu_spinlock();
+	printf("percpu_list\n");
+	test_percpu_list();
+	if (rseq_unregister_current_thread()) {
+		fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
+			errno, strerror(errno));
+		goto error;
+	}
+	return 0;
+
+error:
+	return -1;
+}
diff --git a/tools/testing/selftests/rseq/basic_test.c b/tools/testing/selftests/rseq/basic_test.c
new file mode 100644
index 000000000..d8efbfb89
--- /dev/null
+++ b/tools/testing/selftests/rseq/basic_test.c
@@ -0,0 +1,56 @@
+// SPDX-License-Identifier: LGPL-2.1
+/*
+ * Basic test coverage for critical regions and rseq_current_cpu().
+ */
+
+#define _GNU_SOURCE
+#include <assert.h>
+#include <sched.h>
+#include <signal.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/time.h>
+
+#include "rseq.h"
+
+void test_cpu_pointer(void)
+{
+	cpu_set_t affinity, test_affinity;
+	int i;
+
+	sched_getaffinity(0, sizeof(affinity), &affinity);
+	CPU_ZERO(&test_affinity);
+	for (i = 0; i < CPU_SETSIZE; i++) {
+		if (CPU_ISSET(i, &affinity)) {
+			CPU_SET(i, &test_affinity);
+			sched_setaffinity(0, sizeof(test_affinity),
+					&test_affinity);
+			assert(sched_getcpu() == i);
+			assert(rseq_current_cpu() == i);
+			assert(rseq_current_cpu_raw() == i);
+			assert(rseq_cpu_start() == i);
+			CPU_CLR(i, &test_affinity);
+		}
+	}
+	sched_setaffinity(0, sizeof(affinity), &affinity);
+}
+
+int main(int argc, char **argv)
+{
+	if (rseq_register_current_thread()) {
+		fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
+			errno, strerror(errno));
+		goto init_thread_error;
+	}
+	printf("testing current cpu\n");
+	test_cpu_pointer();
+	if (rseq_unregister_current_thread()) {
+		fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
+			errno, strerror(errno));
+		goto init_thread_error;
+	}
+	return 0;
+
+init_thread_error:
+	return -1;
+}
diff --git a/tools/testing/selftests/rseq/param_test.c b/tools/testing/selftests/rseq/param_test.c
new file mode 100644
index 000000000..e8a657a5f
--- /dev/null
+++ b/tools/testing/selftests/rseq/param_test.c
@@ -0,0 +1,1331 @@
+// SPDX-License-Identifier: LGPL-2.1
+#define _GNU_SOURCE
+#include <assert.h>
+#include <pthread.h>
+#include <sched.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <syscall.h>
+#include <unistd.h>
+#include <poll.h>
+#include <sys/types.h>
+#include <signal.h>
+#include <errno.h>
+#include <stddef.h>
+
+static inline pid_t rseq_gettid(void)
+{
+	return syscall(__NR_gettid);
+}
+
+#define NR_INJECT	9
+static int loop_cnt[NR_INJECT + 1];
+
+static int loop_cnt_1 asm("asm_loop_cnt_1") __attribute__((used));
+static int loop_cnt_2 asm("asm_loop_cnt_2") __attribute__((used));
+static int loop_cnt_3 asm("asm_loop_cnt_3") __attribute__((used));
+static int loop_cnt_4 asm("asm_loop_cnt_4") __attribute__((used));
+static int loop_cnt_5 asm("asm_loop_cnt_5") __attribute__((used));
+static int loop_cnt_6 asm("asm_loop_cnt_6") __attribute__((used));
+
+static int opt_modulo, verbose;
+
+static int opt_yield, opt_signal, opt_sleep,
+		opt_disable_rseq, opt_threads = 200,
+		opt_disable_mod = 0, opt_test = 's', opt_mb = 0;
+
+#ifndef RSEQ_SKIP_FASTPATH
+static long long opt_reps = 5000;
+#else
+static long long opt_reps = 100;
+#endif
+
+static __thread __attribute__((tls_model("initial-exec")))
+unsigned int signals_delivered;
+
+#ifndef BENCHMARK
+
+static __thread __attribute__((tls_model("initial-exec"), unused))
+unsigned int yield_mod_cnt, nr_abort;
+
+#define printf_verbose(fmt, ...)			\
+	do {						\
+		if (verbose)				\
+			printf(fmt, ## __VA_ARGS__);	\
+	} while (0)
+
+#ifdef __i386__
+
+#define INJECT_ASM_REG	"eax"
+
+#define RSEQ_INJECT_CLOBBER \
+	, INJECT_ASM_REG
+
+#define RSEQ_INJECT_ASM(n) \
+	"mov asm_loop_cnt_" #n ", %%" INJECT_ASM_REG "\n\t" \
+	"test %%" INJECT_ASM_REG ",%%" INJECT_ASM_REG "\n\t" \
+	"jz 333f\n\t" \
+	"222:\n\t" \
+	"dec %%" INJECT_ASM_REG "\n\t" \
+	"jnz 222b\n\t" \
+	"333:\n\t"
+
+#elif defined(__x86_64__)
+
+#define INJECT_ASM_REG_P	"rax"
+#define INJECT_ASM_REG		"eax"
+
+#define RSEQ_INJECT_CLOBBER \
+	, INJECT_ASM_REG_P \
+	, INJECT_ASM_REG
+
+#define RSEQ_INJECT_ASM(n) \
+	"lea asm_loop_cnt_" #n "(%%rip), %%" INJECT_ASM_REG_P "\n\t" \
+	"mov (%%" INJECT_ASM_REG_P "), %%" INJECT_ASM_REG "\n\t" \
+	"test %%" INJECT_ASM_REG ",%%" INJECT_ASM_REG "\n\t" \
+	"jz 333f\n\t" \
+	"222:\n\t" \
+	"dec %%" INJECT_ASM_REG "\n\t" \
+	"jnz 222b\n\t" \
+	"333:\n\t"
+
+#elif defined(__s390__)
+
+#define RSEQ_INJECT_INPUT \
+	, [loop_cnt_1]"m"(loop_cnt[1]) \
+	, [loop_cnt_2]"m"(loop_cnt[2]) \
+	, [loop_cnt_3]"m"(loop_cnt[3]) \
+	, [loop_cnt_4]"m"(loop_cnt[4]) \
+	, [loop_cnt_5]"m"(loop_cnt[5]) \
+	, [loop_cnt_6]"m"(loop_cnt[6])
+
+#define INJECT_ASM_REG	"r12"
+
+#define RSEQ_INJECT_CLOBBER \
+	, INJECT_ASM_REG
+
+#define RSEQ_INJECT_ASM(n) \
+	"l %%" INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
+	"ltr %%" INJECT_ASM_REG ", %%" INJECT_ASM_REG "\n\t" \
+	"je 333f\n\t" \
+	"222:\n\t" \
+	"ahi %%" INJECT_ASM_REG ", -1\n\t" \
+	"jnz 222b\n\t" \
+	"333:\n\t"
+
+#elif defined(__ARMEL__)
+
+#define RSEQ_INJECT_INPUT \
+	, [loop_cnt_1]"m"(loop_cnt[1]) \
+	, [loop_cnt_2]"m"(loop_cnt[2]) \
+	, [loop_cnt_3]"m"(loop_cnt[3]) \
+	, [loop_cnt_4]"m"(loop_cnt[4]) \
+	, [loop_cnt_5]"m"(loop_cnt[5]) \
+	, [loop_cnt_6]"m"(loop_cnt[6])
+
+#define INJECT_ASM_REG	"r4"
+
+#define RSEQ_INJECT_CLOBBER \
+	, INJECT_ASM_REG
+
+#define RSEQ_INJECT_ASM(n) \
+	"ldr " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
+	"cmp " INJECT_ASM_REG ", #0\n\t" \
+	"beq 333f\n\t" \
+	"222:\n\t" \
+	"subs " INJECT_ASM_REG ", #1\n\t" \
+	"bne 222b\n\t" \
+	"333:\n\t"
+
+#elif defined(__AARCH64EL__)
+
+#define RSEQ_INJECT_INPUT \
+	, [loop_cnt_1] "Qo" (loop_cnt[1]) \
+	, [loop_cnt_2] "Qo" (loop_cnt[2]) \
+	, [loop_cnt_3] "Qo" (loop_cnt[3]) \
+	, [loop_cnt_4] "Qo" (loop_cnt[4]) \
+	, [loop_cnt_5] "Qo" (loop_cnt[5]) \
+	, [loop_cnt_6] "Qo" (loop_cnt[6])
+
+#define INJECT_ASM_REG	RSEQ_ASM_TMP_REG32
+
+#define RSEQ_INJECT_ASM(n) \
+	"	ldr	" INJECT_ASM_REG ", %[loop_cnt_" #n "]\n"	\
+	"	cbz	" INJECT_ASM_REG ", 333f\n"			\
+	"222:\n"							\
+	"	sub	" INJECT_ASM_REG ", " INJECT_ASM_REG ", #1\n"	\
+	"	cbnz	" INJECT_ASM_REG ", 222b\n"			\
+	"333:\n"
+
+#elif __PPC__
+
+#define RSEQ_INJECT_INPUT \
+	, [loop_cnt_1]"m"(loop_cnt[1]) \
+	, [loop_cnt_2]"m"(loop_cnt[2]) \
+	, [loop_cnt_3]"m"(loop_cnt[3]) \
+	, [loop_cnt_4]"m"(loop_cnt[4]) \
+	, [loop_cnt_5]"m"(loop_cnt[5]) \
+	, [loop_cnt_6]"m"(loop_cnt[6])
+
+#define INJECT_ASM_REG	"r18"
+
+#define RSEQ_INJECT_CLOBBER \
+	, INJECT_ASM_REG
+
+#define RSEQ_INJECT_ASM(n) \
+	"lwz %%" INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
+	"cmpwi %%" INJECT_ASM_REG ", 0\n\t" \
+	"beq 333f\n\t" \
+	"222:\n\t" \
+	"subic. %%" INJECT_ASM_REG ", %%" INJECT_ASM_REG ", 1\n\t" \
+	"bne 222b\n\t" \
+	"333:\n\t"
+
+#elif defined(__mips__)
+
+#define RSEQ_INJECT_INPUT \
+	, [loop_cnt_1]"m"(loop_cnt[1]) \
+	, [loop_cnt_2]"m"(loop_cnt[2]) \
+	, [loop_cnt_3]"m"(loop_cnt[3]) \
+	, [loop_cnt_4]"m"(loop_cnt[4]) \
+	, [loop_cnt_5]"m"(loop_cnt[5]) \
+	, [loop_cnt_6]"m"(loop_cnt[6])
+
+#define INJECT_ASM_REG	"$5"
+
+#define RSEQ_INJECT_CLOBBER \
+	, INJECT_ASM_REG
+
+#define RSEQ_INJECT_ASM(n) \
+	"lw " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
+	"beqz " INJECT_ASM_REG ", 333f\n\t" \
+	"222:\n\t" \
+	"addiu " INJECT_ASM_REG ", -1\n\t" \
+	"bnez " INJECT_ASM_REG ", 222b\n\t" \
+	"333:\n\t"
+
+#else
+#error unsupported target
+#endif
+
+#define RSEQ_INJECT_FAILED \
+	nr_abort++;
+
+#define RSEQ_INJECT_C(n) \
+{ \
+	int loc_i, loc_nr_loops = loop_cnt[n]; \
+	\
+	for (loc_i = 0; loc_i < loc_nr_loops; loc_i++) { \
+		rseq_barrier(); \
+	} \
+	if (loc_nr_loops == -1 && opt_modulo) { \
+		if (yield_mod_cnt == opt_modulo - 1) { \
+			if (opt_sleep > 0) \
+				poll(NULL, 0, opt_sleep); \
+			if (opt_yield) \
+				sched_yield(); \
+			if (opt_signal) \
+				raise(SIGUSR1); \
+			yield_mod_cnt = 0; \
+		} else { \
+			yield_mod_cnt++; \
+		} \
+	} \
+}
+
+#else
+
+#define printf_verbose(fmt, ...)
+
+#endif /* BENCHMARK */
+
+#include "rseq.h"
+
+struct percpu_lock_entry {
+	intptr_t v;
+} __attribute__((aligned(128)));
+
+struct percpu_lock {
+	struct percpu_lock_entry c[CPU_SETSIZE];
+};
+
+struct test_data_entry {
+	intptr_t count;
+} __attribute__((aligned(128)));
+
+struct spinlock_test_data {
+	struct percpu_lock lock;
+	struct test_data_entry c[CPU_SETSIZE];
+};
+
+struct spinlock_thread_test_data {
+	struct spinlock_test_data *data;
+	long long reps;
+	int reg;
+};
+
+struct inc_test_data {
+	struct test_data_entry c[CPU_SETSIZE];
+};
+
+struct inc_thread_test_data {
+	struct inc_test_data *data;
+	long long reps;
+	int reg;
+};
+
+struct percpu_list_node {
+	intptr_t data;
+	struct percpu_list_node *next;
+};
+
+struct percpu_list_entry {
+	struct percpu_list_node *head;
+} __attribute__((aligned(128)));
+
+struct percpu_list {
+	struct percpu_list_entry c[CPU_SETSIZE];
+};
+
+#define BUFFER_ITEM_PER_CPU	100
+
+struct percpu_buffer_node {
+	intptr_t data;
+};
+
+struct percpu_buffer_entry {
+	intptr_t offset;
+	intptr_t buflen;
+	struct percpu_buffer_node **array;
+} __attribute__((aligned(128)));
+
+struct percpu_buffer {
+	struct percpu_buffer_entry c[CPU_SETSIZE];
+};
+
+#define MEMCPY_BUFFER_ITEM_PER_CPU	100
+
+struct percpu_memcpy_buffer_node {
+	intptr_t data1;
+	uint64_t data2;
+};
+
+struct percpu_memcpy_buffer_entry {
+	intptr_t offset;
+	intptr_t buflen;
+	struct percpu_memcpy_buffer_node *array;
+} __attribute__((aligned(128)));
+
+struct percpu_memcpy_buffer {
+	struct percpu_memcpy_buffer_entry c[CPU_SETSIZE];
+};
+
+/* A simple percpu spinlock. Grabs lock on current cpu. */
+static int rseq_this_cpu_lock(struct percpu_lock *lock)
+{
+	int cpu;
+
+	for (;;) {
+		int ret;
+
+		cpu = rseq_cpu_start();
+		ret = rseq_cmpeqv_storev(&lock->c[cpu].v,
+					 0, 1, cpu);
+		if (rseq_likely(!ret))
+			break;
+		/* Retry if comparison fails or rseq aborts. */
+	}
+	/*
+	 * Acquire semantic when taking lock after control dependency.
+	 * Matches rseq_smp_store_release().
+	 */
+	rseq_smp_acquire__after_ctrl_dep();
+	return cpu;
+}
+
+static void rseq_percpu_unlock(struct percpu_lock *lock, int cpu)
+{
+	assert(lock->c[cpu].v == 1);
+	/*
+	 * Release lock, with release semantic. Matches
+	 * rseq_smp_acquire__after_ctrl_dep().
+	 */
+	rseq_smp_store_release(&lock->c[cpu].v, 0);
+}
+
+void *test_percpu_spinlock_thread(void *arg)
+{
+	struct spinlock_thread_test_data *thread_data = arg;
+	struct spinlock_test_data *data = thread_data->data;
+	long long i, reps;
+
+	if (!opt_disable_rseq && thread_data->reg &&
+	    rseq_register_current_thread())
+		abort();
+	reps = thread_data->reps;
+	for (i = 0; i < reps; i++) {
+		int cpu = rseq_cpu_start();
+
+		cpu = rseq_this_cpu_lock(&data->lock);
+		data->c[cpu].count++;
+		rseq_percpu_unlock(&data->lock, cpu);
+#ifndef BENCHMARK
+		if (i != 0 && !(i % (reps / 10)))
+			printf_verbose("tid %d: count %lld\n",
+				       (int) rseq_gettid(), i);
+#endif
+	}
+	printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
+		       (int) rseq_gettid(), nr_abort, signals_delivered);
+	if (!opt_disable_rseq && thread_data->reg &&
+	    rseq_unregister_current_thread())
+		abort();
+	return NULL;
+}
+
+/*
+ * A simple test which implements a sharded counter using a per-cpu
+ * lock.  Obviously real applications might prefer to simply use a
+ * per-cpu increment; however, this is reasonable for a test and the
+ * lock can be extended to synchronize more complicated operations.
+ */
+void test_percpu_spinlock(void)
+{
+	const int num_threads = opt_threads;
+	int i, ret;
+	uint64_t sum;
+	pthread_t test_threads[num_threads];
+	struct spinlock_test_data data;
+	struct spinlock_thread_test_data thread_data[num_threads];
+
+	memset(&data, 0, sizeof(data));
+	for (i = 0; i < num_threads; i++) {
+		thread_data[i].reps = opt_reps;
+		if (opt_disable_mod <= 0 || (i % opt_disable_mod))
+			thread_data[i].reg = 1;
+		else
+			thread_data[i].reg = 0;
+		thread_data[i].data = &data;
+		ret = pthread_create(&test_threads[i], NULL,
+				     test_percpu_spinlock_thread,
+				     &thread_data[i]);
+		if (ret) {
+			errno = ret;
+			perror("pthread_create");
+			abort();
+		}
+	}
+
+	for (i = 0; i < num_threads; i++) {
+		ret = pthread_join(test_threads[i], NULL);
+		if (ret) {
+			errno = ret;
+			perror("pthread_join");
+			abort();
+		}
+	}
+
+	sum = 0;
+	for (i = 0; i < CPU_SETSIZE; i++)
+		sum += data.c[i].count;
+
+	assert(sum == (uint64_t)opt_reps * num_threads);
+}
+
+void *test_percpu_inc_thread(void *arg)
+{
+	struct inc_thread_test_data *thread_data = arg;
+	struct inc_test_data *data = thread_data->data;
+	long long i, reps;
+
+	if (!opt_disable_rseq && thread_data->reg &&
+	    rseq_register_current_thread())
+		abort();
+	reps = thread_data->reps;
+	for (i = 0; i < reps; i++) {
+		int ret;
+
+		do {
+			int cpu;
+
+			cpu = rseq_cpu_start();
+			ret = rseq_addv(&data->c[cpu].count, 1, cpu);
+		} while (rseq_unlikely(ret));
+#ifndef BENCHMARK
+		if (i != 0 && !(i % (reps / 10)))
+			printf_verbose("tid %d: count %lld\n",
+				       (int) rseq_gettid(), i);
+#endif
+	}
+	printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
+		       (int) rseq_gettid(), nr_abort, signals_delivered);
+	if (!opt_disable_rseq && thread_data->reg &&
+	    rseq_unregister_current_thread())
+		abort();
+	return NULL;
+}
+
+void test_percpu_inc(void)
+{
+	const int num_threads = opt_threads;
+	int i, ret;
+	uint64_t sum;
+	pthread_t test_threads[num_threads];
+	struct inc_test_data data;
+	struct inc_thread_test_data thread_data[num_threads];
+
+	memset(&data, 0, sizeof(data));
+	for (i = 0; i < num_threads; i++) {
+		thread_data[i].reps = opt_reps;
+		if (opt_disable_mod <= 0 || (i % opt_disable_mod))
+			thread_data[i].reg = 1;
+		else
+			thread_data[i].reg = 0;
+		thread_data[i].data = &data;
+		ret = pthread_create(&test_threads[i], NULL,
+				     test_percpu_inc_thread,
+				     &thread_data[i]);
+		if (ret) {
+			errno = ret;
+			perror("pthread_create");
+			abort();
+		}
+	}
+
+	for (i = 0; i < num_threads; i++) {
+		ret = pthread_join(test_threads[i], NULL);
+		if (ret) {
+			errno = ret;
+			perror("pthread_join");
+			abort();
+		}
+	}
+
+	sum = 0;
+	for (i = 0; i < CPU_SETSIZE; i++)
+		sum += data.c[i].count;
+
+	assert(sum == (uint64_t)opt_reps * num_threads);
+}
+
+void this_cpu_list_push(struct percpu_list *list,
+			struct percpu_list_node *node,
+			int *_cpu)
+{
+	int cpu;
+
+	for (;;) {
+		intptr_t *targetptr, newval, expect;
+		int ret;
+
+		cpu = rseq_cpu_start();
+		/* Load list->c[cpu].head with single-copy atomicity. */
+		expect = (intptr_t)RSEQ_READ_ONCE(list->c[cpu].head);
+		newval = (intptr_t)node;
+		targetptr = (intptr_t *)&list->c[cpu].head;
+		node->next = (struct percpu_list_node *)expect;
+		ret = rseq_cmpeqv_storev(targetptr, expect, newval, cpu);
+		if (rseq_likely(!ret))
+			break;
+		/* Retry if comparison fails or rseq aborts. */
+	}
+	if (_cpu)
+		*_cpu = cpu;
+}
+
+/*
+ * Unlike a traditional lock-less linked list; the availability of a
+ * rseq primitive allows us to implement pop without concerns over
+ * ABA-type races.
+ */
+struct percpu_list_node *this_cpu_list_pop(struct percpu_list *list,
+					   int *_cpu)
+{
+	struct percpu_list_node *node = NULL;
+	int cpu;
+
+	for (;;) {
+		struct percpu_list_node *head;
+		intptr_t *targetptr, expectnot, *load;
+		off_t offset;
+		int ret;
+
+		cpu = rseq_cpu_start();
+		targetptr = (intptr_t *)&list->c[cpu].head;
+		expectnot = (intptr_t)NULL;
+		offset = offsetof(struct percpu_list_node, next);
+		load = (intptr_t *)&head;
+		ret = rseq_cmpnev_storeoffp_load(targetptr, expectnot,
+						   offset, load, cpu);
+		if (rseq_likely(!ret)) {
+			node = head;
+			break;
+		}
+		if (ret > 0)
+			break;
+		/* Retry if rseq aborts. */
+	}
+	if (_cpu)
+		*_cpu = cpu;
+	return node;
+}
+
+/*
+ * __percpu_list_pop is not safe against concurrent accesses. Should
+ * only be used on lists that are not concurrently modified.
+ */
+struct percpu_list_node *__percpu_list_pop(struct percpu_list *list, int cpu)
+{
+	struct percpu_list_node *node;
+
+	node = list->c[cpu].head;
+	if (!node)
+		return NULL;
+	list->c[cpu].head = node->next;
+	return node;
+}
+
+void *test_percpu_list_thread(void *arg)
+{
+	long long i, reps;
+	struct percpu_list *list = (struct percpu_list *)arg;
+
+	if (!opt_disable_rseq && rseq_register_current_thread())
+		abort();
+
+	reps = opt_reps;
+	for (i = 0; i < reps; i++) {
+		struct percpu_list_node *node;
+
+		node = this_cpu_list_pop(list, NULL);
+		if (opt_yield)
+			sched_yield();  /* encourage shuffling */
+		if (node)
+			this_cpu_list_push(list, node, NULL);
+	}
+
+	printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
+		       (int) rseq_gettid(), nr_abort, signals_delivered);
+	if (!opt_disable_rseq && rseq_unregister_current_thread())
+		abort();
+
+	return NULL;
+}
+
+/* Simultaneous modification to a per-cpu linked list from many threads.  */
+void test_percpu_list(void)
+{
+	const int num_threads = opt_threads;
+	int i, j, ret;
+	uint64_t sum = 0, expected_sum = 0;
+	struct percpu_list list;
+	pthread_t test_threads[num_threads];
+	cpu_set_t allowed_cpus;
+
+	memset(&list, 0, sizeof(list));
+
+	/* Generate list entries for every usable cpu. */
+	sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
+	for (i = 0; i < CPU_SETSIZE; i++) {
+		if (!CPU_ISSET(i, &allowed_cpus))
+			continue;
+		for (j = 1; j <= 100; j++) {
+			struct percpu_list_node *node;
+
+			expected_sum += j;
+
+			node = malloc(sizeof(*node));
+			assert(node);
+			node->data = j;
+			node->next = list.c[i].head;
+			list.c[i].head = node;
+		}
+	}
+
+	for (i = 0; i < num_threads; i++) {
+		ret = pthread_create(&test_threads[i], NULL,
+				     test_percpu_list_thread, &list);
+		if (ret) {
+			errno = ret;
+			perror("pthread_create");
+			abort();
+		}
+	}
+
+	for (i = 0; i < num_threads; i++) {
+		ret = pthread_join(test_threads[i], NULL);
+		if (ret) {
+			errno = ret;
+			perror("pthread_join");
+			abort();
+		}
+	}
+
+	for (i = 0; i < CPU_SETSIZE; i++) {
+		struct percpu_list_node *node;
+
+		if (!CPU_ISSET(i, &allowed_cpus))
+			continue;
+
+		while ((node = __percpu_list_pop(&list, i))) {
+			sum += node->data;
+			free(node);
+		}
+	}
+
+	/*
+	 * All entries should now be accounted for (unless some external
+	 * actor is interfering with our allowed affinity while this
+	 * test is running).
+	 */
+	assert(sum == expected_sum);
+}
+
+bool this_cpu_buffer_push(struct percpu_buffer *buffer,
+			  struct percpu_buffer_node *node,
+			  int *_cpu)
+{
+	bool result = false;
+	int cpu;
+
+	for (;;) {
+		intptr_t *targetptr_spec, newval_spec;
+		intptr_t *targetptr_final, newval_final;
+		intptr_t offset;
+		int ret;
+
+		cpu = rseq_cpu_start();
+		offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
+		if (offset == buffer->c[cpu].buflen)
+			break;
+		newval_spec = (intptr_t)node;
+		targetptr_spec = (intptr_t *)&buffer->c[cpu].array[offset];
+		newval_final = offset + 1;
+		targetptr_final = &buffer->c[cpu].offset;
+		if (opt_mb)
+			ret = rseq_cmpeqv_trystorev_storev_release(
+				targetptr_final, offset, targetptr_spec,
+				newval_spec, newval_final, cpu);
+		else
+			ret = rseq_cmpeqv_trystorev_storev(targetptr_final,
+				offset, targetptr_spec, newval_spec,
+				newval_final, cpu);
+		if (rseq_likely(!ret)) {
+			result = true;
+			break;
+		}
+		/* Retry if comparison fails or rseq aborts. */
+	}
+	if (_cpu)
+		*_cpu = cpu;
+	return result;
+}
+
+struct percpu_buffer_node *this_cpu_buffer_pop(struct percpu_buffer *buffer,
+					       int *_cpu)
+{
+	struct percpu_buffer_node *head;
+	int cpu;
+
+	for (;;) {
+		intptr_t *targetptr, newval;
+		intptr_t offset;
+		int ret;
+
+		cpu = rseq_cpu_start();
+		/* Load offset with single-copy atomicity. */
+		offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
+		if (offset == 0) {
+			head = NULL;
+			break;
+		}
+		head = RSEQ_READ_ONCE(buffer->c[cpu].array[offset - 1]);
+		newval = offset - 1;
+		targetptr = (intptr_t *)&buffer->c[cpu].offset;
+		ret = rseq_cmpeqv_cmpeqv_storev(targetptr, offset,
+			(intptr_t *)&buffer->c[cpu].array[offset - 1],
+			(intptr_t)head, newval, cpu);
+		if (rseq_likely(!ret))
+			break;
+		/* Retry if comparison fails or rseq aborts. */
+	}
+	if (_cpu)
+		*_cpu = cpu;
+	return head;
+}
+
+/*
+ * __percpu_buffer_pop is not safe against concurrent accesses. Should
+ * only be used on buffers that are not concurrently modified.
+ */
+struct percpu_buffer_node *__percpu_buffer_pop(struct percpu_buffer *buffer,
+					       int cpu)
+{
+	struct percpu_buffer_node *head;
+	intptr_t offset;
+
+	offset = buffer->c[cpu].offset;
+	if (offset == 0)
+		return NULL;
+	head = buffer->c[cpu].array[offset - 1];
+	buffer->c[cpu].offset = offset - 1;
+	return head;
+}
+
+void *test_percpu_buffer_thread(void *arg)
+{
+	long long i, reps;
+	struct percpu_buffer *buffer = (struct percpu_buffer *)arg;
+
+	if (!opt_disable_rseq && rseq_register_current_thread())
+		abort();
+
+	reps = opt_reps;
+	for (i = 0; i < reps; i++) {
+		struct percpu_buffer_node *node;
+
+		node = this_cpu_buffer_pop(buffer, NULL);
+		if (opt_yield)
+			sched_yield();  /* encourage shuffling */
+		if (node) {
+			if (!this_cpu_buffer_push(buffer, node, NULL)) {
+				/* Should increase buffer size. */
+				abort();
+			}
+		}
+	}
+
+	printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
+		       (int) rseq_gettid(), nr_abort, signals_delivered);
+	if (!opt_disable_rseq && rseq_unregister_current_thread())
+		abort();
+
+	return NULL;
+}
+
+/* Simultaneous modification to a per-cpu buffer from many threads.  */
+void test_percpu_buffer(void)
+{
+	const int num_threads = opt_threads;
+	int i, j, ret;
+	uint64_t sum = 0, expected_sum = 0;
+	struct percpu_buffer buffer;
+	pthread_t test_threads[num_threads];
+	cpu_set_t allowed_cpus;
+
+	memset(&buffer, 0, sizeof(buffer));
+
+	/* Generate list entries for every usable cpu. */
+	sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
+	for (i = 0; i < CPU_SETSIZE; i++) {
+		if (!CPU_ISSET(i, &allowed_cpus))
+			continue;
+		/* Worse-case is every item in same CPU. */
+		buffer.c[i].array =
+			malloc(sizeof(*buffer.c[i].array) * CPU_SETSIZE *
+			       BUFFER_ITEM_PER_CPU);
+		assert(buffer.c[i].array);
+		buffer.c[i].buflen = CPU_SETSIZE * BUFFER_ITEM_PER_CPU;
+		for (j = 1; j <= BUFFER_ITEM_PER_CPU; j++) {
+			struct percpu_buffer_node *node;
+
+			expected_sum += j;
+
+			/*
+			 * We could theoretically put the word-sized
+			 * "data" directly in the buffer. However, we
+			 * want to model objects that would not fit
+			 * within a single word, so allocate an object
+			 * for each node.
+			 */
+			node = malloc(sizeof(*node));
+			assert(node);
+			node->data = j;
+			buffer.c[i].array[j - 1] = node;
+			buffer.c[i].offset++;
+		}
+	}
+
+	for (i = 0; i < num_threads; i++) {
+		ret = pthread_create(&test_threads[i], NULL,
+				     test_percpu_buffer_thread, &buffer);
+		if (ret) {
+			errno = ret;
+			perror("pthread_create");
+			abort();
+		}
+	}
+
+	for (i = 0; i < num_threads; i++) {
+		ret = pthread_join(test_threads[i], NULL);
+		if (ret) {
+			errno = ret;
+			perror("pthread_join");
+			abort();
+		}
+	}
+
+	for (i = 0; i < CPU_SETSIZE; i++) {
+		struct percpu_buffer_node *node;
+
+		if (!CPU_ISSET(i, &allowed_cpus))
+			continue;
+
+		while ((node = __percpu_buffer_pop(&buffer, i))) {
+			sum += node->data;
+			free(node);
+		}
+		free(buffer.c[i].array);
+	}
+
+	/*
+	 * All entries should now be accounted for (unless some external
+	 * actor is interfering with our allowed affinity while this
+	 * test is running).
+	 */
+	assert(sum == expected_sum);
+}
+
+bool this_cpu_memcpy_buffer_push(struct percpu_memcpy_buffer *buffer,
+				 struct percpu_memcpy_buffer_node item,
+				 int *_cpu)
+{
+	bool result = false;
+	int cpu;
+
+	for (;;) {
+		intptr_t *targetptr_final, newval_final, offset;
+		char *destptr, *srcptr;
+		size_t copylen;
+		int ret;
+
+		cpu = rseq_cpu_start();
+		/* Load offset with single-copy atomicity. */
+		offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
+		if (offset == buffer->c[cpu].buflen)
+			break;
+		destptr = (char *)&buffer->c[cpu].array[offset];
+		srcptr = (char *)&item;
+		/* copylen must be <= 4kB. */
+		copylen = sizeof(item);
+		newval_final = offset + 1;
+		targetptr_final = &buffer->c[cpu].offset;
+		if (opt_mb)
+			ret = rseq_cmpeqv_trymemcpy_storev_release(
+				targetptr_final, offset,
+				destptr, srcptr, copylen,
+				newval_final, cpu);
+		else
+			ret = rseq_cmpeqv_trymemcpy_storev(targetptr_final,
+				offset, destptr, srcptr, copylen,
+				newval_final, cpu);
+		if (rseq_likely(!ret)) {
+			result = true;
+			break;
+		}
+		/* Retry if comparison fails or rseq aborts. */
+	}
+	if (_cpu)
+		*_cpu = cpu;
+	return result;
+}
+
+bool this_cpu_memcpy_buffer_pop(struct percpu_memcpy_buffer *buffer,
+				struct percpu_memcpy_buffer_node *item,
+				int *_cpu)
+{
+	bool result = false;
+	int cpu;
+
+	for (;;) {
+		intptr_t *targetptr_final, newval_final, offset;
+		char *destptr, *srcptr;
+		size_t copylen;
+		int ret;
+
+		cpu = rseq_cpu_start();
+		/* Load offset with single-copy atomicity. */
+		offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
+		if (offset == 0)
+			break;
+		destptr = (char *)item;
+		srcptr = (char *)&buffer->c[cpu].array[offset - 1];
+		/* copylen must be <= 4kB. */
+		copylen = sizeof(*item);
+		newval_final = offset - 1;
+		targetptr_final = &buffer->c[cpu].offset;
+		ret = rseq_cmpeqv_trymemcpy_storev(targetptr_final,
+			offset, destptr, srcptr, copylen,
+			newval_final, cpu);
+		if (rseq_likely(!ret)) {
+			result = true;
+			break;
+		}
+		/* Retry if comparison fails or rseq aborts. */
+	}
+	if (_cpu)
+		*_cpu = cpu;
+	return result;
+}
+
+/*
+ * __percpu_memcpy_buffer_pop is not safe against concurrent accesses. Should
+ * only be used on buffers that are not concurrently modified.
+ */
+bool __percpu_memcpy_buffer_pop(struct percpu_memcpy_buffer *buffer,
+				struct percpu_memcpy_buffer_node *item,
+				int cpu)
+{
+	intptr_t offset;
+
+	offset = buffer->c[cpu].offset;
+	if (offset == 0)
+		return false;
+	memcpy(item, &buffer->c[cpu].array[offset - 1], sizeof(*item));
+	buffer->c[cpu].offset = offset - 1;
+	return true;
+}
+
+void *test_percpu_memcpy_buffer_thread(void *arg)
+{
+	long long i, reps;
+	struct percpu_memcpy_buffer *buffer = (struct percpu_memcpy_buffer *)arg;
+
+	if (!opt_disable_rseq && rseq_register_current_thread())
+		abort();
+
+	reps = opt_reps;
+	for (i = 0; i < reps; i++) {
+		struct percpu_memcpy_buffer_node item;
+		bool result;
+
+		result = this_cpu_memcpy_buffer_pop(buffer, &item, NULL);
+		if (opt_yield)
+			sched_yield();  /* encourage shuffling */
+		if (result) {
+			if (!this_cpu_memcpy_buffer_push(buffer, item, NULL)) {
+				/* Should increase buffer size. */
+				abort();
+			}
+		}
+	}
+
+	printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
+		       (int) rseq_gettid(), nr_abort, signals_delivered);
+	if (!opt_disable_rseq && rseq_unregister_current_thread())
+		abort();
+
+	return NULL;
+}
+
+/* Simultaneous modification to a per-cpu buffer from many threads.  */
+void test_percpu_memcpy_buffer(void)
+{
+	const int num_threads = opt_threads;
+	int i, j, ret;
+	uint64_t sum = 0, expected_sum = 0;
+	struct percpu_memcpy_buffer buffer;
+	pthread_t test_threads[num_threads];
+	cpu_set_t allowed_cpus;
+
+	memset(&buffer, 0, sizeof(buffer));
+
+	/* Generate list entries for every usable cpu. */
+	sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
+	for (i = 0; i < CPU_SETSIZE; i++) {
+		if (!CPU_ISSET(i, &allowed_cpus))
+			continue;
+		/* Worse-case is every item in same CPU. */
+		buffer.c[i].array =
+			malloc(sizeof(*buffer.c[i].array) * CPU_SETSIZE *
+			       MEMCPY_BUFFER_ITEM_PER_CPU);
+		assert(buffer.c[i].array);
+		buffer.c[i].buflen = CPU_SETSIZE * MEMCPY_BUFFER_ITEM_PER_CPU;
+		for (j = 1; j <= MEMCPY_BUFFER_ITEM_PER_CPU; j++) {
+			expected_sum += 2 * j + 1;
+
+			/*
+			 * We could theoretically put the word-sized
+			 * "data" directly in the buffer. However, we
+			 * want to model objects that would not fit
+			 * within a single word, so allocate an object
+			 * for each node.
+			 */
+			buffer.c[i].array[j - 1].data1 = j;
+			buffer.c[i].array[j - 1].data2 = j + 1;
+			buffer.c[i].offset++;
+		}
+	}
+
+	for (i = 0; i < num_threads; i++) {
+		ret = pthread_create(&test_threads[i], NULL,
+				     test_percpu_memcpy_buffer_thread,
+				     &buffer);
+		if (ret) {
+			errno = ret;
+			perror("pthread_create");
+			abort();
+		}
+	}
+
+	for (i = 0; i < num_threads; i++) {
+		ret = pthread_join(test_threads[i], NULL);
+		if (ret) {
+			errno = ret;
+			perror("pthread_join");
+			abort();
+		}
+	}
+
+	for (i = 0; i < CPU_SETSIZE; i++) {
+		struct percpu_memcpy_buffer_node item;
+
+		if (!CPU_ISSET(i, &allowed_cpus))
+			continue;
+
+		while (__percpu_memcpy_buffer_pop(&buffer, &item, i)) {
+			sum += item.data1;
+			sum += item.data2;
+		}
+		free(buffer.c[i].array);
+	}
+
+	/*
+	 * All entries should now be accounted for (unless some external
+	 * actor is interfering with our allowed affinity while this
+	 * test is running).
+	 */
+	assert(sum == expected_sum);
+}
+
+static void test_signal_interrupt_handler(int signo)
+{
+	signals_delivered++;
+}
+
+static int set_signal_handler(void)
+{
+	int ret = 0;
+	struct sigaction sa;
+	sigset_t sigset;
+
+	ret = sigemptyset(&sigset);
+	if (ret < 0) {
+		perror("sigemptyset");
+		return ret;
+	}
+
+	sa.sa_handler = test_signal_interrupt_handler;
+	sa.sa_mask = sigset;
+	sa.sa_flags = 0;
+	ret = sigaction(SIGUSR1, &sa, NULL);
+	if (ret < 0) {
+		perror("sigaction");
+		return ret;
+	}
+
+	printf_verbose("Signal handler set for SIGUSR1\n");
+
+	return ret;
+}
+
+static void show_usage(int argc, char **argv)
+{
+	printf("Usage : %s <OPTIONS>\n",
+		argv[0]);
+	printf("OPTIONS:\n");
+	printf("	[-1 loops] Number of loops for delay injection 1\n");
+	printf("	[-2 loops] Number of loops for delay injection 2\n");
+	printf("	[-3 loops] Number of loops for delay injection 3\n");
+	printf("	[-4 loops] Number of loops for delay injection 4\n");
+	printf("	[-5 loops] Number of loops for delay injection 5\n");
+	printf("	[-6 loops] Number of loops for delay injection 6\n");
+	printf("	[-7 loops] Number of loops for delay injection 7 (-1 to enable -m)\n");
+	printf("	[-8 loops] Number of loops for delay injection 8 (-1 to enable -m)\n");
+	printf("	[-9 loops] Number of loops for delay injection 9 (-1 to enable -m)\n");
+	printf("	[-m N] Yield/sleep/kill every modulo N (default 0: disabled) (>= 0)\n");
+	printf("	[-y] Yield\n");
+	printf("	[-k] Kill thread with signal\n");
+	printf("	[-s S] S: =0: disabled (default), >0: sleep time (ms)\n");
+	printf("	[-t N] Number of threads (default 200)\n");
+	printf("	[-r N] Number of repetitions per thread (default 5000)\n");
+	printf("	[-d] Disable rseq system call (no initialization)\n");
+	printf("	[-D M] Disable rseq for each M threads\n");
+	printf("	[-T test] Choose test: (s)pinlock, (l)ist, (b)uffer, (m)emcpy, (i)ncrement\n");
+	printf("	[-M] Push into buffer and memcpy buffer with memory barriers.\n");
+	printf("	[-v] Verbose output.\n");
+	printf("	[-h] Show this help.\n");
+	printf("\n");
+}
+
+int main(int argc, char **argv)
+{
+	int i;
+
+	for (i = 1; i < argc; i++) {
+		if (argv[i][0] != '-')
+			continue;
+		switch (argv[i][1]) {
+		case '1':
+		case '2':
+		case '3':
+		case '4':
+		case '5':
+		case '6':
+		case '7':
+		case '8':
+		case '9':
+			if (argc < i + 2) {
+				show_usage(argc, argv);
+				goto error;
+			}
+			loop_cnt[argv[i][1] - '0'] = atol(argv[i + 1]);
+			i++;
+			break;
+		case 'm':
+			if (argc < i + 2) {
+				show_usage(argc, argv);
+				goto error;
+			}
+			opt_modulo = atol(argv[i + 1]);
+			if (opt_modulo < 0) {
+				show_usage(argc, argv);
+				goto error;
+			}
+			i++;
+			break;
+		case 's':
+			if (argc < i + 2) {
+				show_usage(argc, argv);
+				goto error;
+			}
+			opt_sleep = atol(argv[i + 1]);
+			if (opt_sleep < 0) {
+				show_usage(argc, argv);
+				goto error;
+			}
+			i++;
+			break;
+		case 'y':
+			opt_yield = 1;
+			break;
+		case 'k':
+			opt_signal = 1;
+			break;
+		case 'd':
+			opt_disable_rseq = 1;
+			break;
+		case 'D':
+			if (argc < i + 2) {
+				show_usage(argc, argv);
+				goto error;
+			}
+			opt_disable_mod = atol(argv[i + 1]);
+			if (opt_disable_mod < 0) {
+				show_usage(argc, argv);
+				goto error;
+			}
+			i++;
+			break;
+		case 't':
+			if (argc < i + 2) {
+				show_usage(argc, argv);
+				goto error;
+			}
+			opt_threads = atol(argv[i + 1]);
+			if (opt_threads < 0) {
+				show_usage(argc, argv);
+				goto error;
+			}
+			i++;
+			break;
+		case 'r':
+			if (argc < i + 2) {
+				show_usage(argc, argv);
+				goto error;
+			}
+			opt_reps = atoll(argv[i + 1]);
+			if (opt_reps < 0) {
+				show_usage(argc, argv);
+				goto error;
+			}
+			i++;
+			break;
+		case 'h':
+			show_usage(argc, argv);
+			goto end;
+		case 'T':
+			if (argc < i + 2) {
+				show_usage(argc, argv);
+				goto error;
+			}
+			opt_test = *argv[i + 1];
+			switch (opt_test) {
+			case 's':
+			case 'l':
+			case 'i':
+			case 'b':
+			case 'm':
+				break;
+			default:
+				show_usage(argc, argv);
+				goto error;
+			}
+			i++;
+			break;
+		case 'v':
+			verbose = 1;
+			break;
+		case 'M':
+			opt_mb = 1;
+			break;
+		default:
+			show_usage(argc, argv);
+			goto error;
+		}
+	}
+
+	loop_cnt_1 = loop_cnt[1];
+	loop_cnt_2 = loop_cnt[2];
+	loop_cnt_3 = loop_cnt[3];
+	loop_cnt_4 = loop_cnt[4];
+	loop_cnt_5 = loop_cnt[5];
+	loop_cnt_6 = loop_cnt[6];
+
+	if (set_signal_handler())
+		goto error;
+
+	if (!opt_disable_rseq && rseq_register_current_thread())
+		goto error;
+	switch (opt_test) {
+	case 's':
+		printf_verbose("spinlock\n");
+		test_percpu_spinlock();
+		break;
+	case 'l':
+		printf_verbose("linked list\n");
+		test_percpu_list();
+		break;
+	case 'b':
+		printf_verbose("buffer\n");
+		test_percpu_buffer();
+		break;
+	case 'm':
+		printf_verbose("memcpy buffer\n");
+		test_percpu_memcpy_buffer();
+		break;
+	case 'i':
+		printf_verbose("counter increment\n");
+		test_percpu_inc();
+		break;
+	}
+	if (!opt_disable_rseq && rseq_unregister_current_thread())
+		abort();
+end:
+	return 0;
+
+error:
+	return -1;
+}
diff --git a/tools/testing/selftests/rseq/rseq-arm.h b/tools/testing/selftests/rseq/rseq-arm.h
new file mode 100644
index 000000000..3cea19877
--- /dev/null
+++ b/tools/testing/selftests/rseq/rseq-arm.h
@@ -0,0 +1,716 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * rseq-arm.h
+ *
+ * (C) Copyright 2016-2018 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+ */
+
+#define RSEQ_SIG	0x53053053
+
+#define rseq_smp_mb()	__asm__ __volatile__ ("dmb" ::: "memory", "cc")
+#define rseq_smp_rmb()	__asm__ __volatile__ ("dmb" ::: "memory", "cc")
+#define rseq_smp_wmb()	__asm__ __volatile__ ("dmb" ::: "memory", "cc")
+
+#define rseq_smp_load_acquire(p)					\
+__extension__ ({							\
+	__typeof(*p) ____p1 = RSEQ_READ_ONCE(*p);			\
+	rseq_smp_mb();							\
+	____p1;								\
+})
+
+#define rseq_smp_acquire__after_ctrl_dep()	rseq_smp_rmb()
+
+#define rseq_smp_store_release(p, v)					\
+do {									\
+	rseq_smp_mb();							\
+	RSEQ_WRITE_ONCE(*p, v);						\
+} while (0)
+
+#ifdef RSEQ_SKIP_FASTPATH
+#include "rseq-skip.h"
+#else /* !RSEQ_SKIP_FASTPATH */
+
+#define __RSEQ_ASM_DEFINE_TABLE(version, flags,	start_ip,		\
+				post_commit_offset, abort_ip)		\
+		".pushsection __rseq_table, \"aw\"\n\t"			\
+		".balign 32\n\t"					\
+		".word " __rseq_str(version) ", " __rseq_str(flags) "\n\t" \
+		".word " __rseq_str(start_ip) ", 0x0, " __rseq_str(post_commit_offset) ", 0x0, " __rseq_str(abort_ip) ", 0x0\n\t" \
+		".popsection\n\t"
+
+#define RSEQ_ASM_DEFINE_TABLE(start_ip, post_commit_ip, abort_ip)	\
+	__RSEQ_ASM_DEFINE_TABLE(0x0, 0x0, start_ip,			\
+				(post_commit_ip - start_ip), abort_ip)
+
+#define RSEQ_ASM_STORE_RSEQ_CS(label, cs_label, rseq_cs)		\
+		RSEQ_INJECT_ASM(1)					\
+		"adr r0, " __rseq_str(cs_label) "\n\t"			\
+		"str r0, %[" __rseq_str(rseq_cs) "]\n\t"		\
+		__rseq_str(label) ":\n\t"
+
+#define RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, label)		\
+		RSEQ_INJECT_ASM(2)					\
+		"ldr r0, %[" __rseq_str(current_cpu_id) "]\n\t"	\
+		"cmp %[" __rseq_str(cpu_id) "], r0\n\t"		\
+		"bne " __rseq_str(label) "\n\t"
+
+#define __RSEQ_ASM_DEFINE_ABORT(table_label, label, teardown,		\
+				abort_label, version, flags,		\
+				start_ip, post_commit_offset, abort_ip)	\
+		".balign 32\n\t"					\
+		__rseq_str(table_label) ":\n\t"				\
+		".word " __rseq_str(version) ", " __rseq_str(flags) "\n\t" \
+		".word " __rseq_str(start_ip) ", 0x0, " __rseq_str(post_commit_offset) ", 0x0, " __rseq_str(abort_ip) ", 0x0\n\t" \
+		".word " __rseq_str(RSEQ_SIG) "\n\t"			\
+		__rseq_str(label) ":\n\t"				\
+		teardown						\
+		"b %l[" __rseq_str(abort_label) "]\n\t"
+
+#define RSEQ_ASM_DEFINE_ABORT(table_label, label, teardown, abort_label, \
+			      start_ip, post_commit_ip, abort_ip)	\
+	__RSEQ_ASM_DEFINE_ABORT(table_label, label, teardown,		\
+				abort_label, 0x0, 0x0, start_ip,	\
+				(post_commit_ip - start_ip), abort_ip)
+
+#define RSEQ_ASM_DEFINE_CMPFAIL(label, teardown, cmpfail_label)		\
+		__rseq_str(label) ":\n\t"				\
+		teardown						\
+		"b %l[" __rseq_str(cmpfail_label) "]\n\t"
+
+#define rseq_workaround_gcc_asm_size_guess()	__asm__ __volatile__("")
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
+{
+	RSEQ_INJECT_C(9)
+
+	rseq_workaround_gcc_asm_size_guess();
+	__asm__ __volatile__ goto (
+		RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */
+		/* Start rseq by storing table entry pointer into rseq_cs. */
+		RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+		RSEQ_INJECT_ASM(3)
+		"ldr r0, %[v]\n\t"
+		"cmp %[expect], r0\n\t"
+		"bne %l[cmpfail]\n\t"
+		RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+		"ldr r0, %[v]\n\t"
+		"cmp %[expect], r0\n\t"
+		"bne %l[error2]\n\t"
+#endif
+		/* final store */
+		"str %[newv], %[v]\n\t"
+		"2:\n\t"
+		RSEQ_INJECT_ASM(5)
+		"b 5f\n\t"
+		RSEQ_ASM_DEFINE_ABORT(3, 4, "", abort, 1b, 2b, 4f)
+		"5:\n\t"
+		: /* gcc asm goto does not allow outputs */
+		: [cpu_id]		"r" (cpu),
+		  [current_cpu_id]	"m" (__rseq_abi.cpu_id),
+		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  [v]			"m" (*v),
+		  [expect]		"r" (expect),
+		  [newv]		"r" (newv)
+		  RSEQ_INJECT_INPUT
+		: "r0", "memory", "cc"
+		  RSEQ_INJECT_CLOBBER
+		: abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+		  , error1, error2
+#endif
+	);
+	rseq_workaround_gcc_asm_size_guess();
+	return 0;
+abort:
+	rseq_workaround_gcc_asm_size_guess();
+	RSEQ_INJECT_FAILED
+	return -1;
+cmpfail:
+	rseq_workaround_gcc_asm_size_guess();
+	return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+	rseq_bug("cpu_id comparison failed");
+error2:
+	rseq_bug("expected value comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
+			       off_t voffp, intptr_t *load, int cpu)
+{
+	RSEQ_INJECT_C(9)
+
+	rseq_workaround_gcc_asm_size_guess();
+	__asm__ __volatile__ goto (
+		RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */
+		/* Start rseq by storing table entry pointer into rseq_cs. */
+		RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+		RSEQ_INJECT_ASM(3)
+		"ldr r0, %[v]\n\t"
+		"cmp %[expectnot], r0\n\t"
+		"beq %l[cmpfail]\n\t"
+		RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+		"ldr r0, %[v]\n\t"
+		"cmp %[expectnot], r0\n\t"
+		"beq %l[error2]\n\t"
+#endif
+		"str r0, %[load]\n\t"
+		"add r0, %[voffp]\n\t"
+		"ldr r0, [r0]\n\t"
+		/* final store */
+		"str r0, %[v]\n\t"
+		"2:\n\t"
+		RSEQ_INJECT_ASM(5)
+		"b 5f\n\t"
+		RSEQ_ASM_DEFINE_ABORT(3, 4, "", abort, 1b, 2b, 4f)
+		"5:\n\t"
+		: /* gcc asm goto does not allow outputs */
+		: [cpu_id]		"r" (cpu),
+		  [current_cpu_id]	"m" (__rseq_abi.cpu_id),
+		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  /* final store input */
+		  [v]			"m" (*v),
+		  [expectnot]		"r" (expectnot),
+		  [voffp]		"Ir" (voffp),
+		  [load]		"m" (*load)
+		  RSEQ_INJECT_INPUT
+		: "r0", "memory", "cc"
+		  RSEQ_INJECT_CLOBBER
+		: abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+		  , error1, error2
+#endif
+	);
+	rseq_workaround_gcc_asm_size_guess();
+	return 0;
+abort:
+	rseq_workaround_gcc_asm_size_guess();
+	RSEQ_INJECT_FAILED
+	return -1;
+cmpfail:
+	rseq_workaround_gcc_asm_size_guess();
+	return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+	rseq_bug("cpu_id comparison failed");
+error2:
+	rseq_bug("expected value comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_addv(intptr_t *v, intptr_t count, int cpu)
+{
+	RSEQ_INJECT_C(9)
+
+	rseq_workaround_gcc_asm_size_guess();
+	__asm__ __volatile__ goto (
+		RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */
+		/* Start rseq by storing table entry pointer into rseq_cs. */
+		RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+		RSEQ_INJECT_ASM(3)
+#ifdef RSEQ_COMPARE_TWICE
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+#endif
+		"ldr r0, %[v]\n\t"
+		"add r0, %[count]\n\t"
+		/* final store */
+		"str r0, %[v]\n\t"
+		"2:\n\t"
+		RSEQ_INJECT_ASM(4)
+		"b 5f\n\t"
+		RSEQ_ASM_DEFINE_ABORT(3, 4, "", abort, 1b, 2b, 4f)
+		"5:\n\t"
+		: /* gcc asm goto does not allow outputs */
+		: [cpu_id]		"r" (cpu),
+		  [current_cpu_id]	"m" (__rseq_abi.cpu_id),
+		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  [v]			"m" (*v),
+		  [count]		"Ir" (count)
+		  RSEQ_INJECT_INPUT
+		: "r0", "memory", "cc"
+		  RSEQ_INJECT_CLOBBER
+		: abort
+#ifdef RSEQ_COMPARE_TWICE
+		  , error1
+#endif
+	);
+	rseq_workaround_gcc_asm_size_guess();
+	return 0;
+abort:
+	rseq_workaround_gcc_asm_size_guess();
+	RSEQ_INJECT_FAILED
+	return -1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+	rseq_bug("cpu_id comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect,
+				 intptr_t *v2, intptr_t newv2,
+				 intptr_t newv, int cpu)
+{
+	RSEQ_INJECT_C(9)
+
+	rseq_workaround_gcc_asm_size_guess();
+	__asm__ __volatile__ goto (
+		RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */
+		/* Start rseq by storing table entry pointer into rseq_cs. */
+		RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+		RSEQ_INJECT_ASM(3)
+		"ldr r0, %[v]\n\t"
+		"cmp %[expect], r0\n\t"
+		"bne %l[cmpfail]\n\t"
+		RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+		"ldr r0, %[v]\n\t"
+		"cmp %[expect], r0\n\t"
+		"bne %l[error2]\n\t"
+#endif
+		/* try store */
+		"str %[newv2], %[v2]\n\t"
+		RSEQ_INJECT_ASM(5)
+		/* final store */
+		"str %[newv], %[v]\n\t"
+		"2:\n\t"
+		RSEQ_INJECT_ASM(6)
+		"b 5f\n\t"
+		RSEQ_ASM_DEFINE_ABORT(3, 4, "", abort, 1b, 2b, 4f)
+		"5:\n\t"
+		: /* gcc asm goto does not allow outputs */
+		: [cpu_id]		"r" (cpu),
+		  [current_cpu_id]	"m" (__rseq_abi.cpu_id),
+		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  /* try store input */
+		  [v2]			"m" (*v2),
+		  [newv2]		"r" (newv2),
+		  /* final store input */
+		  [v]			"m" (*v),
+		  [expect]		"r" (expect),
+		  [newv]		"r" (newv)
+		  RSEQ_INJECT_INPUT
+		: "r0", "memory", "cc"
+		  RSEQ_INJECT_CLOBBER
+		: abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+		  , error1, error2
+#endif
+	);
+	rseq_workaround_gcc_asm_size_guess();
+	return 0;
+abort:
+	rseq_workaround_gcc_asm_size_guess();
+	RSEQ_INJECT_FAILED
+	return -1;
+cmpfail:
+	rseq_workaround_gcc_asm_size_guess();
+	return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+	rseq_bug("cpu_id comparison failed");
+error2:
+	rseq_bug("expected value comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect,
+					 intptr_t *v2, intptr_t newv2,
+					 intptr_t newv, int cpu)
+{
+	RSEQ_INJECT_C(9)
+
+	rseq_workaround_gcc_asm_size_guess();
+	__asm__ __volatile__ goto (
+		RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */
+		/* Start rseq by storing table entry pointer into rseq_cs. */
+		RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+		RSEQ_INJECT_ASM(3)
+		"ldr r0, %[v]\n\t"
+		"cmp %[expect], r0\n\t"
+		"bne %l[cmpfail]\n\t"
+		RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+		"ldr r0, %[v]\n\t"
+		"cmp %[expect], r0\n\t"
+		"bne %l[error2]\n\t"
+#endif
+		/* try store */
+		"str %[newv2], %[v2]\n\t"
+		RSEQ_INJECT_ASM(5)
+		"dmb\n\t"	/* full mb provides store-release */
+		/* final store */
+		"str %[newv], %[v]\n\t"
+		"2:\n\t"
+		RSEQ_INJECT_ASM(6)
+		"b 5f\n\t"
+		RSEQ_ASM_DEFINE_ABORT(3, 4, "", abort, 1b, 2b, 4f)
+		"5:\n\t"
+		: /* gcc asm goto does not allow outputs */
+		: [cpu_id]		"r" (cpu),
+		  [current_cpu_id]	"m" (__rseq_abi.cpu_id),
+		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  /* try store input */
+		  [v2]			"m" (*v2),
+		  [newv2]		"r" (newv2),
+		  /* final store input */
+		  [v]			"m" (*v),
+		  [expect]		"r" (expect),
+		  [newv]		"r" (newv)
+		  RSEQ_INJECT_INPUT
+		: "r0", "memory", "cc"
+		  RSEQ_INJECT_CLOBBER
+		: abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+		  , error1, error2
+#endif
+	);
+	rseq_workaround_gcc_asm_size_guess();
+	return 0;
+abort:
+	rseq_workaround_gcc_asm_size_guess();
+	RSEQ_INJECT_FAILED
+	return -1;
+cmpfail:
+	rseq_workaround_gcc_asm_size_guess();
+	return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+	rseq_bug("cpu_id comparison failed");
+error2:
+	rseq_bug("expected value comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect,
+			      intptr_t *v2, intptr_t expect2,
+			      intptr_t newv, int cpu)
+{
+	RSEQ_INJECT_C(9)
+
+	rseq_workaround_gcc_asm_size_guess();
+	__asm__ __volatile__ goto (
+		RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */
+		/* Start rseq by storing table entry pointer into rseq_cs. */
+		RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+		RSEQ_INJECT_ASM(3)
+		"ldr r0, %[v]\n\t"
+		"cmp %[expect], r0\n\t"
+		"bne %l[cmpfail]\n\t"
+		RSEQ_INJECT_ASM(4)
+		"ldr r0, %[v2]\n\t"
+		"cmp %[expect2], r0\n\t"
+		"bne %l[cmpfail]\n\t"
+		RSEQ_INJECT_ASM(5)
+#ifdef RSEQ_COMPARE_TWICE
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+		"ldr r0, %[v]\n\t"
+		"cmp %[expect], r0\n\t"
+		"bne %l[error2]\n\t"
+		"ldr r0, %[v2]\n\t"
+		"cmp %[expect2], r0\n\t"
+		"bne %l[error3]\n\t"
+#endif
+		/* final store */
+		"str %[newv], %[v]\n\t"
+		"2:\n\t"
+		RSEQ_INJECT_ASM(6)
+		"b 5f\n\t"
+		RSEQ_ASM_DEFINE_ABORT(3, 4, "", abort, 1b, 2b, 4f)
+		"5:\n\t"
+		: /* gcc asm goto does not allow outputs */
+		: [cpu_id]		"r" (cpu),
+		  [current_cpu_id]	"m" (__rseq_abi.cpu_id),
+		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  /* cmp2 input */
+		  [v2]			"m" (*v2),
+		  [expect2]		"r" (expect2),
+		  /* final store input */
+		  [v]			"m" (*v),
+		  [expect]		"r" (expect),
+		  [newv]		"r" (newv)
+		  RSEQ_INJECT_INPUT
+		: "r0", "memory", "cc"
+		  RSEQ_INJECT_CLOBBER
+		: abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+		  , error1, error2, error3
+#endif
+	);
+	rseq_workaround_gcc_asm_size_guess();
+	return 0;
+abort:
+	rseq_workaround_gcc_asm_size_guess();
+	RSEQ_INJECT_FAILED
+	return -1;
+cmpfail:
+	rseq_workaround_gcc_asm_size_guess();
+	return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+	rseq_bug("cpu_id comparison failed");
+error2:
+	rseq_bug("1st expected value comparison failed");
+error3:
+	rseq_bug("2nd expected value comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect,
+				 void *dst, void *src, size_t len,
+				 intptr_t newv, int cpu)
+{
+	uint32_t rseq_scratch[3];
+
+	RSEQ_INJECT_C(9)
+
+	rseq_workaround_gcc_asm_size_guess();
+	__asm__ __volatile__ goto (
+		RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */
+		"str %[src], %[rseq_scratch0]\n\t"
+		"str %[dst], %[rseq_scratch1]\n\t"
+		"str %[len], %[rseq_scratch2]\n\t"
+		/* Start rseq by storing table entry pointer into rseq_cs. */
+		RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+		RSEQ_INJECT_ASM(3)
+		"ldr r0, %[v]\n\t"
+		"cmp %[expect], r0\n\t"
+		"bne 5f\n\t"
+		RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 6f)
+		"ldr r0, %[v]\n\t"
+		"cmp %[expect], r0\n\t"
+		"bne 7f\n\t"
+#endif
+		/* try memcpy */
+		"cmp %[len], #0\n\t" \
+		"beq 333f\n\t" \
+		"222:\n\t" \
+		"ldrb %%r0, [%[src]]\n\t" \
+		"strb %%r0, [%[dst]]\n\t" \
+		"adds %[src], #1\n\t" \
+		"adds %[dst], #1\n\t" \
+		"subs %[len], #1\n\t" \
+		"bne 222b\n\t" \
+		"333:\n\t" \
+		RSEQ_INJECT_ASM(5)
+		/* final store */
+		"str %[newv], %[v]\n\t"
+		"2:\n\t"
+		RSEQ_INJECT_ASM(6)
+		/* teardown */
+		"ldr %[len], %[rseq_scratch2]\n\t"
+		"ldr %[dst], %[rseq_scratch1]\n\t"
+		"ldr %[src], %[rseq_scratch0]\n\t"
+		"b 8f\n\t"
+		RSEQ_ASM_DEFINE_ABORT(3, 4,
+				      /* teardown */
+				      "ldr %[len], %[rseq_scratch2]\n\t"
+				      "ldr %[dst], %[rseq_scratch1]\n\t"
+				      "ldr %[src], %[rseq_scratch0]\n\t",
+				      abort, 1b, 2b, 4f)
+		RSEQ_ASM_DEFINE_CMPFAIL(5,
+					/* teardown */
+					"ldr %[len], %[rseq_scratch2]\n\t"
+					"ldr %[dst], %[rseq_scratch1]\n\t"
+					"ldr %[src], %[rseq_scratch0]\n\t",
+					cmpfail)
+#ifdef RSEQ_COMPARE_TWICE
+		RSEQ_ASM_DEFINE_CMPFAIL(6,
+					/* teardown */
+					"ldr %[len], %[rseq_scratch2]\n\t"
+					"ldr %[dst], %[rseq_scratch1]\n\t"
+					"ldr %[src], %[rseq_scratch0]\n\t",
+					error1)
+		RSEQ_ASM_DEFINE_CMPFAIL(7,
+					/* teardown */
+					"ldr %[len], %[rseq_scratch2]\n\t"
+					"ldr %[dst], %[rseq_scratch1]\n\t"
+					"ldr %[src], %[rseq_scratch0]\n\t",
+					error2)
+#endif
+		"8:\n\t"
+		: /* gcc asm goto does not allow outputs */
+		: [cpu_id]		"r" (cpu),
+		  [current_cpu_id]	"m" (__rseq_abi.cpu_id),
+		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  /* final store input */
+		  [v]			"m" (*v),
+		  [expect]		"r" (expect),
+		  [newv]		"r" (newv),
+		  /* try memcpy input */
+		  [dst]			"r" (dst),
+		  [src]			"r" (src),
+		  [len]			"r" (len),
+		  [rseq_scratch0]	"m" (rseq_scratch[0]),
+		  [rseq_scratch1]	"m" (rseq_scratch[1]),
+		  [rseq_scratch2]	"m" (rseq_scratch[2])
+		  RSEQ_INJECT_INPUT
+		: "r0", "memory", "cc"
+		  RSEQ_INJECT_CLOBBER
+		: abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+		  , error1, error2
+#endif
+	);
+	rseq_workaround_gcc_asm_size_guess();
+	return 0;
+abort:
+	rseq_workaround_gcc_asm_size_guess();
+	RSEQ_INJECT_FAILED
+	return -1;
+cmpfail:
+	rseq_workaround_gcc_asm_size_guess();
+	return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+	rseq_workaround_gcc_asm_size_guess();
+	rseq_bug("cpu_id comparison failed");
+error2:
+	rseq_workaround_gcc_asm_size_guess();
+	rseq_bug("expected value comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect,
+					 void *dst, void *src, size_t len,
+					 intptr_t newv, int cpu)
+{
+	uint32_t rseq_scratch[3];
+
+	RSEQ_INJECT_C(9)
+
+	rseq_workaround_gcc_asm_size_guess();
+	__asm__ __volatile__ goto (
+		RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */
+		"str %[src], %[rseq_scratch0]\n\t"
+		"str %[dst], %[rseq_scratch1]\n\t"
+		"str %[len], %[rseq_scratch2]\n\t"
+		/* Start rseq by storing table entry pointer into rseq_cs. */
+		RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+		RSEQ_INJECT_ASM(3)
+		"ldr r0, %[v]\n\t"
+		"cmp %[expect], r0\n\t"
+		"bne 5f\n\t"
+		RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 6f)
+		"ldr r0, %[v]\n\t"
+		"cmp %[expect], r0\n\t"
+		"bne 7f\n\t"
+#endif
+		/* try memcpy */
+		"cmp %[len], #0\n\t" \
+		"beq 333f\n\t" \
+		"222:\n\t" \
+		"ldrb %%r0, [%[src]]\n\t" \
+		"strb %%r0, [%[dst]]\n\t" \
+		"adds %[src], #1\n\t" \
+		"adds %[dst], #1\n\t" \
+		"subs %[len], #1\n\t" \
+		"bne 222b\n\t" \
+		"333:\n\t" \
+		RSEQ_INJECT_ASM(5)
+		"dmb\n\t"	/* full mb provides store-release */
+		/* final store */
+		"str %[newv], %[v]\n\t"
+		"2:\n\t"
+		RSEQ_INJECT_ASM(6)
+		/* teardown */
+		"ldr %[len], %[rseq_scratch2]\n\t"
+		"ldr %[dst], %[rseq_scratch1]\n\t"
+		"ldr %[src], %[rseq_scratch0]\n\t"
+		"b 8f\n\t"
+		RSEQ_ASM_DEFINE_ABORT(3, 4,
+				      /* teardown */
+				      "ldr %[len], %[rseq_scratch2]\n\t"
+				      "ldr %[dst], %[rseq_scratch1]\n\t"
+				      "ldr %[src], %[rseq_scratch0]\n\t",
+				      abort, 1b, 2b, 4f)
+		RSEQ_ASM_DEFINE_CMPFAIL(5,
+					/* teardown */
+					"ldr %[len], %[rseq_scratch2]\n\t"
+					"ldr %[dst], %[rseq_scratch1]\n\t"
+					"ldr %[src], %[rseq_scratch0]\n\t",
+					cmpfail)
+#ifdef RSEQ_COMPARE_TWICE
+		RSEQ_ASM_DEFINE_CMPFAIL(6,
+					/* teardown */
+					"ldr %[len], %[rseq_scratch2]\n\t"
+					"ldr %[dst], %[rseq_scratch1]\n\t"
+					"ldr %[src], %[rseq_scratch0]\n\t",
+					error1)
+		RSEQ_ASM_DEFINE_CMPFAIL(7,
+					/* teardown */
+					"ldr %[len], %[rseq_scratch2]\n\t"
+					"ldr %[dst], %[rseq_scratch1]\n\t"
+					"ldr %[src], %[rseq_scratch0]\n\t",
+					error2)
+#endif
+		"8:\n\t"
+		: /* gcc asm goto does not allow outputs */
+		: [cpu_id]		"r" (cpu),
+		  [current_cpu_id]	"m" (__rseq_abi.cpu_id),
+		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  /* final store input */
+		  [v]			"m" (*v),
+		  [expect]		"r" (expect),
+		  [newv]		"r" (newv),
+		  /* try memcpy input */
+		  [dst]			"r" (dst),
+		  [src]			"r" (src),
+		  [len]			"r" (len),
+		  [rseq_scratch0]	"m" (rseq_scratch[0]),
+		  [rseq_scratch1]	"m" (rseq_scratch[1]),
+		  [rseq_scratch2]	"m" (rseq_scratch[2])
+		  RSEQ_INJECT_INPUT
+		: "r0", "memory", "cc"
+		  RSEQ_INJECT_CLOBBER
+		: abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+		  , error1, error2
+#endif
+	);
+	rseq_workaround_gcc_asm_size_guess();
+	return 0;
+abort:
+	rseq_workaround_gcc_asm_size_guess();
+	RSEQ_INJECT_FAILED
+	return -1;
+cmpfail:
+	rseq_workaround_gcc_asm_size_guess();
+	return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+	rseq_workaround_gcc_asm_size_guess();
+	rseq_bug("cpu_id comparison failed");
+error2:
+	rseq_workaround_gcc_asm_size_guess();
+	rseq_bug("expected value comparison failed");
+#endif
+}
+
+#endif /* !RSEQ_SKIP_FASTPATH */
diff --git a/tools/testing/selftests/rseq/rseq-arm64.h b/tools/testing/selftests/rseq/rseq-arm64.h
new file mode 100644
index 000000000..954f34671
--- /dev/null
+++ b/tools/testing/selftests/rseq/rseq-arm64.h
@@ -0,0 +1,594 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * rseq-arm64.h
+ *
+ * (C) Copyright 2016-2018 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+ * (C) Copyright 2018 - Will Deacon <will.deacon@arm.com>
+ */
+
+#define RSEQ_SIG	0xd428bc00	/* BRK #0x45E0 */
+
+#define rseq_smp_mb()	__asm__ __volatile__ ("dmb ish" ::: "memory")
+#define rseq_smp_rmb()	__asm__ __volatile__ ("dmb ishld" ::: "memory")
+#define rseq_smp_wmb()	__asm__ __volatile__ ("dmb ishst" ::: "memory")
+
+#define rseq_smp_load_acquire(p)						\
+__extension__ ({								\
+	__typeof(*p) ____p1;							\
+	switch (sizeof(*p)) {							\
+	case 1:									\
+		asm volatile ("ldarb %w0, %1"					\
+			: "=r" (*(__u8 *)p)					\
+			: "Q" (*p) : "memory");					\
+		break;								\
+	case 2:									\
+		asm volatile ("ldarh %w0, %1"					\
+			: "=r" (*(__u16 *)p)					\
+			: "Q" (*p) : "memory");					\
+		break;								\
+	case 4:									\
+		asm volatile ("ldar %w0, %1"					\
+			: "=r" (*(__u32 *)p)					\
+			: "Q" (*p) : "memory");					\
+		break;								\
+	case 8:									\
+		asm volatile ("ldar %0, %1"					\
+			: "=r" (*(__u64 *)p)					\
+			: "Q" (*p) : "memory");					\
+		break;								\
+	}									\
+	____p1;									\
+})
+
+#define rseq_smp_acquire__after_ctrl_dep()	rseq_smp_rmb()
+
+#define rseq_smp_store_release(p, v)						\
+do {										\
+	switch (sizeof(*p)) {							\
+	case 1:									\
+		asm volatile ("stlrb %w1, %0"					\
+				: "=Q" (*p)					\
+				: "r" ((__u8)v)					\
+				: "memory");					\
+		break;								\
+	case 2:									\
+		asm volatile ("stlrh %w1, %0"					\
+				: "=Q" (*p)					\
+				: "r" ((__u16)v)				\
+				: "memory");					\
+		break;								\
+	case 4:									\
+		asm volatile ("stlr %w1, %0"					\
+				: "=Q" (*p)					\
+				: "r" ((__u32)v)				\
+				: "memory");					\
+		break;								\
+	case 8:									\
+		asm volatile ("stlr %1, %0"					\
+				: "=Q" (*p)					\
+				: "r" ((__u64)v)				\
+				: "memory");					\
+		break;								\
+	}									\
+} while (0)
+
+#ifdef RSEQ_SKIP_FASTPATH
+#include "rseq-skip.h"
+#else /* !RSEQ_SKIP_FASTPATH */
+
+#define RSEQ_ASM_TMP_REG32	"w15"
+#define RSEQ_ASM_TMP_REG	"x15"
+#define RSEQ_ASM_TMP_REG_2	"x14"
+
+#define __RSEQ_ASM_DEFINE_TABLE(label, version, flags, start_ip,		\
+				post_commit_offset, abort_ip)			\
+	"	.pushsection	__rseq_table, \"aw\"\n"				\
+	"	.balign	32\n"							\
+	__rseq_str(label) ":\n"							\
+	"	.long	" __rseq_str(version) ", " __rseq_str(flags) "\n"	\
+	"	.quad	" __rseq_str(start_ip) ", "				\
+			  __rseq_str(post_commit_offset) ", "			\
+			  __rseq_str(abort_ip) "\n"				\
+	"	.popsection\n"
+
+#define RSEQ_ASM_DEFINE_TABLE(label, start_ip, post_commit_ip, abort_ip)	\
+	__RSEQ_ASM_DEFINE_TABLE(label, 0x0, 0x0, start_ip,			\
+				(post_commit_ip - start_ip), abort_ip)
+
+#define RSEQ_ASM_STORE_RSEQ_CS(label, cs_label, rseq_cs)			\
+	RSEQ_INJECT_ASM(1)							\
+	"	adrp	" RSEQ_ASM_TMP_REG ", " __rseq_str(cs_label) "\n"	\
+	"	add	" RSEQ_ASM_TMP_REG ", " RSEQ_ASM_TMP_REG		\
+			", :lo12:" __rseq_str(cs_label) "\n"			\
+	"	str	" RSEQ_ASM_TMP_REG ", %[" __rseq_str(rseq_cs) "]\n"	\
+	__rseq_str(label) ":\n"
+
+#define RSEQ_ASM_DEFINE_ABORT(label, abort_label)				\
+	"	b	222f\n"							\
+	"	.inst 	"	__rseq_str(RSEQ_SIG) "\n"			\
+	__rseq_str(label) ":\n"							\
+	"	b	%l[" __rseq_str(abort_label) "]\n"			\
+	"222:\n"
+
+#define RSEQ_ASM_OP_STORE(value, var)						\
+	"	str	%[" __rseq_str(value) "], %[" __rseq_str(var) "]\n"
+
+#define RSEQ_ASM_OP_STORE_RELEASE(value, var)					\
+	"	stlr	%[" __rseq_str(value) "], %[" __rseq_str(var) "]\n"
+
+#define RSEQ_ASM_OP_FINAL_STORE(value, var, post_commit_label)			\
+	RSEQ_ASM_OP_STORE(value, var)						\
+	__rseq_str(post_commit_label) ":\n"
+
+#define RSEQ_ASM_OP_FINAL_STORE_RELEASE(value, var, post_commit_label)		\
+	RSEQ_ASM_OP_STORE_RELEASE(value, var)					\
+	__rseq_str(post_commit_label) ":\n"
+
+#define RSEQ_ASM_OP_CMPEQ(var, expect, label)					\
+	"	ldr	" RSEQ_ASM_TMP_REG ", %[" __rseq_str(var) "]\n"		\
+	"	sub	" RSEQ_ASM_TMP_REG ", " RSEQ_ASM_TMP_REG		\
+			", %[" __rseq_str(expect) "]\n"				\
+	"	cbnz	" RSEQ_ASM_TMP_REG ", " __rseq_str(label) "\n"
+
+#define RSEQ_ASM_OP_CMPEQ32(var, expect, label)					\
+	"	ldr	" RSEQ_ASM_TMP_REG32 ", %[" __rseq_str(var) "]\n"	\
+	"	sub	" RSEQ_ASM_TMP_REG32 ", " RSEQ_ASM_TMP_REG32		\
+			", %w[" __rseq_str(expect) "]\n"			\
+	"	cbnz	" RSEQ_ASM_TMP_REG32 ", " __rseq_str(label) "\n"
+
+#define RSEQ_ASM_OP_CMPNE(var, expect, label)					\
+	"	ldr	" RSEQ_ASM_TMP_REG ", %[" __rseq_str(var) "]\n"		\
+	"	sub	" RSEQ_ASM_TMP_REG ", " RSEQ_ASM_TMP_REG		\
+			", %[" __rseq_str(expect) "]\n"				\
+	"	cbz	" RSEQ_ASM_TMP_REG ", " __rseq_str(label) "\n"
+
+#define RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, label)			\
+	RSEQ_INJECT_ASM(2)							\
+	RSEQ_ASM_OP_CMPEQ32(current_cpu_id, cpu_id, label)
+
+#define RSEQ_ASM_OP_R_LOAD(var)							\
+	"	ldr	" RSEQ_ASM_TMP_REG ", %[" __rseq_str(var) "]\n"
+
+#define RSEQ_ASM_OP_R_STORE(var)						\
+	"	str	" RSEQ_ASM_TMP_REG ", %[" __rseq_str(var) "]\n"
+
+#define RSEQ_ASM_OP_R_LOAD_OFF(offset)						\
+	"	ldr	" RSEQ_ASM_TMP_REG ", [" RSEQ_ASM_TMP_REG		\
+			", %[" __rseq_str(offset) "]]\n"
+
+#define RSEQ_ASM_OP_R_ADD(count)						\
+	"	add	" RSEQ_ASM_TMP_REG ", " RSEQ_ASM_TMP_REG		\
+			", %[" __rseq_str(count) "]\n"
+
+#define RSEQ_ASM_OP_R_FINAL_STORE(var, post_commit_label)			\
+	"	str	" RSEQ_ASM_TMP_REG ", %[" __rseq_str(var) "]\n"		\
+	__rseq_str(post_commit_label) ":\n"
+
+#define RSEQ_ASM_OP_R_BAD_MEMCPY(dst, src, len)					\
+	"	cbz	%[" __rseq_str(len) "], 333f\n"				\
+	"	mov	" RSEQ_ASM_TMP_REG_2 ", %[" __rseq_str(len) "]\n"	\
+	"222:	sub	" RSEQ_ASM_TMP_REG_2 ", " RSEQ_ASM_TMP_REG_2 ", #1\n"	\
+	"	ldrb	" RSEQ_ASM_TMP_REG32 ", [%[" __rseq_str(src) "]"	\
+			", " RSEQ_ASM_TMP_REG_2 "]\n"				\
+	"	strb	" RSEQ_ASM_TMP_REG32 ", [%[" __rseq_str(dst) "]"	\
+			", " RSEQ_ASM_TMP_REG_2 "]\n"				\
+	"	cbnz	" RSEQ_ASM_TMP_REG_2 ", 222b\n"				\
+	"333:\n"
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
+{
+	RSEQ_INJECT_C(9)
+
+	__asm__ __volatile__ goto (
+		RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f)
+		RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs)
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+		RSEQ_INJECT_ASM(3)
+		RSEQ_ASM_OP_CMPEQ(v, expect, %l[cmpfail])
+		RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+		RSEQ_ASM_OP_CMPEQ(v, expect, %l[error2])
+#endif
+		RSEQ_ASM_OP_FINAL_STORE(newv, v, 3)
+		RSEQ_INJECT_ASM(5)
+		RSEQ_ASM_DEFINE_ABORT(4, abort)
+		: /* gcc asm goto does not allow outputs */
+		: [cpu_id]		"r" (cpu),
+		  [current_cpu_id]	"Qo" (__rseq_abi.cpu_id),
+		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  [v]			"Qo" (*v),
+		  [expect]		"r" (expect),
+		  [newv]		"r" (newv)
+		  RSEQ_INJECT_INPUT
+		: "memory", RSEQ_ASM_TMP_REG
+		: abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+		  , error1, error2
+#endif
+	);
+
+	return 0;
+abort:
+	RSEQ_INJECT_FAILED
+	return -1;
+cmpfail:
+	return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+	rseq_bug("cpu_id comparison failed");
+error2:
+	rseq_bug("expected value comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
+			       off_t voffp, intptr_t *load, int cpu)
+{
+	RSEQ_INJECT_C(9)
+
+	__asm__ __volatile__ goto (
+		RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f)
+		RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs)
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+		RSEQ_INJECT_ASM(3)
+		RSEQ_ASM_OP_CMPNE(v, expectnot, %l[cmpfail])
+		RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+		RSEQ_ASM_OP_CMPNE(v, expectnot, %l[error2])
+#endif
+		RSEQ_ASM_OP_R_LOAD(v)
+		RSEQ_ASM_OP_R_STORE(load)
+		RSEQ_ASM_OP_R_LOAD_OFF(voffp)
+		RSEQ_ASM_OP_R_FINAL_STORE(v, 3)
+		RSEQ_INJECT_ASM(5)
+		RSEQ_ASM_DEFINE_ABORT(4, abort)
+		: /* gcc asm goto does not allow outputs */
+		: [cpu_id]		"r" (cpu),
+		  [current_cpu_id]	"Qo" (__rseq_abi.cpu_id),
+		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  [v]			"Qo" (*v),
+		  [expectnot]		"r" (expectnot),
+		  [load]		"Qo" (*load),
+		  [voffp]		"r" (voffp)
+		  RSEQ_INJECT_INPUT
+		: "memory", RSEQ_ASM_TMP_REG
+		: abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+		  , error1, error2
+#endif
+	);
+	return 0;
+abort:
+	RSEQ_INJECT_FAILED
+	return -1;
+cmpfail:
+	return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+	rseq_bug("cpu_id comparison failed");
+error2:
+	rseq_bug("expected value comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_addv(intptr_t *v, intptr_t count, int cpu)
+{
+	RSEQ_INJECT_C(9)
+
+	__asm__ __volatile__ goto (
+		RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f)
+		RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs)
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+		RSEQ_INJECT_ASM(3)
+#ifdef RSEQ_COMPARE_TWICE
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+#endif
+		RSEQ_ASM_OP_R_LOAD(v)
+		RSEQ_ASM_OP_R_ADD(count)
+		RSEQ_ASM_OP_R_FINAL_STORE(v, 3)
+		RSEQ_INJECT_ASM(4)
+		RSEQ_ASM_DEFINE_ABORT(4, abort)
+		: /* gcc asm goto does not allow outputs */
+		: [cpu_id]		"r" (cpu),
+		  [current_cpu_id]	"Qo" (__rseq_abi.cpu_id),
+		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  [v]			"Qo" (*v),
+		  [count]		"r" (count)
+		  RSEQ_INJECT_INPUT
+		: "memory", RSEQ_ASM_TMP_REG
+		: abort
+#ifdef RSEQ_COMPARE_TWICE
+		  , error1
+#endif
+	);
+	return 0;
+abort:
+	RSEQ_INJECT_FAILED
+	return -1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+	rseq_bug("cpu_id comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect,
+				 intptr_t *v2, intptr_t newv2,
+				 intptr_t newv, int cpu)
+{
+	RSEQ_INJECT_C(9)
+
+	__asm__ __volatile__ goto (
+		RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f)
+		RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs)
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+		RSEQ_INJECT_ASM(3)
+		RSEQ_ASM_OP_CMPEQ(v, expect, %l[cmpfail])
+		RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+		RSEQ_ASM_OP_CMPEQ(v, expect, %l[error2])
+#endif
+		RSEQ_ASM_OP_STORE(newv2, v2)
+		RSEQ_INJECT_ASM(5)
+		RSEQ_ASM_OP_FINAL_STORE(newv, v, 3)
+		RSEQ_INJECT_ASM(6)
+		RSEQ_ASM_DEFINE_ABORT(4, abort)
+		: /* gcc asm goto does not allow outputs */
+		: [cpu_id]		"r" (cpu),
+		  [current_cpu_id]	"Qo" (__rseq_abi.cpu_id),
+		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  [expect]		"r" (expect),
+		  [v]			"Qo" (*v),
+		  [newv]		"r" (newv),
+		  [v2]			"Qo" (*v2),
+		  [newv2]		"r" (newv2)
+		  RSEQ_INJECT_INPUT
+		: "memory", RSEQ_ASM_TMP_REG
+		: abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+		  , error1, error2
+#endif
+	);
+
+	return 0;
+abort:
+	RSEQ_INJECT_FAILED
+	return -1;
+cmpfail:
+	return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+	rseq_bug("cpu_id comparison failed");
+error2:
+	rseq_bug("expected value comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect,
+					 intptr_t *v2, intptr_t newv2,
+					 intptr_t newv, int cpu)
+{
+	RSEQ_INJECT_C(9)
+
+	__asm__ __volatile__ goto (
+		RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f)
+		RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs)
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+		RSEQ_INJECT_ASM(3)
+		RSEQ_ASM_OP_CMPEQ(v, expect, %l[cmpfail])
+		RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+		RSEQ_ASM_OP_CMPEQ(v, expect, %l[error2])
+#endif
+		RSEQ_ASM_OP_STORE(newv2, v2)
+		RSEQ_INJECT_ASM(5)
+		RSEQ_ASM_OP_FINAL_STORE_RELEASE(newv, v, 3)
+		RSEQ_INJECT_ASM(6)
+		RSEQ_ASM_DEFINE_ABORT(4, abort)
+		: /* gcc asm goto does not allow outputs */
+		: [cpu_id]		"r" (cpu),
+		  [current_cpu_id]	"Qo" (__rseq_abi.cpu_id),
+		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  [expect]		"r" (expect),
+		  [v]			"Qo" (*v),
+		  [newv]		"r" (newv),
+		  [v2]			"Qo" (*v2),
+		  [newv2]		"r" (newv2)
+		  RSEQ_INJECT_INPUT
+		: "memory", RSEQ_ASM_TMP_REG
+		: abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+		  , error1, error2
+#endif
+	);
+
+	return 0;
+abort:
+	RSEQ_INJECT_FAILED
+	return -1;
+cmpfail:
+	return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+	rseq_bug("cpu_id comparison failed");
+error2:
+	rseq_bug("expected value comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect,
+			      intptr_t *v2, intptr_t expect2,
+			      intptr_t newv, int cpu)
+{
+	RSEQ_INJECT_C(9)
+
+	__asm__ __volatile__ goto (
+		RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f)
+		RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs)
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+		RSEQ_INJECT_ASM(3)
+		RSEQ_ASM_OP_CMPEQ(v, expect, %l[cmpfail])
+		RSEQ_INJECT_ASM(4)
+		RSEQ_ASM_OP_CMPEQ(v2, expect2, %l[cmpfail])
+		RSEQ_INJECT_ASM(5)
+#ifdef RSEQ_COMPARE_TWICE
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+		RSEQ_ASM_OP_CMPEQ(v, expect, %l[error2])
+		RSEQ_ASM_OP_CMPEQ(v2, expect2, %l[error3])
+#endif
+		RSEQ_ASM_OP_FINAL_STORE(newv, v, 3)
+		RSEQ_INJECT_ASM(6)
+		RSEQ_ASM_DEFINE_ABORT(4, abort)
+		: /* gcc asm goto does not allow outputs */
+		: [cpu_id]		"r" (cpu),
+		  [current_cpu_id]	"Qo" (__rseq_abi.cpu_id),
+		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  [v]			"Qo" (*v),
+		  [expect]		"r" (expect),
+		  [v2]			"Qo" (*v2),
+		  [expect2]		"r" (expect2),
+		  [newv]		"r" (newv)
+		  RSEQ_INJECT_INPUT
+		: "memory", RSEQ_ASM_TMP_REG
+		: abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+		  , error1, error2, error3
+#endif
+	);
+
+	return 0;
+abort:
+	RSEQ_INJECT_FAILED
+	return -1;
+cmpfail:
+	return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+	rseq_bug("cpu_id comparison failed");
+error2:
+	rseq_bug("expected value comparison failed");
+error3:
+	rseq_bug("2nd expected value comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect,
+				 void *dst, void *src, size_t len,
+				 intptr_t newv, int cpu)
+{
+	RSEQ_INJECT_C(9)
+
+	__asm__ __volatile__ goto (
+		RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f)
+		RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs)
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+		RSEQ_INJECT_ASM(3)
+		RSEQ_ASM_OP_CMPEQ(v, expect, %l[cmpfail])
+		RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+		RSEQ_ASM_OP_CMPEQ(v, expect, %l[error2])
+#endif
+		RSEQ_ASM_OP_R_BAD_MEMCPY(dst, src, len)
+		RSEQ_INJECT_ASM(5)
+		RSEQ_ASM_OP_FINAL_STORE(newv, v, 3)
+		RSEQ_INJECT_ASM(6)
+		RSEQ_ASM_DEFINE_ABORT(4, abort)
+		: /* gcc asm goto does not allow outputs */
+		: [cpu_id]		"r" (cpu),
+		  [current_cpu_id]	"Qo" (__rseq_abi.cpu_id),
+		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  [expect]		"r" (expect),
+		  [v]			"Qo" (*v),
+		  [newv]		"r" (newv),
+		  [dst]			"r" (dst),
+		  [src]			"r" (src),
+		  [len]			"r" (len)
+		  RSEQ_INJECT_INPUT
+		: "memory", RSEQ_ASM_TMP_REG, RSEQ_ASM_TMP_REG_2
+		: abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+		  , error1, error2
+#endif
+	);
+
+	return 0;
+abort:
+	RSEQ_INJECT_FAILED
+	return -1;
+cmpfail:
+	return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+	rseq_bug("cpu_id comparison failed");
+error2:
+	rseq_bug("expected value comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect,
+					 void *dst, void *src, size_t len,
+					 intptr_t newv, int cpu)
+{
+	RSEQ_INJECT_C(9)
+
+	__asm__ __volatile__ goto (
+		RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f)
+		RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs)
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+		RSEQ_INJECT_ASM(3)
+		RSEQ_ASM_OP_CMPEQ(v, expect, %l[cmpfail])
+		RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+		RSEQ_ASM_OP_CMPEQ(v, expect, %l[error2])
+#endif
+		RSEQ_ASM_OP_R_BAD_MEMCPY(dst, src, len)
+		RSEQ_INJECT_ASM(5)
+		RSEQ_ASM_OP_FINAL_STORE_RELEASE(newv, v, 3)
+		RSEQ_INJECT_ASM(6)
+		RSEQ_ASM_DEFINE_ABORT(4, abort)
+		: /* gcc asm goto does not allow outputs */
+		: [cpu_id]		"r" (cpu),
+		  [current_cpu_id]	"Qo" (__rseq_abi.cpu_id),
+		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  [expect]		"r" (expect),
+		  [v]			"Qo" (*v),
+		  [newv]		"r" (newv),
+		  [dst]			"r" (dst),
+		  [src]			"r" (src),
+		  [len]			"r" (len)
+		  RSEQ_INJECT_INPUT
+		: "memory", RSEQ_ASM_TMP_REG, RSEQ_ASM_TMP_REG_2
+		: abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+		  , error1, error2
+#endif
+	);
+
+	return 0;
+abort:
+	RSEQ_INJECT_FAILED
+	return -1;
+cmpfail:
+	return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+	rseq_bug("cpu_id comparison failed");
+error2:
+	rseq_bug("expected value comparison failed");
+#endif
+}
+
+#endif /* !RSEQ_SKIP_FASTPATH */
diff --git a/tools/testing/selftests/rseq/rseq-mips.h b/tools/testing/selftests/rseq/rseq-mips.h
new file mode 100644
index 000000000..7f48ecf46
--- /dev/null
+++ b/tools/testing/selftests/rseq/rseq-mips.h
@@ -0,0 +1,725 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * Author: Paul Burton <paul.burton@mips.com>
+ * (C) Copyright 2018 MIPS Tech LLC
+ *
+ * Based on rseq-arm.h:
+ * (C) Copyright 2016-2018 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+ */
+
+#define RSEQ_SIG	0x53053053
+
+#define rseq_smp_mb()	__asm__ __volatile__ ("sync" ::: "memory")
+#define rseq_smp_rmb()	rseq_smp_mb()
+#define rseq_smp_wmb()	rseq_smp_mb()
+
+#define rseq_smp_load_acquire(p)					\
+__extension__ ({							\
+	__typeof(*p) ____p1 = RSEQ_READ_ONCE(*p);			\
+	rseq_smp_mb();							\
+	____p1;								\
+})
+
+#define rseq_smp_acquire__after_ctrl_dep()	rseq_smp_rmb()
+
+#define rseq_smp_store_release(p, v)					\
+do {									\
+	rseq_smp_mb();							\
+	RSEQ_WRITE_ONCE(*p, v);						\
+} while (0)
+
+#ifdef RSEQ_SKIP_FASTPATH
+#include "rseq-skip.h"
+#else /* !RSEQ_SKIP_FASTPATH */
+
+#if _MIPS_SZLONG == 64
+# define LONG			".dword"
+# define LONG_LA		"dla"
+# define LONG_L			"ld"
+# define LONG_S			"sd"
+# define LONG_ADDI		"daddiu"
+# define U32_U64_PAD(x)		x
+#elif _MIPS_SZLONG == 32
+# define LONG			".word"
+# define LONG_LA		"la"
+# define LONG_L			"lw"
+# define LONG_S			"sw"
+# define LONG_ADDI		"addiu"
+# ifdef __BIG_ENDIAN
+#  define U32_U64_PAD(x)	"0x0, " x
+# else
+#  define U32_U64_PAD(x)	x ", 0x0"
+# endif
+#else
+# error unsupported _MIPS_SZLONG
+#endif
+
+#define __RSEQ_ASM_DEFINE_TABLE(version, flags,	start_ip, \
+				post_commit_offset, abort_ip) \
+		".pushsection __rseq_table, \"aw\"\n\t" \
+		".balign 32\n\t" \
+		".word " __rseq_str(version) ", " __rseq_str(flags) "\n\t" \
+		LONG " " U32_U64_PAD(__rseq_str(start_ip)) "\n\t" \
+		LONG " " U32_U64_PAD(__rseq_str(post_commit_offset)) "\n\t" \
+		LONG " " U32_U64_PAD(__rseq_str(abort_ip)) "\n\t" \
+		".popsection\n\t"
+
+#define RSEQ_ASM_DEFINE_TABLE(start_ip, post_commit_ip, abort_ip) \
+	__RSEQ_ASM_DEFINE_TABLE(0x0, 0x0, start_ip, \
+				(post_commit_ip - start_ip), abort_ip)
+
+#define RSEQ_ASM_STORE_RSEQ_CS(label, cs_label, rseq_cs) \
+		RSEQ_INJECT_ASM(1) \
+		LONG_LA " $4, " __rseq_str(cs_label) "\n\t" \
+		LONG_S  " $4, %[" __rseq_str(rseq_cs) "]\n\t" \
+		__rseq_str(label) ":\n\t"
+
+#define RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, label) \
+		RSEQ_INJECT_ASM(2) \
+		"lw  $4, %[" __rseq_str(current_cpu_id) "]\n\t" \
+		"bne $4, %[" __rseq_str(cpu_id) "], " __rseq_str(label) "\n\t"
+
+#define __RSEQ_ASM_DEFINE_ABORT(table_label, label, teardown, \
+				abort_label, version, flags, \
+				start_ip, post_commit_offset, abort_ip) \
+		".balign 32\n\t" \
+		__rseq_str(table_label) ":\n\t" \
+		".word " __rseq_str(version) ", " __rseq_str(flags) "\n\t" \
+		LONG " " U32_U64_PAD(__rseq_str(start_ip)) "\n\t" \
+		LONG " " U32_U64_PAD(__rseq_str(post_commit_offset)) "\n\t" \
+		LONG " " U32_U64_PAD(__rseq_str(abort_ip)) "\n\t" \
+		".word " __rseq_str(RSEQ_SIG) "\n\t" \
+		__rseq_str(label) ":\n\t" \
+		teardown \
+		"b %l[" __rseq_str(abort_label) "]\n\t"
+
+#define RSEQ_ASM_DEFINE_ABORT(table_label, label, teardown, abort_label, \
+			      start_ip, post_commit_ip, abort_ip) \
+	__RSEQ_ASM_DEFINE_ABORT(table_label, label, teardown, \
+				abort_label, 0x0, 0x0, start_ip, \
+				(post_commit_ip - start_ip), abort_ip)
+
+#define RSEQ_ASM_DEFINE_CMPFAIL(label, teardown, cmpfail_label) \
+		__rseq_str(label) ":\n\t" \
+		teardown \
+		"b %l[" __rseq_str(cmpfail_label) "]\n\t"
+
+#define rseq_workaround_gcc_asm_size_guess()	__asm__ __volatile__("")
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
+{
+	RSEQ_INJECT_C(9)
+
+	rseq_workaround_gcc_asm_size_guess();
+	__asm__ __volatile__ goto (
+		RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */
+		/* Start rseq by storing table entry pointer into rseq_cs. */
+		RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+		RSEQ_INJECT_ASM(3)
+		LONG_L " $4, %[v]\n\t"
+		"bne $4, %[expect], %l[cmpfail]\n\t"
+		RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+		LONG_L " $4, %[v]\n\t"
+		"bne $4, %[expect], %l[error2]\n\t"
+#endif
+		/* final store */
+		LONG_S " %[newv], %[v]\n\t"
+		"2:\n\t"
+		RSEQ_INJECT_ASM(5)
+		"b 5f\n\t"
+		RSEQ_ASM_DEFINE_ABORT(3, 4, "", abort, 1b, 2b, 4f)
+		"5:\n\t"
+		: /* gcc asm goto does not allow outputs */
+		: [cpu_id]		"r" (cpu),
+		  [current_cpu_id]	"m" (__rseq_abi.cpu_id),
+		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  [v]			"m" (*v),
+		  [expect]		"r" (expect),
+		  [newv]		"r" (newv)
+		  RSEQ_INJECT_INPUT
+		: "$4", "memory"
+		  RSEQ_INJECT_CLOBBER
+		: abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+		  , error1, error2
+#endif
+	);
+	rseq_workaround_gcc_asm_size_guess();
+	return 0;
+abort:
+	rseq_workaround_gcc_asm_size_guess();
+	RSEQ_INJECT_FAILED
+	return -1;
+cmpfail:
+	rseq_workaround_gcc_asm_size_guess();
+	return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+	rseq_bug("cpu_id comparison failed");
+error2:
+	rseq_bug("expected value comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
+			       off_t voffp, intptr_t *load, int cpu)
+{
+	RSEQ_INJECT_C(9)
+
+	rseq_workaround_gcc_asm_size_guess();
+	__asm__ __volatile__ goto (
+		RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */
+		/* Start rseq by storing table entry pointer into rseq_cs. */
+		RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+		RSEQ_INJECT_ASM(3)
+		LONG_L " $4, %[v]\n\t"
+		"beq $4, %[expectnot], %l[cmpfail]\n\t"
+		RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+		LONG_L " $4, %[v]\n\t"
+		"beq $4, %[expectnot], %l[error2]\n\t"
+#endif
+		LONG_S " $4, %[load]\n\t"
+		LONG_ADDI " $4, %[voffp]\n\t"
+		LONG_L " $4, 0($4)\n\t"
+		/* final store */
+		LONG_S " $4, %[v]\n\t"
+		"2:\n\t"
+		RSEQ_INJECT_ASM(5)
+		"b 5f\n\t"
+		RSEQ_ASM_DEFINE_ABORT(3, 4, "", abort, 1b, 2b, 4f)
+		"5:\n\t"
+		: /* gcc asm goto does not allow outputs */
+		: [cpu_id]		"r" (cpu),
+		  [current_cpu_id]	"m" (__rseq_abi.cpu_id),
+		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  /* final store input */
+		  [v]			"m" (*v),
+		  [expectnot]		"r" (expectnot),
+		  [voffp]		"Ir" (voffp),
+		  [load]		"m" (*load)
+		  RSEQ_INJECT_INPUT
+		: "$4", "memory"
+		  RSEQ_INJECT_CLOBBER
+		: abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+		  , error1, error2
+#endif
+	);
+	rseq_workaround_gcc_asm_size_guess();
+	return 0;
+abort:
+	rseq_workaround_gcc_asm_size_guess();
+	RSEQ_INJECT_FAILED
+	return -1;
+cmpfail:
+	rseq_workaround_gcc_asm_size_guess();
+	return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+	rseq_bug("cpu_id comparison failed");
+error2:
+	rseq_bug("expected value comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_addv(intptr_t *v, intptr_t count, int cpu)
+{
+	RSEQ_INJECT_C(9)
+
+	rseq_workaround_gcc_asm_size_guess();
+	__asm__ __volatile__ goto (
+		RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */
+		/* Start rseq by storing table entry pointer into rseq_cs. */
+		RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+		RSEQ_INJECT_ASM(3)
+#ifdef RSEQ_COMPARE_TWICE
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+#endif
+		LONG_L " $4, %[v]\n\t"
+		LONG_ADDI " $4, %[count]\n\t"
+		/* final store */
+		LONG_S " $4, %[v]\n\t"
+		"2:\n\t"
+		RSEQ_INJECT_ASM(4)
+		"b 5f\n\t"
+		RSEQ_ASM_DEFINE_ABORT(3, 4, "", abort, 1b, 2b, 4f)
+		"5:\n\t"
+		: /* gcc asm goto does not allow outputs */
+		: [cpu_id]		"r" (cpu),
+		  [current_cpu_id]	"m" (__rseq_abi.cpu_id),
+		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  [v]			"m" (*v),
+		  [count]		"Ir" (count)
+		  RSEQ_INJECT_INPUT
+		: "$4", "memory"
+		  RSEQ_INJECT_CLOBBER
+		: abort
+#ifdef RSEQ_COMPARE_TWICE
+		  , error1
+#endif
+	);
+	rseq_workaround_gcc_asm_size_guess();
+	return 0;
+abort:
+	rseq_workaround_gcc_asm_size_guess();
+	RSEQ_INJECT_FAILED
+	return -1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+	rseq_bug("cpu_id comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect,
+				 intptr_t *v2, intptr_t newv2,
+				 intptr_t newv, int cpu)
+{
+	RSEQ_INJECT_C(9)
+
+	rseq_workaround_gcc_asm_size_guess();
+	__asm__ __volatile__ goto (
+		RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */
+		/* Start rseq by storing table entry pointer into rseq_cs. */
+		RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+		RSEQ_INJECT_ASM(3)
+		LONG_L " $4, %[v]\n\t"
+		"bne $4, %[expect], %l[cmpfail]\n\t"
+		RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+		LONG_L " $4, %[v]\n\t"
+		"bne $4, %[expect], %l[error2]\n\t"
+#endif
+		/* try store */
+		LONG_S " %[newv2], %[v2]\n\t"
+		RSEQ_INJECT_ASM(5)
+		/* final store */
+		LONG_S " %[newv], %[v]\n\t"
+		"2:\n\t"
+		RSEQ_INJECT_ASM(6)
+		"b 5f\n\t"
+		RSEQ_ASM_DEFINE_ABORT(3, 4, "", abort, 1b, 2b, 4f)
+		"5:\n\t"
+		: /* gcc asm goto does not allow outputs */
+		: [cpu_id]		"r" (cpu),
+		  [current_cpu_id]	"m" (__rseq_abi.cpu_id),
+		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  /* try store input */
+		  [v2]			"m" (*v2),
+		  [newv2]		"r" (newv2),
+		  /* final store input */
+		  [v]			"m" (*v),
+		  [expect]		"r" (expect),
+		  [newv]		"r" (newv)
+		  RSEQ_INJECT_INPUT
+		: "$4", "memory"
+		  RSEQ_INJECT_CLOBBER
+		: abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+		  , error1, error2
+#endif
+	);
+	rseq_workaround_gcc_asm_size_guess();
+	return 0;
+abort:
+	rseq_workaround_gcc_asm_size_guess();
+	RSEQ_INJECT_FAILED
+	return -1;
+cmpfail:
+	rseq_workaround_gcc_asm_size_guess();
+	return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+	rseq_bug("cpu_id comparison failed");
+error2:
+	rseq_bug("expected value comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect,
+					 intptr_t *v2, intptr_t newv2,
+					 intptr_t newv, int cpu)
+{
+	RSEQ_INJECT_C(9)
+
+	rseq_workaround_gcc_asm_size_guess();
+	__asm__ __volatile__ goto (
+		RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */
+		/* Start rseq by storing table entry pointer into rseq_cs. */
+		RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+		RSEQ_INJECT_ASM(3)
+		LONG_L " $4, %[v]\n\t"
+		"bne $4, %[expect], %l[cmpfail]\n\t"
+		RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+		LONG_L " $4, %[v]\n\t"
+		"bne $4, %[expect], %l[error2]\n\t"
+#endif
+		/* try store */
+		LONG_S " %[newv2], %[v2]\n\t"
+		RSEQ_INJECT_ASM(5)
+		"sync\n\t"	/* full sync provides store-release */
+		/* final store */
+		LONG_S " %[newv], %[v]\n\t"
+		"2:\n\t"
+		RSEQ_INJECT_ASM(6)
+		"b 5f\n\t"
+		RSEQ_ASM_DEFINE_ABORT(3, 4, "", abort, 1b, 2b, 4f)
+		"5:\n\t"
+		: /* gcc asm goto does not allow outputs */
+		: [cpu_id]		"r" (cpu),
+		  [current_cpu_id]	"m" (__rseq_abi.cpu_id),
+		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  /* try store input */
+		  [v2]			"m" (*v2),
+		  [newv2]		"r" (newv2),
+		  /* final store input */
+		  [v]			"m" (*v),
+		  [expect]		"r" (expect),
+		  [newv]		"r" (newv)
+		  RSEQ_INJECT_INPUT
+		: "$4", "memory"
+		  RSEQ_INJECT_CLOBBER
+		: abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+		  , error1, error2
+#endif
+	);
+	rseq_workaround_gcc_asm_size_guess();
+	return 0;
+abort:
+	rseq_workaround_gcc_asm_size_guess();
+	RSEQ_INJECT_FAILED
+	return -1;
+cmpfail:
+	rseq_workaround_gcc_asm_size_guess();
+	return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+	rseq_bug("cpu_id comparison failed");
+error2:
+	rseq_bug("expected value comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect,
+			      intptr_t *v2, intptr_t expect2,
+			      intptr_t newv, int cpu)
+{
+	RSEQ_INJECT_C(9)
+
+	rseq_workaround_gcc_asm_size_guess();
+	__asm__ __volatile__ goto (
+		RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */
+		/* Start rseq by storing table entry pointer into rseq_cs. */
+		RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+		RSEQ_INJECT_ASM(3)
+		LONG_L " $4, %[v]\n\t"
+		"bne $4, %[expect], %l[cmpfail]\n\t"
+		RSEQ_INJECT_ASM(4)
+		LONG_L " $4, %[v2]\n\t"
+		"bne $4, %[expect2], %l[cmpfail]\n\t"
+		RSEQ_INJECT_ASM(5)
+#ifdef RSEQ_COMPARE_TWICE
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+		LONG_L " $4, %[v]\n\t"
+		"bne $4, %[expect], %l[error2]\n\t"
+		LONG_L " $4, %[v2]\n\t"
+		"bne $4, %[expect2], %l[error3]\n\t"
+#endif
+		/* final store */
+		LONG_S " %[newv], %[v]\n\t"
+		"2:\n\t"
+		RSEQ_INJECT_ASM(6)
+		"b 5f\n\t"
+		RSEQ_ASM_DEFINE_ABORT(3, 4, "", abort, 1b, 2b, 4f)
+		"5:\n\t"
+		: /* gcc asm goto does not allow outputs */
+		: [cpu_id]		"r" (cpu),
+		  [current_cpu_id]	"m" (__rseq_abi.cpu_id),
+		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  /* cmp2 input */
+		  [v2]			"m" (*v2),
+		  [expect2]		"r" (expect2),
+		  /* final store input */
+		  [v]			"m" (*v),
+		  [expect]		"r" (expect),
+		  [newv]		"r" (newv)
+		  RSEQ_INJECT_INPUT
+		: "$4", "memory"
+		  RSEQ_INJECT_CLOBBER
+		: abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+		  , error1, error2, error3
+#endif
+	);
+	rseq_workaround_gcc_asm_size_guess();
+	return 0;
+abort:
+	rseq_workaround_gcc_asm_size_guess();
+	RSEQ_INJECT_FAILED
+	return -1;
+cmpfail:
+	rseq_workaround_gcc_asm_size_guess();
+	return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+	rseq_bug("cpu_id comparison failed");
+error2:
+	rseq_bug("1st expected value comparison failed");
+error3:
+	rseq_bug("2nd expected value comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect,
+				 void *dst, void *src, size_t len,
+				 intptr_t newv, int cpu)
+{
+	uintptr_t rseq_scratch[3];
+
+	RSEQ_INJECT_C(9)
+
+	rseq_workaround_gcc_asm_size_guess();
+	__asm__ __volatile__ goto (
+		RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */
+		LONG_S " %[src], %[rseq_scratch0]\n\t"
+		LONG_S "  %[dst], %[rseq_scratch1]\n\t"
+		LONG_S " %[len], %[rseq_scratch2]\n\t"
+		/* Start rseq by storing table entry pointer into rseq_cs. */
+		RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+		RSEQ_INJECT_ASM(3)
+		LONG_L " $4, %[v]\n\t"
+		"bne $4, %[expect], 5f\n\t"
+		RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 6f)
+		LONG_L " $4, %[v]\n\t"
+		"bne $4, %[expect], 7f\n\t"
+#endif
+		/* try memcpy */
+		"beqz %[len], 333f\n\t" \
+		"222:\n\t" \
+		"lb   $4, 0(%[src])\n\t" \
+		"sb   $4, 0(%[dst])\n\t" \
+		LONG_ADDI " %[src], 1\n\t" \
+		LONG_ADDI " %[dst], 1\n\t" \
+		LONG_ADDI " %[len], -1\n\t" \
+		"bnez %[len], 222b\n\t" \
+		"333:\n\t" \
+		RSEQ_INJECT_ASM(5)
+		/* final store */
+		LONG_S " %[newv], %[v]\n\t"
+		"2:\n\t"
+		RSEQ_INJECT_ASM(6)
+		/* teardown */
+		LONG_L " %[len], %[rseq_scratch2]\n\t"
+		LONG_L " %[dst], %[rseq_scratch1]\n\t"
+		LONG_L " %[src], %[rseq_scratch0]\n\t"
+		"b 8f\n\t"
+		RSEQ_ASM_DEFINE_ABORT(3, 4,
+				      /* teardown */
+				      LONG_L " %[len], %[rseq_scratch2]\n\t"
+				      LONG_L " %[dst], %[rseq_scratch1]\n\t"
+				      LONG_L " %[src], %[rseq_scratch0]\n\t",
+				      abort, 1b, 2b, 4f)
+		RSEQ_ASM_DEFINE_CMPFAIL(5,
+					/* teardown */
+					LONG_L " %[len], %[rseq_scratch2]\n\t"
+					LONG_L " %[dst], %[rseq_scratch1]\n\t"
+					LONG_L " %[src], %[rseq_scratch0]\n\t",
+					cmpfail)
+#ifdef RSEQ_COMPARE_TWICE
+		RSEQ_ASM_DEFINE_CMPFAIL(6,
+					/* teardown */
+					LONG_L " %[len], %[rseq_scratch2]\n\t"
+					LONG_L " %[dst], %[rseq_scratch1]\n\t"
+					LONG_L " %[src], %[rseq_scratch0]\n\t",
+					error1)
+		RSEQ_ASM_DEFINE_CMPFAIL(7,
+					/* teardown */
+					LONG_L " %[len], %[rseq_scratch2]\n\t"
+					LONG_L " %[dst], %[rseq_scratch1]\n\t"
+					LONG_L " %[src], %[rseq_scratch0]\n\t",
+					error2)
+#endif
+		"8:\n\t"
+		: /* gcc asm goto does not allow outputs */
+		: [cpu_id]		"r" (cpu),
+		  [current_cpu_id]	"m" (__rseq_abi.cpu_id),
+		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  /* final store input */
+		  [v]			"m" (*v),
+		  [expect]		"r" (expect),
+		  [newv]		"r" (newv),
+		  /* try memcpy input */
+		  [dst]			"r" (dst),
+		  [src]			"r" (src),
+		  [len]			"r" (len),
+		  [rseq_scratch0]	"m" (rseq_scratch[0]),
+		  [rseq_scratch1]	"m" (rseq_scratch[1]),
+		  [rseq_scratch2]	"m" (rseq_scratch[2])
+		  RSEQ_INJECT_INPUT
+		: "$4", "memory"
+		  RSEQ_INJECT_CLOBBER
+		: abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+		  , error1, error2
+#endif
+	);
+	rseq_workaround_gcc_asm_size_guess();
+	return 0;
+abort:
+	rseq_workaround_gcc_asm_size_guess();
+	RSEQ_INJECT_FAILED
+	return -1;
+cmpfail:
+	rseq_workaround_gcc_asm_size_guess();
+	return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+	rseq_workaround_gcc_asm_size_guess();
+	rseq_bug("cpu_id comparison failed");
+error2:
+	rseq_workaround_gcc_asm_size_guess();
+	rseq_bug("expected value comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect,
+					 void *dst, void *src, size_t len,
+					 intptr_t newv, int cpu)
+{
+	uintptr_t rseq_scratch[3];
+
+	RSEQ_INJECT_C(9)
+
+	rseq_workaround_gcc_asm_size_guess();
+	__asm__ __volatile__ goto (
+		RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */
+		LONG_S " %[src], %[rseq_scratch0]\n\t"
+		LONG_S " %[dst], %[rseq_scratch1]\n\t"
+		LONG_S " %[len], %[rseq_scratch2]\n\t"
+		/* Start rseq by storing table entry pointer into rseq_cs. */
+		RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+		RSEQ_INJECT_ASM(3)
+		LONG_L " $4, %[v]\n\t"
+		"bne $4, %[expect], 5f\n\t"
+		RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 6f)
+		LONG_L " $4, %[v]\n\t"
+		"bne $4, %[expect], 7f\n\t"
+#endif
+		/* try memcpy */
+		"beqz %[len], 333f\n\t" \
+		"222:\n\t" \
+		"lb   $4, 0(%[src])\n\t" \
+		"sb   $4, 0(%[dst])\n\t" \
+		LONG_ADDI " %[src], 1\n\t" \
+		LONG_ADDI " %[dst], 1\n\t" \
+		LONG_ADDI " %[len], -1\n\t" \
+		"bnez %[len], 222b\n\t" \
+		"333:\n\t" \
+		RSEQ_INJECT_ASM(5)
+		"sync\n\t"	/* full sync provides store-release */
+		/* final store */
+		LONG_S " %[newv], %[v]\n\t"
+		"2:\n\t"
+		RSEQ_INJECT_ASM(6)
+		/* teardown */
+		LONG_L " %[len], %[rseq_scratch2]\n\t"
+		LONG_L " %[dst], %[rseq_scratch1]\n\t"
+		LONG_L " %[src], %[rseq_scratch0]\n\t"
+		"b 8f\n\t"
+		RSEQ_ASM_DEFINE_ABORT(3, 4,
+				      /* teardown */
+				      LONG_L " %[len], %[rseq_scratch2]\n\t"
+				      LONG_L " %[dst], %[rseq_scratch1]\n\t"
+				      LONG_L " %[src], %[rseq_scratch0]\n\t",
+				      abort, 1b, 2b, 4f)
+		RSEQ_ASM_DEFINE_CMPFAIL(5,
+					/* teardown */
+					LONG_L " %[len], %[rseq_scratch2]\n\t"
+					LONG_L " %[dst], %[rseq_scratch1]\n\t"
+					LONG_L " %[src], %[rseq_scratch0]\n\t",
+					cmpfail)
+#ifdef RSEQ_COMPARE_TWICE
+		RSEQ_ASM_DEFINE_CMPFAIL(6,
+					/* teardown */
+					LONG_L " %[len], %[rseq_scratch2]\n\t"
+					LONG_L " %[dst], %[rseq_scratch1]\n\t"
+					LONG_L " %[src], %[rseq_scratch0]\n\t",
+					error1)
+		RSEQ_ASM_DEFINE_CMPFAIL(7,
+					/* teardown */
+					LONG_L " %[len], %[rseq_scratch2]\n\t"
+					LONG_L " %[dst], %[rseq_scratch1]\n\t"
+					LONG_L " %[src], %[rseq_scratch0]\n\t",
+					error2)
+#endif
+		"8:\n\t"
+		: /* gcc asm goto does not allow outputs */
+		: [cpu_id]		"r" (cpu),
+		  [current_cpu_id]	"m" (__rseq_abi.cpu_id),
+		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  /* final store input */
+		  [v]			"m" (*v),
+		  [expect]		"r" (expect),
+		  [newv]		"r" (newv),
+		  /* try memcpy input */
+		  [dst]			"r" (dst),
+		  [src]			"r" (src),
+		  [len]			"r" (len),
+		  [rseq_scratch0]	"m" (rseq_scratch[0]),
+		  [rseq_scratch1]	"m" (rseq_scratch[1]),
+		  [rseq_scratch2]	"m" (rseq_scratch[2])
+		  RSEQ_INJECT_INPUT
+		: "$4", "memory"
+		  RSEQ_INJECT_CLOBBER
+		: abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+		  , error1, error2
+#endif
+	);
+	rseq_workaround_gcc_asm_size_guess();
+	return 0;
+abort:
+	rseq_workaround_gcc_asm_size_guess();
+	RSEQ_INJECT_FAILED
+	return -1;
+cmpfail:
+	rseq_workaround_gcc_asm_size_guess();
+	return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+	rseq_workaround_gcc_asm_size_guess();
+	rseq_bug("cpu_id comparison failed");
+error2:
+	rseq_workaround_gcc_asm_size_guess();
+	rseq_bug("expected value comparison failed");
+#endif
+}
+
+#endif /* !RSEQ_SKIP_FASTPATH */
diff --git a/tools/testing/selftests/rseq/rseq-ppc.h b/tools/testing/selftests/rseq/rseq-ppc.h
new file mode 100644
index 000000000..52630c9f4
--- /dev/null
+++ b/tools/testing/selftests/rseq/rseq-ppc.h
@@ -0,0 +1,671 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * rseq-ppc.h
+ *
+ * (C) Copyright 2016-2018 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+ * (C) Copyright 2016-2018 - Boqun Feng <boqun.feng@gmail.com>
+ */
+
+#define RSEQ_SIG	0x53053053
+
+#define rseq_smp_mb()		__asm__ __volatile__ ("sync"	::: "memory", "cc")
+#define rseq_smp_lwsync()	__asm__ __volatile__ ("lwsync"	::: "memory", "cc")
+#define rseq_smp_rmb()		rseq_smp_lwsync()
+#define rseq_smp_wmb()		rseq_smp_lwsync()
+
+#define rseq_smp_load_acquire(p)					\
+__extension__ ({							\
+	__typeof(*p) ____p1 = RSEQ_READ_ONCE(*p);			\
+	rseq_smp_lwsync();						\
+	____p1;								\
+})
+
+#define rseq_smp_acquire__after_ctrl_dep()	rseq_smp_lwsync()
+
+#define rseq_smp_store_release(p, v)					\
+do {									\
+	rseq_smp_lwsync();						\
+	RSEQ_WRITE_ONCE(*p, v);						\
+} while (0)
+
+#ifdef RSEQ_SKIP_FASTPATH
+#include "rseq-skip.h"
+#else /* !RSEQ_SKIP_FASTPATH */
+
+/*
+ * The __rseq_table section can be used by debuggers to better handle
+ * single-stepping through the restartable critical sections.
+ */
+
+#ifdef __PPC64__
+
+#define STORE_WORD	"std "
+#define LOAD_WORD	"ld "
+#define LOADX_WORD	"ldx "
+#define CMP_WORD	"cmpd "
+
+#define __RSEQ_ASM_DEFINE_TABLE(label, version, flags,				\
+			start_ip, post_commit_offset, abort_ip)			\
+		".pushsection __rseq_table, \"aw\"\n\t"				\
+		".balign 32\n\t"						\
+		__rseq_str(label) ":\n\t"					\
+		".long " __rseq_str(version) ", " __rseq_str(flags) "\n\t"	\
+		".quad " __rseq_str(start_ip) ", " __rseq_str(post_commit_offset) ", " __rseq_str(abort_ip) "\n\t" \
+		".popsection\n\t"
+
+#define RSEQ_ASM_STORE_RSEQ_CS(label, cs_label, rseq_cs)			\
+		RSEQ_INJECT_ASM(1)						\
+		"lis %%r17, (" __rseq_str(cs_label) ")@highest\n\t"		\
+		"ori %%r17, %%r17, (" __rseq_str(cs_label) ")@higher\n\t"	\
+		"rldicr %%r17, %%r17, 32, 31\n\t"				\
+		"oris %%r17, %%r17, (" __rseq_str(cs_label) ")@high\n\t"	\
+		"ori %%r17, %%r17, (" __rseq_str(cs_label) ")@l\n\t"		\
+		"std %%r17, %[" __rseq_str(rseq_cs) "]\n\t"			\
+		__rseq_str(label) ":\n\t"
+
+#else /* #ifdef __PPC64__ */
+
+#define STORE_WORD	"stw "
+#define LOAD_WORD	"lwz "
+#define LOADX_WORD	"lwzx "
+#define CMP_WORD	"cmpw "
+
+#define __RSEQ_ASM_DEFINE_TABLE(label, version, flags,				\
+			start_ip, post_commit_offset, abort_ip)			\
+		".pushsection __rseq_table, \"aw\"\n\t"				\
+		".balign 32\n\t"						\
+		__rseq_str(label) ":\n\t"					\
+		".long " __rseq_str(version) ", " __rseq_str(flags) "\n\t"	\
+		/* 32-bit only supported on BE */				\
+		".long 0x0, " __rseq_str(start_ip) ", 0x0, " __rseq_str(post_commit_offset) ", 0x0, " __rseq_str(abort_ip) "\n\t" \
+		".popsection\n\t"
+
+#define RSEQ_ASM_STORE_RSEQ_CS(label, cs_label, rseq_cs)			\
+		RSEQ_INJECT_ASM(1)						\
+		"lis %%r17, (" __rseq_str(cs_label) ")@ha\n\t"			\
+		"addi %%r17, %%r17, (" __rseq_str(cs_label) ")@l\n\t"		\
+		"stw %%r17, %[" __rseq_str(rseq_cs) "]\n\t"			\
+		__rseq_str(label) ":\n\t"
+
+#endif /* #ifdef __PPC64__ */
+
+#define RSEQ_ASM_DEFINE_TABLE(label, start_ip, post_commit_ip, abort_ip)	\
+		__RSEQ_ASM_DEFINE_TABLE(label, 0x0, 0x0, start_ip,		\
+					(post_commit_ip - start_ip), abort_ip)
+
+#define RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, label)			\
+		RSEQ_INJECT_ASM(2)						\
+		"lwz %%r17, %[" __rseq_str(current_cpu_id) "]\n\t"		\
+		"cmpw cr7, %[" __rseq_str(cpu_id) "], %%r17\n\t"		\
+		"bne- cr7, " __rseq_str(label) "\n\t"
+
+#define RSEQ_ASM_DEFINE_ABORT(label, abort_label)				\
+		".pushsection __rseq_failure, \"ax\"\n\t"			\
+		".long " __rseq_str(RSEQ_SIG) "\n\t"				\
+		__rseq_str(label) ":\n\t"					\
+		"b %l[" __rseq_str(abort_label) "]\n\t"				\
+		".popsection\n\t"
+
+/*
+ * RSEQ_ASM_OPs: asm operations for rseq
+ * 	RSEQ_ASM_OP_R_*: has hard-code registers in it
+ * 	RSEQ_ASM_OP_* (else): doesn't have hard-code registers(unless cr7)
+ */
+#define RSEQ_ASM_OP_CMPEQ(var, expect, label)					\
+		LOAD_WORD "%%r17, %[" __rseq_str(var) "]\n\t"			\
+		CMP_WORD "cr7, %%r17, %[" __rseq_str(expect) "]\n\t"		\
+		"bne- cr7, " __rseq_str(label) "\n\t"
+
+#define RSEQ_ASM_OP_CMPNE(var, expectnot, label)				\
+		LOAD_WORD "%%r17, %[" __rseq_str(var) "]\n\t"			\
+		CMP_WORD "cr7, %%r17, %[" __rseq_str(expectnot) "]\n\t"		\
+		"beq- cr7, " __rseq_str(label) "\n\t"
+
+#define RSEQ_ASM_OP_STORE(value, var)						\
+		STORE_WORD "%[" __rseq_str(value) "], %[" __rseq_str(var) "]\n\t"
+
+/* Load @var to r17 */
+#define RSEQ_ASM_OP_R_LOAD(var)							\
+		LOAD_WORD "%%r17, %[" __rseq_str(var) "]\n\t"
+
+/* Store r17 to @var */
+#define RSEQ_ASM_OP_R_STORE(var)						\
+		STORE_WORD "%%r17, %[" __rseq_str(var) "]\n\t"
+
+/* Add @count to r17 */
+#define RSEQ_ASM_OP_R_ADD(count)						\
+		"add %%r17, %[" __rseq_str(count) "], %%r17\n\t"
+
+/* Load (r17 + voffp) to r17 */
+#define RSEQ_ASM_OP_R_LOADX(voffp)						\
+		LOADX_WORD "%%r17, %[" __rseq_str(voffp) "], %%r17\n\t"
+
+/* TODO: implement a faster memcpy. */
+#define RSEQ_ASM_OP_R_MEMCPY() \
+		"cmpdi %%r19, 0\n\t" \
+		"beq 333f\n\t" \
+		"addi %%r20, %%r20, -1\n\t" \
+		"addi %%r21, %%r21, -1\n\t" \
+		"222:\n\t" \
+		"lbzu %%r18, 1(%%r20)\n\t" \
+		"stbu %%r18, 1(%%r21)\n\t" \
+		"addi %%r19, %%r19, -1\n\t" \
+		"cmpdi %%r19, 0\n\t" \
+		"bne 222b\n\t" \
+		"333:\n\t" \
+
+#define RSEQ_ASM_OP_R_FINAL_STORE(var, post_commit_label)			\
+		STORE_WORD "%%r17, %[" __rseq_str(var) "]\n\t"			\
+		__rseq_str(post_commit_label) ":\n\t"
+
+#define RSEQ_ASM_OP_FINAL_STORE(value, var, post_commit_label)			\
+		STORE_WORD "%[" __rseq_str(value) "], %[" __rseq_str(var) "]\n\t" \
+		__rseq_str(post_commit_label) ":\n\t"
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
+{
+	RSEQ_INJECT_C(9)
+
+	__asm__ __volatile__ goto (
+		RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+		/* Start rseq by storing table entry pointer into rseq_cs. */
+		RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
+		/* cmp cpuid */
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+		RSEQ_INJECT_ASM(3)
+		/* cmp @v equal to @expect */
+		RSEQ_ASM_OP_CMPEQ(v, expect, %l[cmpfail])
+		RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+		/* cmp cpuid */
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+		/* cmp @v equal to @expect */
+		RSEQ_ASM_OP_CMPEQ(v, expect, %l[error2])
+#endif
+		/* final store */
+		RSEQ_ASM_OP_FINAL_STORE(newv, v, 2)
+		RSEQ_INJECT_ASM(5)
+		RSEQ_ASM_DEFINE_ABORT(4, abort)
+		: /* gcc asm goto does not allow outputs */
+		: [cpu_id]		"r" (cpu),
+		  [current_cpu_id]	"m" (__rseq_abi.cpu_id),
+		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  [v]			"m" (*v),
+		  [expect]		"r" (expect),
+		  [newv]		"r" (newv)
+		  RSEQ_INJECT_INPUT
+		: "memory", "cc", "r17"
+		  RSEQ_INJECT_CLOBBER
+		: abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+		  , error1, error2
+#endif
+	);
+	return 0;
+abort:
+	RSEQ_INJECT_FAILED
+	return -1;
+cmpfail:
+	return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+	rseq_bug("cpu_id comparison failed");
+error2:
+	rseq_bug("expected value comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
+			       off_t voffp, intptr_t *load, int cpu)
+{
+	RSEQ_INJECT_C(9)
+
+	__asm__ __volatile__ goto (
+		RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+		/* Start rseq by storing table entry pointer into rseq_cs. */
+		RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
+		/* cmp cpuid */
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+		RSEQ_INJECT_ASM(3)
+		/* cmp @v not equal to @expectnot */
+		RSEQ_ASM_OP_CMPNE(v, expectnot, %l[cmpfail])
+		RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+		/* cmp cpuid */
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+		/* cmp @v not equal to @expectnot */
+		RSEQ_ASM_OP_CMPNE(v, expectnot, %l[error2])
+#endif
+		/* load the value of @v */
+		RSEQ_ASM_OP_R_LOAD(v)
+		/* store it in @load */
+		RSEQ_ASM_OP_R_STORE(load)
+		/* dereference voffp(v) */
+		RSEQ_ASM_OP_R_LOADX(voffp)
+		/* final store the value at voffp(v) */
+		RSEQ_ASM_OP_R_FINAL_STORE(v, 2)
+		RSEQ_INJECT_ASM(5)
+		RSEQ_ASM_DEFINE_ABORT(4, abort)
+		: /* gcc asm goto does not allow outputs */
+		: [cpu_id]		"r" (cpu),
+		  [current_cpu_id]	"m" (__rseq_abi.cpu_id),
+		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  /* final store input */
+		  [v]			"m" (*v),
+		  [expectnot]		"r" (expectnot),
+		  [voffp]		"b" (voffp),
+		  [load]		"m" (*load)
+		  RSEQ_INJECT_INPUT
+		: "memory", "cc", "r17"
+		  RSEQ_INJECT_CLOBBER
+		: abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+		  , error1, error2
+#endif
+	);
+	return 0;
+abort:
+	RSEQ_INJECT_FAILED
+	return -1;
+cmpfail:
+	return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+	rseq_bug("cpu_id comparison failed");
+error2:
+	rseq_bug("expected value comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_addv(intptr_t *v, intptr_t count, int cpu)
+{
+	RSEQ_INJECT_C(9)
+
+	__asm__ __volatile__ goto (
+		RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+		/* Start rseq by storing table entry pointer into rseq_cs. */
+		RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
+		/* cmp cpuid */
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+		RSEQ_INJECT_ASM(3)
+#ifdef RSEQ_COMPARE_TWICE
+		/* cmp cpuid */
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+#endif
+		/* load the value of @v */
+		RSEQ_ASM_OP_R_LOAD(v)
+		/* add @count to it */
+		RSEQ_ASM_OP_R_ADD(count)
+		/* final store */
+		RSEQ_ASM_OP_R_FINAL_STORE(v, 2)
+		RSEQ_INJECT_ASM(4)
+		RSEQ_ASM_DEFINE_ABORT(4, abort)
+		: /* gcc asm goto does not allow outputs */
+		: [cpu_id]		"r" (cpu),
+		  [current_cpu_id]	"m" (__rseq_abi.cpu_id),
+		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  /* final store input */
+		  [v]			"m" (*v),
+		  [count]		"r" (count)
+		  RSEQ_INJECT_INPUT
+		: "memory", "cc", "r17"
+		  RSEQ_INJECT_CLOBBER
+		: abort
+#ifdef RSEQ_COMPARE_TWICE
+		  , error1
+#endif
+	);
+	return 0;
+abort:
+	RSEQ_INJECT_FAILED
+	return -1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+	rseq_bug("cpu_id comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect,
+				 intptr_t *v2, intptr_t newv2,
+				 intptr_t newv, int cpu)
+{
+	RSEQ_INJECT_C(9)
+
+	__asm__ __volatile__ goto (
+		RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+		/* Start rseq by storing table entry pointer into rseq_cs. */
+		RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
+		/* cmp cpuid */
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+		RSEQ_INJECT_ASM(3)
+		/* cmp @v equal to @expect */
+		RSEQ_ASM_OP_CMPEQ(v, expect, %l[cmpfail])
+		RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+		/* cmp cpuid */
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+		/* cmp @v equal to @expect */
+		RSEQ_ASM_OP_CMPEQ(v, expect, %l[error2])
+#endif
+		/* try store */
+		RSEQ_ASM_OP_STORE(newv2, v2)
+		RSEQ_INJECT_ASM(5)
+		/* final store */
+		RSEQ_ASM_OP_FINAL_STORE(newv, v, 2)
+		RSEQ_INJECT_ASM(6)
+		RSEQ_ASM_DEFINE_ABORT(4, abort)
+		: /* gcc asm goto does not allow outputs */
+		: [cpu_id]		"r" (cpu),
+		  [current_cpu_id]	"m" (__rseq_abi.cpu_id),
+		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  /* try store input */
+		  [v2]			"m" (*v2),
+		  [newv2]		"r" (newv2),
+		  /* final store input */
+		  [v]			"m" (*v),
+		  [expect]		"r" (expect),
+		  [newv]		"r" (newv)
+		  RSEQ_INJECT_INPUT
+		: "memory", "cc", "r17"
+		  RSEQ_INJECT_CLOBBER
+		: abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+		  , error1, error2
+#endif
+	);
+	return 0;
+abort:
+	RSEQ_INJECT_FAILED
+	return -1;
+cmpfail:
+	return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+	rseq_bug("cpu_id comparison failed");
+error2:
+	rseq_bug("expected value comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect,
+					 intptr_t *v2, intptr_t newv2,
+					 intptr_t newv, int cpu)
+{
+	RSEQ_INJECT_C(9)
+
+	__asm__ __volatile__ goto (
+		RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+		/* Start rseq by storing table entry pointer into rseq_cs. */
+		RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
+		/* cmp cpuid */
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+		RSEQ_INJECT_ASM(3)
+		/* cmp @v equal to @expect */
+		RSEQ_ASM_OP_CMPEQ(v, expect, %l[cmpfail])
+		RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+		/* cmp cpuid */
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+		/* cmp @v equal to @expect */
+		RSEQ_ASM_OP_CMPEQ(v, expect, %l[error2])
+#endif
+		/* try store */
+		RSEQ_ASM_OP_STORE(newv2, v2)
+		RSEQ_INJECT_ASM(5)
+		/* for 'release' */
+		"lwsync\n\t"
+		/* final store */
+		RSEQ_ASM_OP_FINAL_STORE(newv, v, 2)
+		RSEQ_INJECT_ASM(6)
+		RSEQ_ASM_DEFINE_ABORT(4, abort)
+		: /* gcc asm goto does not allow outputs */
+		: [cpu_id]		"r" (cpu),
+		  [current_cpu_id]	"m" (__rseq_abi.cpu_id),
+		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  /* try store input */
+		  [v2]			"m" (*v2),
+		  [newv2]		"r" (newv2),
+		  /* final store input */
+		  [v]			"m" (*v),
+		  [expect]		"r" (expect),
+		  [newv]		"r" (newv)
+		  RSEQ_INJECT_INPUT
+		: "memory", "cc", "r17"
+		  RSEQ_INJECT_CLOBBER
+		: abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+		  , error1, error2
+#endif
+	);
+	return 0;
+abort:
+	RSEQ_INJECT_FAILED
+	return -1;
+cmpfail:
+	return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+	rseq_bug("cpu_id comparison failed");
+error2:
+	rseq_bug("expected value comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect,
+			      intptr_t *v2, intptr_t expect2,
+			      intptr_t newv, int cpu)
+{
+	RSEQ_INJECT_C(9)
+
+	__asm__ __volatile__ goto (
+		RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+		/* Start rseq by storing table entry pointer into rseq_cs. */
+		RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
+		/* cmp cpuid */
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+		RSEQ_INJECT_ASM(3)
+		/* cmp @v equal to @expect */
+		RSEQ_ASM_OP_CMPEQ(v, expect, %l[cmpfail])
+		RSEQ_INJECT_ASM(4)
+		/* cmp @v2 equal to @expct2 */
+		RSEQ_ASM_OP_CMPEQ(v2, expect2, %l[cmpfail])
+		RSEQ_INJECT_ASM(5)
+#ifdef RSEQ_COMPARE_TWICE
+		/* cmp cpuid */
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+		/* cmp @v equal to @expect */
+		RSEQ_ASM_OP_CMPEQ(v, expect, %l[error2])
+		/* cmp @v2 equal to @expct2 */
+		RSEQ_ASM_OP_CMPEQ(v2, expect2, %l[error3])
+#endif
+		/* final store */
+		RSEQ_ASM_OP_FINAL_STORE(newv, v, 2)
+		RSEQ_INJECT_ASM(6)
+		RSEQ_ASM_DEFINE_ABORT(4, abort)
+		: /* gcc asm goto does not allow outputs */
+		: [cpu_id]		"r" (cpu),
+		  [current_cpu_id]	"m" (__rseq_abi.cpu_id),
+		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  /* cmp2 input */
+		  [v2]			"m" (*v2),
+		  [expect2]		"r" (expect2),
+		  /* final store input */
+		  [v]			"m" (*v),
+		  [expect]		"r" (expect),
+		  [newv]		"r" (newv)
+		  RSEQ_INJECT_INPUT
+		: "memory", "cc", "r17"
+		  RSEQ_INJECT_CLOBBER
+		: abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+		  , error1, error2, error3
+#endif
+	);
+	return 0;
+abort:
+	RSEQ_INJECT_FAILED
+	return -1;
+cmpfail:
+	return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+	rseq_bug("cpu_id comparison failed");
+error2:
+	rseq_bug("1st expected value comparison failed");
+error3:
+	rseq_bug("2nd expected value comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect,
+				 void *dst, void *src, size_t len,
+				 intptr_t newv, int cpu)
+{
+	RSEQ_INJECT_C(9)
+
+	__asm__ __volatile__ goto (
+		RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+		/* setup for mempcy */
+		"mr %%r19, %[len]\n\t"
+		"mr %%r20, %[src]\n\t"
+		"mr %%r21, %[dst]\n\t"
+		/* Start rseq by storing table entry pointer into rseq_cs. */
+		RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
+		/* cmp cpuid */
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+		RSEQ_INJECT_ASM(3)
+		/* cmp @v equal to @expect */
+		RSEQ_ASM_OP_CMPEQ(v, expect, %l[cmpfail])
+		RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+		/* cmp cpuid */
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+		/* cmp @v equal to @expect */
+		RSEQ_ASM_OP_CMPEQ(v, expect, %l[error2])
+#endif
+		/* try memcpy */
+		RSEQ_ASM_OP_R_MEMCPY()
+		RSEQ_INJECT_ASM(5)
+		/* final store */
+		RSEQ_ASM_OP_FINAL_STORE(newv, v, 2)
+		RSEQ_INJECT_ASM(6)
+		/* teardown */
+		RSEQ_ASM_DEFINE_ABORT(4, abort)
+		: /* gcc asm goto does not allow outputs */
+		: [cpu_id]		"r" (cpu),
+		  [current_cpu_id]	"m" (__rseq_abi.cpu_id),
+		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  /* final store input */
+		  [v]			"m" (*v),
+		  [expect]		"r" (expect),
+		  [newv]		"r" (newv),
+		  /* try memcpy input */
+		  [dst]			"r" (dst),
+		  [src]			"r" (src),
+		  [len]			"r" (len)
+		  RSEQ_INJECT_INPUT
+		: "memory", "cc", "r17", "r18", "r19", "r20", "r21"
+		  RSEQ_INJECT_CLOBBER
+		: abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+		  , error1, error2
+#endif
+	);
+	return 0;
+abort:
+	RSEQ_INJECT_FAILED
+	return -1;
+cmpfail:
+	return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+	rseq_bug("cpu_id comparison failed");
+error2:
+	rseq_bug("expected value comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect,
+					 void *dst, void *src, size_t len,
+					 intptr_t newv, int cpu)
+{
+	RSEQ_INJECT_C(9)
+
+	__asm__ __volatile__ goto (
+		RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+		/* setup for mempcy */
+		"mr %%r19, %[len]\n\t"
+		"mr %%r20, %[src]\n\t"
+		"mr %%r21, %[dst]\n\t"
+		/* Start rseq by storing table entry pointer into rseq_cs. */
+		RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
+		/* cmp cpuid */
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+		RSEQ_INJECT_ASM(3)
+		/* cmp @v equal to @expect */
+		RSEQ_ASM_OP_CMPEQ(v, expect, %l[cmpfail])
+		RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+		/* cmp cpuid */
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+		/* cmp @v equal to @expect */
+		RSEQ_ASM_OP_CMPEQ(v, expect, %l[error2])
+#endif
+		/* try memcpy */
+		RSEQ_ASM_OP_R_MEMCPY()
+		RSEQ_INJECT_ASM(5)
+		/* for 'release' */
+		"lwsync\n\t"
+		/* final store */
+		RSEQ_ASM_OP_FINAL_STORE(newv, v, 2)
+		RSEQ_INJECT_ASM(6)
+		/* teardown */
+		RSEQ_ASM_DEFINE_ABORT(4, abort)
+		: /* gcc asm goto does not allow outputs */
+		: [cpu_id]		"r" (cpu),
+		  [current_cpu_id]	"m" (__rseq_abi.cpu_id),
+		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  /* final store input */
+		  [v]			"m" (*v),
+		  [expect]		"r" (expect),
+		  [newv]		"r" (newv),
+		  /* try memcpy input */
+		  [dst]			"r" (dst),
+		  [src]			"r" (src),
+		  [len]			"r" (len)
+		  RSEQ_INJECT_INPUT
+		: "memory", "cc", "r17", "r18", "r19", "r20", "r21"
+		  RSEQ_INJECT_CLOBBER
+		: abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+		  , error1, error2
+#endif
+	);
+	return 0;
+abort:
+	RSEQ_INJECT_FAILED
+	return -1;
+cmpfail:
+	return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+	rseq_bug("cpu_id comparison failed");
+error2:
+	rseq_bug("expected value comparison failed");
+#endif
+}
+
+#undef STORE_WORD
+#undef LOAD_WORD
+#undef LOADX_WORD
+#undef CMP_WORD
+
+#endif /* !RSEQ_SKIP_FASTPATH */
diff --git a/tools/testing/selftests/rseq/rseq-s390.h b/tools/testing/selftests/rseq/rseq-s390.h
new file mode 100644
index 000000000..1069e8525
--- /dev/null
+++ b/tools/testing/selftests/rseq/rseq-s390.h
@@ -0,0 +1,513 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+
+#define RSEQ_SIG	0x53053053
+
+#define rseq_smp_mb()	__asm__ __volatile__ ("bcr 15,0" ::: "memory")
+#define rseq_smp_rmb()	rseq_smp_mb()
+#define rseq_smp_wmb()	rseq_smp_mb()
+
+#define rseq_smp_load_acquire(p)					\
+__extension__ ({							\
+	__typeof(*p) ____p1 = RSEQ_READ_ONCE(*p);			\
+	rseq_barrier();							\
+	____p1;								\
+})
+
+#define rseq_smp_acquire__after_ctrl_dep()	rseq_smp_rmb()
+
+#define rseq_smp_store_release(p, v)					\
+do {									\
+	rseq_barrier();							\
+	RSEQ_WRITE_ONCE(*p, v);						\
+} while (0)
+
+#ifdef RSEQ_SKIP_FASTPATH
+#include "rseq-skip.h"
+#else /* !RSEQ_SKIP_FASTPATH */
+
+#ifdef __s390x__
+
+#define LONG_L			"lg"
+#define LONG_S			"stg"
+#define LONG_LT_R		"ltgr"
+#define LONG_CMP		"cg"
+#define LONG_CMP_R		"cgr"
+#define LONG_ADDI		"aghi"
+#define LONG_ADD_R		"agr"
+
+#define __RSEQ_ASM_DEFINE_TABLE(label, version, flags,			\
+				start_ip, post_commit_offset, abort_ip)	\
+		".pushsection __rseq_table, \"aw\"\n\t"			\
+		".balign 32\n\t"					\
+		__rseq_str(label) ":\n\t"				\
+		".long " __rseq_str(version) ", " __rseq_str(flags) "\n\t" \
+		".quad " __rseq_str(start_ip) ", " __rseq_str(post_commit_offset) ", " __rseq_str(abort_ip) "\n\t" \
+		".popsection\n\t"
+
+#elif __s390__
+
+#define __RSEQ_ASM_DEFINE_TABLE(label, version, flags,			\
+				start_ip, post_commit_offset, abort_ip)	\
+		".pushsection __rseq_table, \"aw\"\n\t"			\
+		".balign 32\n\t"					\
+		__rseq_str(label) ":\n\t"				\
+		".long " __rseq_str(version) ", " __rseq_str(flags) "\n\t" \
+		".long 0x0, " __rseq_str(start_ip) ", 0x0, " __rseq_str(post_commit_offset) ", 0x0, " __rseq_str(abort_ip) "\n\t" \
+		".popsection\n\t"
+
+#define LONG_L			"l"
+#define LONG_S			"st"
+#define LONG_LT_R		"ltr"
+#define LONG_CMP		"c"
+#define LONG_CMP_R		"cr"
+#define LONG_ADDI		"ahi"
+#define LONG_ADD_R		"ar"
+
+#endif
+
+#define RSEQ_ASM_DEFINE_TABLE(label, start_ip, post_commit_ip, abort_ip) \
+	__RSEQ_ASM_DEFINE_TABLE(label, 0x0, 0x0, start_ip,		\
+				(post_commit_ip - start_ip), abort_ip)
+
+#define RSEQ_ASM_STORE_RSEQ_CS(label, cs_label, rseq_cs)		\
+		RSEQ_INJECT_ASM(1)					\
+		"larl %%r0, " __rseq_str(cs_label) "\n\t"		\
+		LONG_S " %%r0, %[" __rseq_str(rseq_cs) "]\n\t"		\
+		__rseq_str(label) ":\n\t"
+
+#define RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, label)		\
+		RSEQ_INJECT_ASM(2)					\
+		"c %[" __rseq_str(cpu_id) "], %[" __rseq_str(current_cpu_id) "]\n\t" \
+		"jnz " __rseq_str(label) "\n\t"
+
+#define RSEQ_ASM_DEFINE_ABORT(label, teardown, abort_label)		\
+		".pushsection __rseq_failure, \"ax\"\n\t"		\
+		".long " __rseq_str(RSEQ_SIG) "\n\t"			\
+		__rseq_str(label) ":\n\t"				\
+		teardown						\
+		"j %l[" __rseq_str(abort_label) "]\n\t"			\
+		".popsection\n\t"
+
+#define RSEQ_ASM_DEFINE_CMPFAIL(label, teardown, cmpfail_label)		\
+		".pushsection __rseq_failure, \"ax\"\n\t"		\
+		__rseq_str(label) ":\n\t"				\
+		teardown						\
+		"j %l[" __rseq_str(cmpfail_label) "]\n\t"		\
+		".popsection\n\t"
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
+{
+	RSEQ_INJECT_C(9)
+
+	__asm__ __volatile__ goto (
+		RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+		/* Start rseq by storing table entry pointer into rseq_cs. */
+		RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+		RSEQ_INJECT_ASM(3)
+		LONG_CMP " %[expect], %[v]\n\t"
+		"jnz %l[cmpfail]\n\t"
+		RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+		LONG_CMP " %[expect], %[v]\n\t"
+		"jnz %l[error2]\n\t"
+#endif
+		/* final store */
+		LONG_S " %[newv], %[v]\n\t"
+		"2:\n\t"
+		RSEQ_INJECT_ASM(5)
+		RSEQ_ASM_DEFINE_ABORT(4, "", abort)
+		: /* gcc asm goto does not allow outputs */
+		: [cpu_id]		"r" (cpu),
+		  [current_cpu_id]	"m" (__rseq_abi.cpu_id),
+		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  [v]			"m" (*v),
+		  [expect]		"r" (expect),
+		  [newv]		"r" (newv)
+		  RSEQ_INJECT_INPUT
+		: "memory", "cc", "r0"
+		  RSEQ_INJECT_CLOBBER
+		: abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+		  , error1, error2
+#endif
+	);
+	return 0;
+abort:
+	RSEQ_INJECT_FAILED
+	return -1;
+cmpfail:
+	return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+	rseq_bug("cpu_id comparison failed");
+error2:
+	rseq_bug("expected value comparison failed");
+#endif
+}
+
+/*
+ * Compare @v against @expectnot. When it does _not_ match, load @v
+ * into @load, and store the content of *@v + voffp into @v.
+ */
+static inline __attribute__((always_inline))
+int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
+			       off_t voffp, intptr_t *load, int cpu)
+{
+	RSEQ_INJECT_C(9)
+
+	__asm__ __volatile__ goto (
+		RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+		/* Start rseq by storing table entry pointer into rseq_cs. */
+		RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+		RSEQ_INJECT_ASM(3)
+		LONG_L " %%r1, %[v]\n\t"
+		LONG_CMP_R " %%r1, %[expectnot]\n\t"
+		"je %l[cmpfail]\n\t"
+		RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+		LONG_L " %%r1, %[v]\n\t"
+		LONG_CMP_R " %%r1, %[expectnot]\n\t"
+		"je %l[error2]\n\t"
+#endif
+		LONG_S " %%r1, %[load]\n\t"
+		LONG_ADD_R " %%r1, %[voffp]\n\t"
+		LONG_L " %%r1, 0(%%r1)\n\t"
+		/* final store */
+		LONG_S " %%r1, %[v]\n\t"
+		"2:\n\t"
+		RSEQ_INJECT_ASM(5)
+		RSEQ_ASM_DEFINE_ABORT(4, "", abort)
+		: /* gcc asm goto does not allow outputs */
+		: [cpu_id]		"r" (cpu),
+		  [current_cpu_id]	"m" (__rseq_abi.cpu_id),
+		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  /* final store input */
+		  [v]			"m" (*v),
+		  [expectnot]		"r" (expectnot),
+		  [voffp]		"r" (voffp),
+		  [load]		"m" (*load)
+		  RSEQ_INJECT_INPUT
+		: "memory", "cc", "r0", "r1"
+		  RSEQ_INJECT_CLOBBER
+		: abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+		  , error1, error2
+#endif
+	);
+	return 0;
+abort:
+	RSEQ_INJECT_FAILED
+	return -1;
+cmpfail:
+	return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+	rseq_bug("cpu_id comparison failed");
+error2:
+	rseq_bug("expected value comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_addv(intptr_t *v, intptr_t count, int cpu)
+{
+	RSEQ_INJECT_C(9)
+
+	__asm__ __volatile__ goto (
+		RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+		/* Start rseq by storing table entry pointer into rseq_cs. */
+		RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+		RSEQ_INJECT_ASM(3)
+#ifdef RSEQ_COMPARE_TWICE
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+#endif
+		LONG_L " %%r0, %[v]\n\t"
+		LONG_ADD_R " %%r0, %[count]\n\t"
+		/* final store */
+		LONG_S " %%r0, %[v]\n\t"
+		"2:\n\t"
+		RSEQ_INJECT_ASM(4)
+		RSEQ_ASM_DEFINE_ABORT(4, "", abort)
+		: /* gcc asm goto does not allow outputs */
+		: [cpu_id]		"r" (cpu),
+		  [current_cpu_id]	"m" (__rseq_abi.cpu_id),
+		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  /* final store input */
+		  [v]			"m" (*v),
+		  [count]		"r" (count)
+		  RSEQ_INJECT_INPUT
+		: "memory", "cc", "r0"
+		  RSEQ_INJECT_CLOBBER
+		: abort
+#ifdef RSEQ_COMPARE_TWICE
+		  , error1
+#endif
+	);
+	return 0;
+abort:
+	RSEQ_INJECT_FAILED
+	return -1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+	rseq_bug("cpu_id comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect,
+				 intptr_t *v2, intptr_t newv2,
+				 intptr_t newv, int cpu)
+{
+	RSEQ_INJECT_C(9)
+
+	__asm__ __volatile__ goto (
+		RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+		/* Start rseq by storing table entry pointer into rseq_cs. */
+		RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+		RSEQ_INJECT_ASM(3)
+		LONG_CMP " %[expect], %[v]\n\t"
+		"jnz %l[cmpfail]\n\t"
+		RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+		LONG_CMP " %[expect], %[v]\n\t"
+		"jnz %l[error2]\n\t"
+#endif
+		/* try store */
+		LONG_S " %[newv2], %[v2]\n\t"
+		RSEQ_INJECT_ASM(5)
+		/* final store */
+		LONG_S " %[newv], %[v]\n\t"
+		"2:\n\t"
+		RSEQ_INJECT_ASM(6)
+		RSEQ_ASM_DEFINE_ABORT(4, "", abort)
+		: /* gcc asm goto does not allow outputs */
+		: [cpu_id]		"r" (cpu),
+		  [current_cpu_id]	"m" (__rseq_abi.cpu_id),
+		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  /* try store input */
+		  [v2]			"m" (*v2),
+		  [newv2]		"r" (newv2),
+		  /* final store input */
+		  [v]			"m" (*v),
+		  [expect]		"r" (expect),
+		  [newv]		"r" (newv)
+		  RSEQ_INJECT_INPUT
+		: "memory", "cc", "r0"
+		  RSEQ_INJECT_CLOBBER
+		: abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+		  , error1, error2
+#endif
+	);
+	return 0;
+abort:
+	RSEQ_INJECT_FAILED
+	return -1;
+cmpfail:
+	return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+	rseq_bug("cpu_id comparison failed");
+error2:
+	rseq_bug("expected value comparison failed");
+#endif
+}
+
+/* s390 is TSO. */
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect,
+					 intptr_t *v2, intptr_t newv2,
+					 intptr_t newv, int cpu)
+{
+	return rseq_cmpeqv_trystorev_storev(v, expect, v2, newv2, newv, cpu);
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect,
+			      intptr_t *v2, intptr_t expect2,
+			      intptr_t newv, int cpu)
+{
+	RSEQ_INJECT_C(9)
+
+	__asm__ __volatile__ goto (
+		RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+		/* Start rseq by storing table entry pointer into rseq_cs. */
+		RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+		RSEQ_INJECT_ASM(3)
+		LONG_CMP " %[expect], %[v]\n\t"
+		"jnz %l[cmpfail]\n\t"
+		RSEQ_INJECT_ASM(4)
+		LONG_CMP " %[expect2], %[v2]\n\t"
+		"jnz %l[cmpfail]\n\t"
+		RSEQ_INJECT_ASM(5)
+#ifdef RSEQ_COMPARE_TWICE
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+		LONG_CMP " %[expect], %[v]\n\t"
+		"jnz %l[error2]\n\t"
+		LONG_CMP " %[expect2], %[v2]\n\t"
+		"jnz %l[error3]\n\t"
+#endif
+		/* final store */
+		LONG_S " %[newv], %[v]\n\t"
+		"2:\n\t"
+		RSEQ_INJECT_ASM(6)
+		RSEQ_ASM_DEFINE_ABORT(4, "", abort)
+		: /* gcc asm goto does not allow outputs */
+		: [cpu_id]		"r" (cpu),
+		  [current_cpu_id]	"m" (__rseq_abi.cpu_id),
+		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  /* cmp2 input */
+		  [v2]			"m" (*v2),
+		  [expect2]		"r" (expect2),
+		  /* final store input */
+		  [v]			"m" (*v),
+		  [expect]		"r" (expect),
+		  [newv]		"r" (newv)
+		  RSEQ_INJECT_INPUT
+		: "memory", "cc", "r0"
+		  RSEQ_INJECT_CLOBBER
+		: abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+		  , error1, error2, error3
+#endif
+	);
+	return 0;
+abort:
+	RSEQ_INJECT_FAILED
+	return -1;
+cmpfail:
+	return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+	rseq_bug("cpu_id comparison failed");
+error2:
+	rseq_bug("1st expected value comparison failed");
+error3:
+	rseq_bug("2nd expected value comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect,
+				 void *dst, void *src, size_t len,
+				 intptr_t newv, int cpu)
+{
+	uint64_t rseq_scratch[3];
+
+	RSEQ_INJECT_C(9)
+
+	__asm__ __volatile__ goto (
+		RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+		LONG_S " %[src], %[rseq_scratch0]\n\t"
+		LONG_S " %[dst], %[rseq_scratch1]\n\t"
+		LONG_S " %[len], %[rseq_scratch2]\n\t"
+		/* Start rseq by storing table entry pointer into rseq_cs. */
+		RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+		RSEQ_INJECT_ASM(3)
+		LONG_CMP " %[expect], %[v]\n\t"
+		"jnz 5f\n\t"
+		RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 6f)
+		LONG_CMP " %[expect], %[v]\n\t"
+		"jnz 7f\n\t"
+#endif
+		/* try memcpy */
+		LONG_LT_R " %[len], %[len]\n\t"
+		"jz 333f\n\t"
+		"222:\n\t"
+		"ic %%r0,0(%[src])\n\t"
+		"stc %%r0,0(%[dst])\n\t"
+		LONG_ADDI " %[src], 1\n\t"
+		LONG_ADDI " %[dst], 1\n\t"
+		LONG_ADDI " %[len], -1\n\t"
+		"jnz 222b\n\t"
+		"333:\n\t"
+		RSEQ_INJECT_ASM(5)
+		/* final store */
+		LONG_S " %[newv], %[v]\n\t"
+		"2:\n\t"
+		RSEQ_INJECT_ASM(6)
+		/* teardown */
+		LONG_L " %[len], %[rseq_scratch2]\n\t"
+		LONG_L " %[dst], %[rseq_scratch1]\n\t"
+		LONG_L " %[src], %[rseq_scratch0]\n\t"
+		RSEQ_ASM_DEFINE_ABORT(4,
+			LONG_L " %[len], %[rseq_scratch2]\n\t"
+			LONG_L " %[dst], %[rseq_scratch1]\n\t"
+			LONG_L " %[src], %[rseq_scratch0]\n\t",
+			abort)
+		RSEQ_ASM_DEFINE_CMPFAIL(5,
+			LONG_L " %[len], %[rseq_scratch2]\n\t"
+			LONG_L " %[dst], %[rseq_scratch1]\n\t"
+			LONG_L " %[src], %[rseq_scratch0]\n\t",
+			cmpfail)
+#ifdef RSEQ_COMPARE_TWICE
+		RSEQ_ASM_DEFINE_CMPFAIL(6,
+			LONG_L " %[len], %[rseq_scratch2]\n\t"
+			LONG_L " %[dst], %[rseq_scratch1]\n\t"
+			LONG_L " %[src], %[rseq_scratch0]\n\t",
+			error1)
+		RSEQ_ASM_DEFINE_CMPFAIL(7,
+			LONG_L " %[len], %[rseq_scratch2]\n\t"
+			LONG_L " %[dst], %[rseq_scratch1]\n\t"
+			LONG_L " %[src], %[rseq_scratch0]\n\t",
+			error2)
+#endif
+		: /* gcc asm goto does not allow outputs */
+		: [cpu_id]		"r" (cpu),
+		  [current_cpu_id]	"m" (__rseq_abi.cpu_id),
+		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  /* final store input */
+		  [v]			"m" (*v),
+		  [expect]		"r" (expect),
+		  [newv]		"r" (newv),
+		  /* try memcpy input */
+		  [dst]			"r" (dst),
+		  [src]			"r" (src),
+		  [len]			"r" (len),
+		  [rseq_scratch0]	"m" (rseq_scratch[0]),
+		  [rseq_scratch1]	"m" (rseq_scratch[1]),
+		  [rseq_scratch2]	"m" (rseq_scratch[2])
+		  RSEQ_INJECT_INPUT
+		: "memory", "cc", "r0"
+		  RSEQ_INJECT_CLOBBER
+		: abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+		  , error1, error2
+#endif
+	);
+	return 0;
+abort:
+	RSEQ_INJECT_FAILED
+	return -1;
+cmpfail:
+	return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+	rseq_bug("cpu_id comparison failed");
+error2:
+	rseq_bug("expected value comparison failed");
+#endif
+}
+
+/* s390 is TSO. */
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect,
+					 void *dst, void *src, size_t len,
+					 intptr_t newv, int cpu)
+{
+	return rseq_cmpeqv_trymemcpy_storev(v, expect, dst, src, len,
+					    newv, cpu);
+}
+#endif /* !RSEQ_SKIP_FASTPATH */
diff --git a/tools/testing/selftests/rseq/rseq-skip.h b/tools/testing/selftests/rseq/rseq-skip.h
new file mode 100644
index 000000000..72750b590
--- /dev/null
+++ b/tools/testing/selftests/rseq/rseq-skip.h
@@ -0,0 +1,65 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * rseq-skip.h
+ *
+ * (C) Copyright 2017-2018 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+ */
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
+{
+	return -1;
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
+			       off_t voffp, intptr_t *load, int cpu)
+{
+	return -1;
+}
+
+static inline __attribute__((always_inline))
+int rseq_addv(intptr_t *v, intptr_t count, int cpu)
+{
+	return -1;
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect,
+				 intptr_t *v2, intptr_t newv2,
+				 intptr_t newv, int cpu)
+{
+	return -1;
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect,
+					 intptr_t *v2, intptr_t newv2,
+					 intptr_t newv, int cpu)
+{
+	return -1;
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect,
+			      intptr_t *v2, intptr_t expect2,
+			      intptr_t newv, int cpu)
+{
+	return -1;
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect,
+				 void *dst, void *src, size_t len,
+				 intptr_t newv, int cpu)
+{
+	return -1;
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect,
+					 void *dst, void *src, size_t len,
+					 intptr_t newv, int cpu)
+{
+	return -1;
+}
diff --git a/tools/testing/selftests/rseq/rseq-x86.h b/tools/testing/selftests/rseq/rseq-x86.h
new file mode 100644
index 000000000..089410a31
--- /dev/null
+++ b/tools/testing/selftests/rseq/rseq-x86.h
@@ -0,0 +1,1132 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * rseq-x86.h
+ *
+ * (C) Copyright 2016-2018 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+ */
+
+#include <stdint.h>
+
+#define RSEQ_SIG	0x53053053
+
+#ifdef __x86_64__
+
+#define rseq_smp_mb()	\
+	__asm__ __volatile__ ("lock; addl $0,-128(%%rsp)" ::: "memory", "cc")
+#define rseq_smp_rmb()	rseq_barrier()
+#define rseq_smp_wmb()	rseq_barrier()
+
+#define rseq_smp_load_acquire(p)					\
+__extension__ ({							\
+	__typeof(*p) ____p1 = RSEQ_READ_ONCE(*p);			\
+	rseq_barrier();							\
+	____p1;								\
+})
+
+#define rseq_smp_acquire__after_ctrl_dep()	rseq_smp_rmb()
+
+#define rseq_smp_store_release(p, v)					\
+do {									\
+	rseq_barrier();							\
+	RSEQ_WRITE_ONCE(*p, v);						\
+} while (0)
+
+#ifdef RSEQ_SKIP_FASTPATH
+#include "rseq-skip.h"
+#else /* !RSEQ_SKIP_FASTPATH */
+
+#define __RSEQ_ASM_DEFINE_TABLE(label, version, flags,			\
+				start_ip, post_commit_offset, abort_ip)	\
+		".pushsection __rseq_table, \"aw\"\n\t"			\
+		".balign 32\n\t"					\
+		__rseq_str(label) ":\n\t"				\
+		".long " __rseq_str(version) ", " __rseq_str(flags) "\n\t" \
+		".quad " __rseq_str(start_ip) ", " __rseq_str(post_commit_offset) ", " __rseq_str(abort_ip) "\n\t" \
+		".popsection\n\t"
+
+#define RSEQ_ASM_DEFINE_TABLE(label, start_ip, post_commit_ip, abort_ip) \
+	__RSEQ_ASM_DEFINE_TABLE(label, 0x0, 0x0, start_ip,		\
+				(post_commit_ip - start_ip), abort_ip)
+
+#define RSEQ_ASM_STORE_RSEQ_CS(label, cs_label, rseq_cs)		\
+		RSEQ_INJECT_ASM(1)					\
+		"leaq " __rseq_str(cs_label) "(%%rip), %%rax\n\t"	\
+		"movq %%rax, %[" __rseq_str(rseq_cs) "]\n\t"		\
+		__rseq_str(label) ":\n\t"
+
+#define RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, label)		\
+		RSEQ_INJECT_ASM(2)					\
+		"cmpl %[" __rseq_str(cpu_id) "], %[" __rseq_str(current_cpu_id) "]\n\t" \
+		"jnz " __rseq_str(label) "\n\t"
+
+#define RSEQ_ASM_DEFINE_ABORT(label, teardown, abort_label)		\
+		".pushsection __rseq_failure, \"ax\"\n\t"		\
+		/* Disassembler-friendly signature: nopl <sig>(%rip). */\
+		".byte 0x0f, 0x1f, 0x05\n\t"				\
+		".long " __rseq_str(RSEQ_SIG) "\n\t"			\
+		__rseq_str(label) ":\n\t"				\
+		teardown						\
+		"jmp %l[" __rseq_str(abort_label) "]\n\t"		\
+		".popsection\n\t"
+
+#define RSEQ_ASM_DEFINE_CMPFAIL(label, teardown, cmpfail_label)		\
+		".pushsection __rseq_failure, \"ax\"\n\t"		\
+		__rseq_str(label) ":\n\t"				\
+		teardown						\
+		"jmp %l[" __rseq_str(cmpfail_label) "]\n\t"		\
+		".popsection\n\t"
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
+{
+	RSEQ_INJECT_C(9)
+
+	__asm__ __volatile__ goto (
+		RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+		/* Start rseq by storing table entry pointer into rseq_cs. */
+		RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+		RSEQ_INJECT_ASM(3)
+		"cmpq %[v], %[expect]\n\t"
+		"jnz %l[cmpfail]\n\t"
+		RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+		"cmpq %[v], %[expect]\n\t"
+		"jnz %l[error2]\n\t"
+#endif
+		/* final store */
+		"movq %[newv], %[v]\n\t"
+		"2:\n\t"
+		RSEQ_INJECT_ASM(5)
+		RSEQ_ASM_DEFINE_ABORT(4, "", abort)
+		: /* gcc asm goto does not allow outputs */
+		: [cpu_id]		"r" (cpu),
+		  [current_cpu_id]	"m" (__rseq_abi.cpu_id),
+		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  [v]			"m" (*v),
+		  [expect]		"r" (expect),
+		  [newv]		"r" (newv)
+		: "memory", "cc", "rax"
+		  RSEQ_INJECT_CLOBBER
+		: abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+		  , error1, error2
+#endif
+	);
+	return 0;
+abort:
+	RSEQ_INJECT_FAILED
+	return -1;
+cmpfail:
+	return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+	rseq_bug("cpu_id comparison failed");
+error2:
+	rseq_bug("expected value comparison failed");
+#endif
+}
+
+/*
+ * Compare @v against @expectnot. When it does _not_ match, load @v
+ * into @load, and store the content of *@v + voffp into @v.
+ */
+static inline __attribute__((always_inline))
+int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
+			       off_t voffp, intptr_t *load, int cpu)
+{
+	RSEQ_INJECT_C(9)
+
+	__asm__ __volatile__ goto (
+		RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+		/* Start rseq by storing table entry pointer into rseq_cs. */
+		RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+		RSEQ_INJECT_ASM(3)
+		"movq %[v], %%rbx\n\t"
+		"cmpq %%rbx, %[expectnot]\n\t"
+		"je %l[cmpfail]\n\t"
+		RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+		"movq %[v], %%rbx\n\t"
+		"cmpq %%rbx, %[expectnot]\n\t"
+		"je %l[error2]\n\t"
+#endif
+		"movq %%rbx, %[load]\n\t"
+		"addq %[voffp], %%rbx\n\t"
+		"movq (%%rbx), %%rbx\n\t"
+		/* final store */
+		"movq %%rbx, %[v]\n\t"
+		"2:\n\t"
+		RSEQ_INJECT_ASM(5)
+		RSEQ_ASM_DEFINE_ABORT(4, "", abort)
+		: /* gcc asm goto does not allow outputs */
+		: [cpu_id]		"r" (cpu),
+		  [current_cpu_id]	"m" (__rseq_abi.cpu_id),
+		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  /* final store input */
+		  [v]			"m" (*v),
+		  [expectnot]		"r" (expectnot),
+		  [voffp]		"er" (voffp),
+		  [load]		"m" (*load)
+		: "memory", "cc", "rax", "rbx"
+		  RSEQ_INJECT_CLOBBER
+		: abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+		  , error1, error2
+#endif
+	);
+	return 0;
+abort:
+	RSEQ_INJECT_FAILED
+	return -1;
+cmpfail:
+	return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+	rseq_bug("cpu_id comparison failed");
+error2:
+	rseq_bug("expected value comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_addv(intptr_t *v, intptr_t count, int cpu)
+{
+	RSEQ_INJECT_C(9)
+
+	__asm__ __volatile__ goto (
+		RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+		/* Start rseq by storing table entry pointer into rseq_cs. */
+		RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+		RSEQ_INJECT_ASM(3)
+#ifdef RSEQ_COMPARE_TWICE
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+#endif
+		/* final store */
+		"addq %[count], %[v]\n\t"
+		"2:\n\t"
+		RSEQ_INJECT_ASM(4)
+		RSEQ_ASM_DEFINE_ABORT(4, "", abort)
+		: /* gcc asm goto does not allow outputs */
+		: [cpu_id]		"r" (cpu),
+		  [current_cpu_id]	"m" (__rseq_abi.cpu_id),
+		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  /* final store input */
+		  [v]			"m" (*v),
+		  [count]		"er" (count)
+		: "memory", "cc", "rax"
+		  RSEQ_INJECT_CLOBBER
+		: abort
+#ifdef RSEQ_COMPARE_TWICE
+		  , error1
+#endif
+	);
+	return 0;
+abort:
+	RSEQ_INJECT_FAILED
+	return -1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+	rseq_bug("cpu_id comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect,
+				 intptr_t *v2, intptr_t newv2,
+				 intptr_t newv, int cpu)
+{
+	RSEQ_INJECT_C(9)
+
+	__asm__ __volatile__ goto (
+		RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+		/* Start rseq by storing table entry pointer into rseq_cs. */
+		RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+		RSEQ_INJECT_ASM(3)
+		"cmpq %[v], %[expect]\n\t"
+		"jnz %l[cmpfail]\n\t"
+		RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+		"cmpq %[v], %[expect]\n\t"
+		"jnz %l[error2]\n\t"
+#endif
+		/* try store */
+		"movq %[newv2], %[v2]\n\t"
+		RSEQ_INJECT_ASM(5)
+		/* final store */
+		"movq %[newv], %[v]\n\t"
+		"2:\n\t"
+		RSEQ_INJECT_ASM(6)
+		RSEQ_ASM_DEFINE_ABORT(4, "", abort)
+		: /* gcc asm goto does not allow outputs */
+		: [cpu_id]		"r" (cpu),
+		  [current_cpu_id]	"m" (__rseq_abi.cpu_id),
+		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  /* try store input */
+		  [v2]			"m" (*v2),
+		  [newv2]		"r" (newv2),
+		  /* final store input */
+		  [v]			"m" (*v),
+		  [expect]		"r" (expect),
+		  [newv]		"r" (newv)
+		: "memory", "cc", "rax"
+		  RSEQ_INJECT_CLOBBER
+		: abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+		  , error1, error2
+#endif
+	);
+	return 0;
+abort:
+	RSEQ_INJECT_FAILED
+	return -1;
+cmpfail:
+	return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+	rseq_bug("cpu_id comparison failed");
+error2:
+	rseq_bug("expected value comparison failed");
+#endif
+}
+
+/* x86-64 is TSO. */
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect,
+					 intptr_t *v2, intptr_t newv2,
+					 intptr_t newv, int cpu)
+{
+	return rseq_cmpeqv_trystorev_storev(v, expect, v2, newv2, newv, cpu);
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect,
+			      intptr_t *v2, intptr_t expect2,
+			      intptr_t newv, int cpu)
+{
+	RSEQ_INJECT_C(9)
+
+	__asm__ __volatile__ goto (
+		RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+		/* Start rseq by storing table entry pointer into rseq_cs. */
+		RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+		RSEQ_INJECT_ASM(3)
+		"cmpq %[v], %[expect]\n\t"
+		"jnz %l[cmpfail]\n\t"
+		RSEQ_INJECT_ASM(4)
+		"cmpq %[v2], %[expect2]\n\t"
+		"jnz %l[cmpfail]\n\t"
+		RSEQ_INJECT_ASM(5)
+#ifdef RSEQ_COMPARE_TWICE
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+		"cmpq %[v], %[expect]\n\t"
+		"jnz %l[error2]\n\t"
+		"cmpq %[v2], %[expect2]\n\t"
+		"jnz %l[error3]\n\t"
+#endif
+		/* final store */
+		"movq %[newv], %[v]\n\t"
+		"2:\n\t"
+		RSEQ_INJECT_ASM(6)
+		RSEQ_ASM_DEFINE_ABORT(4, "", abort)
+		: /* gcc asm goto does not allow outputs */
+		: [cpu_id]		"r" (cpu),
+		  [current_cpu_id]	"m" (__rseq_abi.cpu_id),
+		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  /* cmp2 input */
+		  [v2]			"m" (*v2),
+		  [expect2]		"r" (expect2),
+		  /* final store input */
+		  [v]			"m" (*v),
+		  [expect]		"r" (expect),
+		  [newv]		"r" (newv)
+		: "memory", "cc", "rax"
+		  RSEQ_INJECT_CLOBBER
+		: abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+		  , error1, error2, error3
+#endif
+	);
+	return 0;
+abort:
+	RSEQ_INJECT_FAILED
+	return -1;
+cmpfail:
+	return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+	rseq_bug("cpu_id comparison failed");
+error2:
+	rseq_bug("1st expected value comparison failed");
+error3:
+	rseq_bug("2nd expected value comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect,
+				 void *dst, void *src, size_t len,
+				 intptr_t newv, int cpu)
+{
+	uint64_t rseq_scratch[3];
+
+	RSEQ_INJECT_C(9)
+
+	__asm__ __volatile__ goto (
+		RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+		"movq %[src], %[rseq_scratch0]\n\t"
+		"movq %[dst], %[rseq_scratch1]\n\t"
+		"movq %[len], %[rseq_scratch2]\n\t"
+		/* Start rseq by storing table entry pointer into rseq_cs. */
+		RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+		RSEQ_INJECT_ASM(3)
+		"cmpq %[v], %[expect]\n\t"
+		"jnz 5f\n\t"
+		RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 6f)
+		"cmpq %[v], %[expect]\n\t"
+		"jnz 7f\n\t"
+#endif
+		/* try memcpy */
+		"test %[len], %[len]\n\t" \
+		"jz 333f\n\t" \
+		"222:\n\t" \
+		"movb (%[src]), %%al\n\t" \
+		"movb %%al, (%[dst])\n\t" \
+		"inc %[src]\n\t" \
+		"inc %[dst]\n\t" \
+		"dec %[len]\n\t" \
+		"jnz 222b\n\t" \
+		"333:\n\t" \
+		RSEQ_INJECT_ASM(5)
+		/* final store */
+		"movq %[newv], %[v]\n\t"
+		"2:\n\t"
+		RSEQ_INJECT_ASM(6)
+		/* teardown */
+		"movq %[rseq_scratch2], %[len]\n\t"
+		"movq %[rseq_scratch1], %[dst]\n\t"
+		"movq %[rseq_scratch0], %[src]\n\t"
+		RSEQ_ASM_DEFINE_ABORT(4,
+			"movq %[rseq_scratch2], %[len]\n\t"
+			"movq %[rseq_scratch1], %[dst]\n\t"
+			"movq %[rseq_scratch0], %[src]\n\t",
+			abort)
+		RSEQ_ASM_DEFINE_CMPFAIL(5,
+			"movq %[rseq_scratch2], %[len]\n\t"
+			"movq %[rseq_scratch1], %[dst]\n\t"
+			"movq %[rseq_scratch0], %[src]\n\t",
+			cmpfail)
+#ifdef RSEQ_COMPARE_TWICE
+		RSEQ_ASM_DEFINE_CMPFAIL(6,
+			"movq %[rseq_scratch2], %[len]\n\t"
+			"movq %[rseq_scratch1], %[dst]\n\t"
+			"movq %[rseq_scratch0], %[src]\n\t",
+			error1)
+		RSEQ_ASM_DEFINE_CMPFAIL(7,
+			"movq %[rseq_scratch2], %[len]\n\t"
+			"movq %[rseq_scratch1], %[dst]\n\t"
+			"movq %[rseq_scratch0], %[src]\n\t",
+			error2)
+#endif
+		: /* gcc asm goto does not allow outputs */
+		: [cpu_id]		"r" (cpu),
+		  [current_cpu_id]	"m" (__rseq_abi.cpu_id),
+		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  /* final store input */
+		  [v]			"m" (*v),
+		  [expect]		"r" (expect),
+		  [newv]		"r" (newv),
+		  /* try memcpy input */
+		  [dst]			"r" (dst),
+		  [src]			"r" (src),
+		  [len]			"r" (len),
+		  [rseq_scratch0]	"m" (rseq_scratch[0]),
+		  [rseq_scratch1]	"m" (rseq_scratch[1]),
+		  [rseq_scratch2]	"m" (rseq_scratch[2])
+		: "memory", "cc", "rax"
+		  RSEQ_INJECT_CLOBBER
+		: abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+		  , error1, error2
+#endif
+	);
+	return 0;
+abort:
+	RSEQ_INJECT_FAILED
+	return -1;
+cmpfail:
+	return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+	rseq_bug("cpu_id comparison failed");
+error2:
+	rseq_bug("expected value comparison failed");
+#endif
+}
+
+/* x86-64 is TSO. */
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect,
+					 void *dst, void *src, size_t len,
+					 intptr_t newv, int cpu)
+{
+	return rseq_cmpeqv_trymemcpy_storev(v, expect, dst, src, len,
+					    newv, cpu);
+}
+
+#endif /* !RSEQ_SKIP_FASTPATH */
+
+#elif __i386__
+
+#define rseq_smp_mb()	\
+	__asm__ __volatile__ ("lock; addl $0,-128(%%esp)" ::: "memory", "cc")
+#define rseq_smp_rmb()	\
+	__asm__ __volatile__ ("lock; addl $0,-128(%%esp)" ::: "memory", "cc")
+#define rseq_smp_wmb()	\
+	__asm__ __volatile__ ("lock; addl $0,-128(%%esp)" ::: "memory", "cc")
+
+#define rseq_smp_load_acquire(p)					\
+__extension__ ({							\
+	__typeof(*p) ____p1 = RSEQ_READ_ONCE(*p);			\
+	rseq_smp_mb();							\
+	____p1;								\
+})
+
+#define rseq_smp_acquire__after_ctrl_dep()	rseq_smp_rmb()
+
+#define rseq_smp_store_release(p, v)					\
+do {									\
+	rseq_smp_mb();							\
+	RSEQ_WRITE_ONCE(*p, v);						\
+} while (0)
+
+#ifdef RSEQ_SKIP_FASTPATH
+#include "rseq-skip.h"
+#else /* !RSEQ_SKIP_FASTPATH */
+
+/*
+ * Use eax as scratch register and take memory operands as input to
+ * lessen register pressure. Especially needed when compiling in O0.
+ */
+#define __RSEQ_ASM_DEFINE_TABLE(label, version, flags,			\
+				start_ip, post_commit_offset, abort_ip)	\
+		".pushsection __rseq_table, \"aw\"\n\t"			\
+		".balign 32\n\t"					\
+		__rseq_str(label) ":\n\t"				\
+		".long " __rseq_str(version) ", " __rseq_str(flags) "\n\t" \
+		".long " __rseq_str(start_ip) ", 0x0, " __rseq_str(post_commit_offset) ", 0x0, " __rseq_str(abort_ip) ", 0x0\n\t" \
+		".popsection\n\t"
+
+#define RSEQ_ASM_DEFINE_TABLE(label, start_ip, post_commit_ip, abort_ip) \
+	__RSEQ_ASM_DEFINE_TABLE(label, 0x0, 0x0, start_ip,		\
+				(post_commit_ip - start_ip), abort_ip)
+
+#define RSEQ_ASM_STORE_RSEQ_CS(label, cs_label, rseq_cs)		\
+		RSEQ_INJECT_ASM(1)					\
+		"movl $" __rseq_str(cs_label) ", %[rseq_cs]\n\t"	\
+		__rseq_str(label) ":\n\t"
+
+#define RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, label)		\
+		RSEQ_INJECT_ASM(2)					\
+		"cmpl %[" __rseq_str(cpu_id) "], %[" __rseq_str(current_cpu_id) "]\n\t" \
+		"jnz " __rseq_str(label) "\n\t"
+
+#define RSEQ_ASM_DEFINE_ABORT(label, teardown, abort_label)		\
+		".pushsection __rseq_failure, \"ax\"\n\t"		\
+		/* Disassembler-friendly signature: nopl <sig>. */	\
+		".byte 0x0f, 0x1f, 0x05\n\t"				\
+		".long " __rseq_str(RSEQ_SIG) "\n\t"			\
+		__rseq_str(label) ":\n\t"				\
+		teardown						\
+		"jmp %l[" __rseq_str(abort_label) "]\n\t"		\
+		".popsection\n\t"
+
+#define RSEQ_ASM_DEFINE_CMPFAIL(label, teardown, cmpfail_label)		\
+		".pushsection __rseq_failure, \"ax\"\n\t"		\
+		__rseq_str(label) ":\n\t"				\
+		teardown						\
+		"jmp %l[" __rseq_str(cmpfail_label) "]\n\t"		\
+		".popsection\n\t"
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
+{
+	RSEQ_INJECT_C(9)
+
+	__asm__ __volatile__ goto (
+		RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+		/* Start rseq by storing table entry pointer into rseq_cs. */
+		RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+		RSEQ_INJECT_ASM(3)
+		"cmpl %[v], %[expect]\n\t"
+		"jnz %l[cmpfail]\n\t"
+		RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+		"cmpl %[v], %[expect]\n\t"
+		"jnz %l[error2]\n\t"
+#endif
+		/* final store */
+		"movl %[newv], %[v]\n\t"
+		"2:\n\t"
+		RSEQ_INJECT_ASM(5)
+		RSEQ_ASM_DEFINE_ABORT(4, "", abort)
+		: /* gcc asm goto does not allow outputs */
+		: [cpu_id]		"r" (cpu),
+		  [current_cpu_id]	"m" (__rseq_abi.cpu_id),
+		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  [v]			"m" (*v),
+		  [expect]		"r" (expect),
+		  [newv]		"r" (newv)
+		: "memory", "cc", "eax"
+		  RSEQ_INJECT_CLOBBER
+		: abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+		  , error1, error2
+#endif
+	);
+	return 0;
+abort:
+	RSEQ_INJECT_FAILED
+	return -1;
+cmpfail:
+	return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+	rseq_bug("cpu_id comparison failed");
+error2:
+	rseq_bug("expected value comparison failed");
+#endif
+}
+
+/*
+ * Compare @v against @expectnot. When it does _not_ match, load @v
+ * into @load, and store the content of *@v + voffp into @v.
+ */
+static inline __attribute__((always_inline))
+int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
+			       off_t voffp, intptr_t *load, int cpu)
+{
+	RSEQ_INJECT_C(9)
+
+	__asm__ __volatile__ goto (
+		RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+		/* Start rseq by storing table entry pointer into rseq_cs. */
+		RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+		RSEQ_INJECT_ASM(3)
+		"movl %[v], %%ebx\n\t"
+		"cmpl %%ebx, %[expectnot]\n\t"
+		"je %l[cmpfail]\n\t"
+		RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+		"movl %[v], %%ebx\n\t"
+		"cmpl %%ebx, %[expectnot]\n\t"
+		"je %l[error2]\n\t"
+#endif
+		"movl %%ebx, %[load]\n\t"
+		"addl %[voffp], %%ebx\n\t"
+		"movl (%%ebx), %%ebx\n\t"
+		/* final store */
+		"movl %%ebx, %[v]\n\t"
+		"2:\n\t"
+		RSEQ_INJECT_ASM(5)
+		RSEQ_ASM_DEFINE_ABORT(4, "", abort)
+		: /* gcc asm goto does not allow outputs */
+		: [cpu_id]		"r" (cpu),
+		  [current_cpu_id]	"m" (__rseq_abi.cpu_id),
+		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  /* final store input */
+		  [v]			"m" (*v),
+		  [expectnot]		"r" (expectnot),
+		  [voffp]		"ir" (voffp),
+		  [load]		"m" (*load)
+		: "memory", "cc", "eax", "ebx"
+		  RSEQ_INJECT_CLOBBER
+		: abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+		  , error1, error2
+#endif
+	);
+	return 0;
+abort:
+	RSEQ_INJECT_FAILED
+	return -1;
+cmpfail:
+	return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+	rseq_bug("cpu_id comparison failed");
+error2:
+	rseq_bug("expected value comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_addv(intptr_t *v, intptr_t count, int cpu)
+{
+	RSEQ_INJECT_C(9)
+
+	__asm__ __volatile__ goto (
+		RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+		/* Start rseq by storing table entry pointer into rseq_cs. */
+		RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+		RSEQ_INJECT_ASM(3)
+#ifdef RSEQ_COMPARE_TWICE
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+#endif
+		/* final store */
+		"addl %[count], %[v]\n\t"
+		"2:\n\t"
+		RSEQ_INJECT_ASM(4)
+		RSEQ_ASM_DEFINE_ABORT(4, "", abort)
+		: /* gcc asm goto does not allow outputs */
+		: [cpu_id]		"r" (cpu),
+		  [current_cpu_id]	"m" (__rseq_abi.cpu_id),
+		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  /* final store input */
+		  [v]			"m" (*v),
+		  [count]		"ir" (count)
+		: "memory", "cc", "eax"
+		  RSEQ_INJECT_CLOBBER
+		: abort
+#ifdef RSEQ_COMPARE_TWICE
+		  , error1
+#endif
+	);
+	return 0;
+abort:
+	RSEQ_INJECT_FAILED
+	return -1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+	rseq_bug("cpu_id comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect,
+				 intptr_t *v2, intptr_t newv2,
+				 intptr_t newv, int cpu)
+{
+	RSEQ_INJECT_C(9)
+
+	__asm__ __volatile__ goto (
+		RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+		/* Start rseq by storing table entry pointer into rseq_cs. */
+		RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+		RSEQ_INJECT_ASM(3)
+		"cmpl %[v], %[expect]\n\t"
+		"jnz %l[cmpfail]\n\t"
+		RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+		"cmpl %[v], %[expect]\n\t"
+		"jnz %l[error2]\n\t"
+#endif
+		/* try store */
+		"movl %[newv2], %%eax\n\t"
+		"movl %%eax, %[v2]\n\t"
+		RSEQ_INJECT_ASM(5)
+		/* final store */
+		"movl %[newv], %[v]\n\t"
+		"2:\n\t"
+		RSEQ_INJECT_ASM(6)
+		RSEQ_ASM_DEFINE_ABORT(4, "", abort)
+		: /* gcc asm goto does not allow outputs */
+		: [cpu_id]		"r" (cpu),
+		  [current_cpu_id]	"m" (__rseq_abi.cpu_id),
+		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  /* try store input */
+		  [v2]			"m" (*v2),
+		  [newv2]		"m" (newv2),
+		  /* final store input */
+		  [v]			"m" (*v),
+		  [expect]		"r" (expect),
+		  [newv]		"r" (newv)
+		: "memory", "cc", "eax"
+		  RSEQ_INJECT_CLOBBER
+		: abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+		  , error1, error2
+#endif
+	);
+	return 0;
+abort:
+	RSEQ_INJECT_FAILED
+	return -1;
+cmpfail:
+	return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+	rseq_bug("cpu_id comparison failed");
+error2:
+	rseq_bug("expected value comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect,
+					 intptr_t *v2, intptr_t newv2,
+					 intptr_t newv, int cpu)
+{
+	RSEQ_INJECT_C(9)
+
+	__asm__ __volatile__ goto (
+		RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+		/* Start rseq by storing table entry pointer into rseq_cs. */
+		RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+		RSEQ_INJECT_ASM(3)
+		"movl %[expect], %%eax\n\t"
+		"cmpl %[v], %%eax\n\t"
+		"jnz %l[cmpfail]\n\t"
+		RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+		"movl %[expect], %%eax\n\t"
+		"cmpl %[v], %%eax\n\t"
+		"jnz %l[error2]\n\t"
+#endif
+		/* try store */
+		"movl %[newv2], %[v2]\n\t"
+		RSEQ_INJECT_ASM(5)
+		"lock; addl $0,-128(%%esp)\n\t"
+		/* final store */
+		"movl %[newv], %[v]\n\t"
+		"2:\n\t"
+		RSEQ_INJECT_ASM(6)
+		RSEQ_ASM_DEFINE_ABORT(4, "", abort)
+		: /* gcc asm goto does not allow outputs */
+		: [cpu_id]		"r" (cpu),
+		  [current_cpu_id]	"m" (__rseq_abi.cpu_id),
+		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  /* try store input */
+		  [v2]			"m" (*v2),
+		  [newv2]		"r" (newv2),
+		  /* final store input */
+		  [v]			"m" (*v),
+		  [expect]		"m" (expect),
+		  [newv]		"r" (newv)
+		: "memory", "cc", "eax"
+		  RSEQ_INJECT_CLOBBER
+		: abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+		  , error1, error2
+#endif
+	);
+	return 0;
+abort:
+	RSEQ_INJECT_FAILED
+	return -1;
+cmpfail:
+	return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+	rseq_bug("cpu_id comparison failed");
+error2:
+	rseq_bug("expected value comparison failed");
+#endif
+
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect,
+			      intptr_t *v2, intptr_t expect2,
+			      intptr_t newv, int cpu)
+{
+	RSEQ_INJECT_C(9)
+
+	__asm__ __volatile__ goto (
+		RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+		/* Start rseq by storing table entry pointer into rseq_cs. */
+		RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+		RSEQ_INJECT_ASM(3)
+		"cmpl %[v], %[expect]\n\t"
+		"jnz %l[cmpfail]\n\t"
+		RSEQ_INJECT_ASM(4)
+		"cmpl %[expect2], %[v2]\n\t"
+		"jnz %l[cmpfail]\n\t"
+		RSEQ_INJECT_ASM(5)
+#ifdef RSEQ_COMPARE_TWICE
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+		"cmpl %[v], %[expect]\n\t"
+		"jnz %l[error2]\n\t"
+		"cmpl %[expect2], %[v2]\n\t"
+		"jnz %l[error3]\n\t"
+#endif
+		"movl %[newv], %%eax\n\t"
+		/* final store */
+		"movl %%eax, %[v]\n\t"
+		"2:\n\t"
+		RSEQ_INJECT_ASM(6)
+		RSEQ_ASM_DEFINE_ABORT(4, "", abort)
+		: /* gcc asm goto does not allow outputs */
+		: [cpu_id]		"r" (cpu),
+		  [current_cpu_id]	"m" (__rseq_abi.cpu_id),
+		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  /* cmp2 input */
+		  [v2]			"m" (*v2),
+		  [expect2]		"r" (expect2),
+		  /* final store input */
+		  [v]			"m" (*v),
+		  [expect]		"r" (expect),
+		  [newv]		"m" (newv)
+		: "memory", "cc", "eax"
+		  RSEQ_INJECT_CLOBBER
+		: abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+		  , error1, error2, error3
+#endif
+	);
+	return 0;
+abort:
+	RSEQ_INJECT_FAILED
+	return -1;
+cmpfail:
+	return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+	rseq_bug("cpu_id comparison failed");
+error2:
+	rseq_bug("1st expected value comparison failed");
+error3:
+	rseq_bug("2nd expected value comparison failed");
+#endif
+}
+
+/* TODO: implement a faster memcpy. */
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect,
+				 void *dst, void *src, size_t len,
+				 intptr_t newv, int cpu)
+{
+	uint32_t rseq_scratch[3];
+
+	RSEQ_INJECT_C(9)
+
+	__asm__ __volatile__ goto (
+		RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+		"movl %[src], %[rseq_scratch0]\n\t"
+		"movl %[dst], %[rseq_scratch1]\n\t"
+		"movl %[len], %[rseq_scratch2]\n\t"
+		/* Start rseq by storing table entry pointer into rseq_cs. */
+		RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+		RSEQ_INJECT_ASM(3)
+		"movl %[expect], %%eax\n\t"
+		"cmpl %%eax, %[v]\n\t"
+		"jnz 5f\n\t"
+		RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 6f)
+		"movl %[expect], %%eax\n\t"
+		"cmpl %%eax, %[v]\n\t"
+		"jnz 7f\n\t"
+#endif
+		/* try memcpy */
+		"test %[len], %[len]\n\t" \
+		"jz 333f\n\t" \
+		"222:\n\t" \
+		"movb (%[src]), %%al\n\t" \
+		"movb %%al, (%[dst])\n\t" \
+		"inc %[src]\n\t" \
+		"inc %[dst]\n\t" \
+		"dec %[len]\n\t" \
+		"jnz 222b\n\t" \
+		"333:\n\t" \
+		RSEQ_INJECT_ASM(5)
+		"movl %[newv], %%eax\n\t"
+		/* final store */
+		"movl %%eax, %[v]\n\t"
+		"2:\n\t"
+		RSEQ_INJECT_ASM(6)
+		/* teardown */
+		"movl %[rseq_scratch2], %[len]\n\t"
+		"movl %[rseq_scratch1], %[dst]\n\t"
+		"movl %[rseq_scratch0], %[src]\n\t"
+		RSEQ_ASM_DEFINE_ABORT(4,
+			"movl %[rseq_scratch2], %[len]\n\t"
+			"movl %[rseq_scratch1], %[dst]\n\t"
+			"movl %[rseq_scratch0], %[src]\n\t",
+			abort)
+		RSEQ_ASM_DEFINE_CMPFAIL(5,
+			"movl %[rseq_scratch2], %[len]\n\t"
+			"movl %[rseq_scratch1], %[dst]\n\t"
+			"movl %[rseq_scratch0], %[src]\n\t",
+			cmpfail)
+#ifdef RSEQ_COMPARE_TWICE
+		RSEQ_ASM_DEFINE_CMPFAIL(6,
+			"movl %[rseq_scratch2], %[len]\n\t"
+			"movl %[rseq_scratch1], %[dst]\n\t"
+			"movl %[rseq_scratch0], %[src]\n\t",
+			error1)
+		RSEQ_ASM_DEFINE_CMPFAIL(7,
+			"movl %[rseq_scratch2], %[len]\n\t"
+			"movl %[rseq_scratch1], %[dst]\n\t"
+			"movl %[rseq_scratch0], %[src]\n\t",
+			error2)
+#endif
+		: /* gcc asm goto does not allow outputs */
+		: [cpu_id]		"r" (cpu),
+		  [current_cpu_id]	"m" (__rseq_abi.cpu_id),
+		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  /* final store input */
+		  [v]			"m" (*v),
+		  [expect]		"m" (expect),
+		  [newv]		"m" (newv),
+		  /* try memcpy input */
+		  [dst]			"r" (dst),
+		  [src]			"r" (src),
+		  [len]			"r" (len),
+		  [rseq_scratch0]	"m" (rseq_scratch[0]),
+		  [rseq_scratch1]	"m" (rseq_scratch[1]),
+		  [rseq_scratch2]	"m" (rseq_scratch[2])
+		: "memory", "cc", "eax"
+		  RSEQ_INJECT_CLOBBER
+		: abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+		  , error1, error2
+#endif
+	);
+	return 0;
+abort:
+	RSEQ_INJECT_FAILED
+	return -1;
+cmpfail:
+	return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+	rseq_bug("cpu_id comparison failed");
+error2:
+	rseq_bug("expected value comparison failed");
+#endif
+}
+
+/* TODO: implement a faster memcpy. */
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect,
+					 void *dst, void *src, size_t len,
+					 intptr_t newv, int cpu)
+{
+	uint32_t rseq_scratch[3];
+
+	RSEQ_INJECT_C(9)
+
+	__asm__ __volatile__ goto (
+		RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+		"movl %[src], %[rseq_scratch0]\n\t"
+		"movl %[dst], %[rseq_scratch1]\n\t"
+		"movl %[len], %[rseq_scratch2]\n\t"
+		/* Start rseq by storing table entry pointer into rseq_cs. */
+		RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+		RSEQ_INJECT_ASM(3)
+		"movl %[expect], %%eax\n\t"
+		"cmpl %%eax, %[v]\n\t"
+		"jnz 5f\n\t"
+		RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 6f)
+		"movl %[expect], %%eax\n\t"
+		"cmpl %%eax, %[v]\n\t"
+		"jnz 7f\n\t"
+#endif
+		/* try memcpy */
+		"test %[len], %[len]\n\t" \
+		"jz 333f\n\t" \
+		"222:\n\t" \
+		"movb (%[src]), %%al\n\t" \
+		"movb %%al, (%[dst])\n\t" \
+		"inc %[src]\n\t" \
+		"inc %[dst]\n\t" \
+		"dec %[len]\n\t" \
+		"jnz 222b\n\t" \
+		"333:\n\t" \
+		RSEQ_INJECT_ASM(5)
+		"lock; addl $0,-128(%%esp)\n\t"
+		"movl %[newv], %%eax\n\t"
+		/* final store */
+		"movl %%eax, %[v]\n\t"
+		"2:\n\t"
+		RSEQ_INJECT_ASM(6)
+		/* teardown */
+		"movl %[rseq_scratch2], %[len]\n\t"
+		"movl %[rseq_scratch1], %[dst]\n\t"
+		"movl %[rseq_scratch0], %[src]\n\t"
+		RSEQ_ASM_DEFINE_ABORT(4,
+			"movl %[rseq_scratch2], %[len]\n\t"
+			"movl %[rseq_scratch1], %[dst]\n\t"
+			"movl %[rseq_scratch0], %[src]\n\t",
+			abort)
+		RSEQ_ASM_DEFINE_CMPFAIL(5,
+			"movl %[rseq_scratch2], %[len]\n\t"
+			"movl %[rseq_scratch1], %[dst]\n\t"
+			"movl %[rseq_scratch0], %[src]\n\t",
+			cmpfail)
+#ifdef RSEQ_COMPARE_TWICE
+		RSEQ_ASM_DEFINE_CMPFAIL(6,
+			"movl %[rseq_scratch2], %[len]\n\t"
+			"movl %[rseq_scratch1], %[dst]\n\t"
+			"movl %[rseq_scratch0], %[src]\n\t",
+			error1)
+		RSEQ_ASM_DEFINE_CMPFAIL(7,
+			"movl %[rseq_scratch2], %[len]\n\t"
+			"movl %[rseq_scratch1], %[dst]\n\t"
+			"movl %[rseq_scratch0], %[src]\n\t",
+			error2)
+#endif
+		: /* gcc asm goto does not allow outputs */
+		: [cpu_id]		"r" (cpu),
+		  [current_cpu_id]	"m" (__rseq_abi.cpu_id),
+		  [rseq_cs]		"m" (__rseq_abi.rseq_cs),
+		  /* final store input */
+		  [v]			"m" (*v),
+		  [expect]		"m" (expect),
+		  [newv]		"m" (newv),
+		  /* try memcpy input */
+		  [dst]			"r" (dst),
+		  [src]			"r" (src),
+		  [len]			"r" (len),
+		  [rseq_scratch0]	"m" (rseq_scratch[0]),
+		  [rseq_scratch1]	"m" (rseq_scratch[1]),
+		  [rseq_scratch2]	"m" (rseq_scratch[2])
+		: "memory", "cc", "eax"
+		  RSEQ_INJECT_CLOBBER
+		: abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+		  , error1, error2
+#endif
+	);
+	return 0;
+abort:
+	RSEQ_INJECT_FAILED
+	return -1;
+cmpfail:
+	return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+	rseq_bug("cpu_id comparison failed");
+error2:
+	rseq_bug("expected value comparison failed");
+#endif
+}
+
+#endif /* !RSEQ_SKIP_FASTPATH */
+
+#endif
diff --git a/tools/testing/selftests/rseq/rseq.c b/tools/testing/selftests/rseq/rseq.c
new file mode 100644
index 000000000..4847e97ed
--- /dev/null
+++ b/tools/testing/selftests/rseq/rseq.c
@@ -0,0 +1,117 @@
+// SPDX-License-Identifier: LGPL-2.1
+/*
+ * rseq.c
+ *
+ * Copyright (C) 2016 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; only
+ * version 2.1 of the License.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ */
+
+#define _GNU_SOURCE
+#include <errno.h>
+#include <sched.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <syscall.h>
+#include <assert.h>
+#include <signal.h>
+
+#include "rseq.h"
+
+#define ARRAY_SIZE(arr)	(sizeof(arr) / sizeof((arr)[0]))
+
+__attribute__((tls_model("initial-exec"))) __thread
+volatile struct rseq __rseq_abi = {
+	.cpu_id = RSEQ_CPU_ID_UNINITIALIZED,
+};
+
+static __attribute__((tls_model("initial-exec"))) __thread
+volatile int refcount;
+
+static void signal_off_save(sigset_t *oldset)
+{
+	sigset_t set;
+	int ret;
+
+	sigfillset(&set);
+	ret = pthread_sigmask(SIG_BLOCK, &set, oldset);
+	if (ret)
+		abort();
+}
+
+static void signal_restore(sigset_t oldset)
+{
+	int ret;
+
+	ret = pthread_sigmask(SIG_SETMASK, &oldset, NULL);
+	if (ret)
+		abort();
+}
+
+static int sys_rseq(volatile struct rseq *rseq_abi, uint32_t rseq_len,
+		    int flags, uint32_t sig)
+{
+	return syscall(__NR_rseq, rseq_abi, rseq_len, flags, sig);
+}
+
+int rseq_register_current_thread(void)
+{
+	int rc, ret = 0;
+	sigset_t oldset;
+
+	signal_off_save(&oldset);
+	if (refcount++)
+		goto end;
+	rc = sys_rseq(&__rseq_abi, sizeof(struct rseq), 0, RSEQ_SIG);
+	if (!rc) {
+		assert(rseq_current_cpu_raw() >= 0);
+		goto end;
+	}
+	if (errno != EBUSY)
+		__rseq_abi.cpu_id = -2;
+	ret = -1;
+	refcount--;
+end:
+	signal_restore(oldset);
+	return ret;
+}
+
+int rseq_unregister_current_thread(void)
+{
+	int rc, ret = 0;
+	sigset_t oldset;
+
+	signal_off_save(&oldset);
+	if (--refcount)
+		goto end;
+	rc = sys_rseq(&__rseq_abi, sizeof(struct rseq),
+		      RSEQ_FLAG_UNREGISTER, RSEQ_SIG);
+	if (!rc)
+		goto end;
+	ret = -1;
+end:
+	signal_restore(oldset);
+	return ret;
+}
+
+int32_t rseq_fallback_current_cpu(void)
+{
+	int32_t cpu;
+
+	cpu = sched_getcpu();
+	if (cpu < 0) {
+		perror("sched_getcpu()");
+		abort();
+	}
+	return cpu;
+}
diff --git a/tools/testing/selftests/rseq/rseq.h b/tools/testing/selftests/rseq/rseq.h
new file mode 100644
index 000000000..c72eb70f9
--- /dev/null
+++ b/tools/testing/selftests/rseq/rseq.h
@@ -0,0 +1,163 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * rseq.h
+ *
+ * (C) Copyright 2016-2018 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+ */
+
+#ifndef RSEQ_H
+#define RSEQ_H
+
+#include <stdint.h>
+#include <stdbool.h>
+#include <pthread.h>
+#include <signal.h>
+#include <sched.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sched.h>
+#include <linux/rseq.h>
+
+/*
+ * Empty code injection macros, override when testing.
+ * It is important to consider that the ASM injection macros need to be
+ * fully reentrant (e.g. do not modify the stack).
+ */
+#ifndef RSEQ_INJECT_ASM
+#define RSEQ_INJECT_ASM(n)
+#endif
+
+#ifndef RSEQ_INJECT_C
+#define RSEQ_INJECT_C(n)
+#endif
+
+#ifndef RSEQ_INJECT_INPUT
+#define RSEQ_INJECT_INPUT
+#endif
+
+#ifndef RSEQ_INJECT_CLOBBER
+#define RSEQ_INJECT_CLOBBER
+#endif
+
+#ifndef RSEQ_INJECT_FAILED
+#define RSEQ_INJECT_FAILED
+#endif
+
+extern __thread volatile struct rseq __rseq_abi;
+
+#define rseq_likely(x)		__builtin_expect(!!(x), 1)
+#define rseq_unlikely(x)	__builtin_expect(!!(x), 0)
+#define rseq_barrier()		__asm__ __volatile__("" : : : "memory")
+
+#define RSEQ_ACCESS_ONCE(x)	(*(__volatile__  __typeof__(x) *)&(x))
+#define RSEQ_WRITE_ONCE(x, v)	__extension__ ({ RSEQ_ACCESS_ONCE(x) = (v); })
+#define RSEQ_READ_ONCE(x)	RSEQ_ACCESS_ONCE(x)
+
+#define __rseq_str_1(x)	#x
+#define __rseq_str(x)		__rseq_str_1(x)
+
+#define rseq_log(fmt, args...)						       \
+	fprintf(stderr, fmt "(in %s() at " __FILE__ ":" __rseq_str(__LINE__)"\n", \
+		## args, __func__)
+
+#define rseq_bug(fmt, args...)		\
+	do {				\
+		rseq_log(fmt, ##args);	\
+		abort();		\
+	} while (0)
+
+#if defined(__x86_64__) || defined(__i386__)
+#include <rseq-x86.h>
+#elif defined(__ARMEL__)
+#include <rseq-arm.h>
+#elif defined (__AARCH64EL__)
+#include <rseq-arm64.h>
+#elif defined(__PPC__)
+#include <rseq-ppc.h>
+#elif defined(__mips__)
+#include <rseq-mips.h>
+#elif defined(__s390__)
+#include <rseq-s390.h>
+#else
+#error unsupported target
+#endif
+
+/*
+ * Register rseq for the current thread. This needs to be called once
+ * by any thread which uses restartable sequences, before they start
+ * using restartable sequences, to ensure restartable sequences
+ * succeed. A restartable sequence executed from a non-registered
+ * thread will always fail.
+ */
+int rseq_register_current_thread(void);
+
+/*
+ * Unregister rseq for current thread.
+ */
+int rseq_unregister_current_thread(void);
+
+/*
+ * Restartable sequence fallback for reading the current CPU number.
+ */
+int32_t rseq_fallback_current_cpu(void);
+
+/*
+ * Values returned can be either the current CPU number, -1 (rseq is
+ * uninitialized), or -2 (rseq initialization has failed).
+ */
+static inline int32_t rseq_current_cpu_raw(void)
+{
+	return RSEQ_ACCESS_ONCE(__rseq_abi.cpu_id);
+}
+
+/*
+ * Returns a possible CPU number, which is typically the current CPU.
+ * The returned CPU number can be used to prepare for an rseq critical
+ * section, which will confirm whether the cpu number is indeed the
+ * current one, and whether rseq is initialized.
+ *
+ * The CPU number returned by rseq_cpu_start should always be validated
+ * by passing it to a rseq asm sequence, or by comparing it to the
+ * return value of rseq_current_cpu_raw() if the rseq asm sequence
+ * does not need to be invoked.
+ */
+static inline uint32_t rseq_cpu_start(void)
+{
+	return RSEQ_ACCESS_ONCE(__rseq_abi.cpu_id_start);
+}
+
+static inline uint32_t rseq_current_cpu(void)
+{
+	int32_t cpu;
+
+	cpu = rseq_current_cpu_raw();
+	if (rseq_unlikely(cpu < 0))
+		cpu = rseq_fallback_current_cpu();
+	return cpu;
+}
+
+static inline void rseq_clear_rseq_cs(void)
+{
+#ifdef __LP64__
+	__rseq_abi.rseq_cs.ptr = 0;
+#else
+	__rseq_abi.rseq_cs.ptr.ptr32 = 0;
+#endif
+}
+
+/*
+ * rseq_prepare_unload() should be invoked by each thread executing a rseq
+ * critical section at least once between their last critical section and
+ * library unload of the library defining the rseq critical section
+ * (struct rseq_cs). This also applies to use of rseq in code generated by
+ * JIT: rseq_prepare_unload() should be invoked at least once by each
+ * thread executing a rseq critical section before reclaim of the memory
+ * holding the struct rseq_cs.
+ */
+static inline void rseq_prepare_unload(void)
+{
+	rseq_clear_rseq_cs();
+}
+
+#endif  /* RSEQ_H_ */
diff --git a/tools/testing/selftests/rseq/run_param_test.sh b/tools/testing/selftests/rseq/run_param_test.sh
new file mode 100755
index 000000000..3acd6d75f
--- /dev/null
+++ b/tools/testing/selftests/rseq/run_param_test.sh
@@ -0,0 +1,121 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0+ or MIT
+
+EXTRA_ARGS=${@}
+
+OLDIFS="$IFS"
+IFS=$'\n'
+TEST_LIST=(
+	"-T s"
+	"-T l"
+	"-T b"
+	"-T b -M"
+	"-T m"
+	"-T m -M"
+	"-T i"
+)
+
+TEST_NAME=(
+	"spinlock"
+	"list"
+	"buffer"
+	"buffer with barrier"
+	"memcpy"
+	"memcpy with barrier"
+	"increment"
+)
+IFS="$OLDIFS"
+
+REPS=1000
+SLOW_REPS=100
+
+function do_tests()
+{
+	local i=0
+	while [ "$i" -lt "${#TEST_LIST[@]}" ]; do
+		echo "Running test ${TEST_NAME[$i]}"
+		./param_test ${TEST_LIST[$i]} -r ${REPS} ${@} ${EXTRA_ARGS} || exit 1
+		echo "Running compare-twice test ${TEST_NAME[$i]}"
+		./param_test_compare_twice ${TEST_LIST[$i]} -r ${REPS} ${@} ${EXTRA_ARGS} || exit 1
+		let "i++"
+	done
+}
+
+echo "Default parameters"
+do_tests
+
+echo "Loop injection: 10000 loops"
+
+OLDIFS="$IFS"
+IFS=$'\n'
+INJECT_LIST=(
+	"1"
+	"2"
+	"3"
+	"4"
+	"5"
+	"6"
+	"7"
+	"8"
+	"9"
+)
+IFS="$OLDIFS"
+
+NR_LOOPS=10000
+
+i=0
+while [ "$i" -lt "${#INJECT_LIST[@]}" ]; do
+	echo "Injecting at <${INJECT_LIST[$i]}>"
+	do_tests -${INJECT_LIST[i]} ${NR_LOOPS}
+	let "i++"
+done
+NR_LOOPS=
+
+function inject_blocking()
+{
+	OLDIFS="$IFS"
+	IFS=$'\n'
+	INJECT_LIST=(
+		"7"
+		"8"
+		"9"
+	)
+	IFS="$OLDIFS"
+
+	NR_LOOPS=-1
+
+	i=0
+	while [ "$i" -lt "${#INJECT_LIST[@]}" ]; do
+		echo "Injecting at <${INJECT_LIST[$i]}>"
+		do_tests -${INJECT_LIST[i]} -1 ${@}
+		let "i++"
+	done
+	NR_LOOPS=
+}
+
+echo "Yield injection (25%)"
+inject_blocking -m 4 -y
+
+echo "Yield injection (50%)"
+inject_blocking -m 2 -y
+
+echo "Yield injection (100%)"
+inject_blocking -m 1 -y
+
+echo "Kill injection (25%)"
+inject_blocking -m 4 -k
+
+echo "Kill injection (50%)"
+inject_blocking -m 2 -k
+
+echo "Kill injection (100%)"
+inject_blocking -m 1 -k
+
+echo "Sleep injection (1ms, 25%)"
+inject_blocking -m 4 -s 1
+
+echo "Sleep injection (1ms, 50%)"
+inject_blocking -m 2 -s 1
+
+echo "Sleep injection (1ms, 100%)"
+inject_blocking -m 1 -s 1
diff --git a/tools/testing/selftests/rseq/settings b/tools/testing/selftests/rseq/settings
new file mode 100644
index 000000000..e7b941753
--- /dev/null
+++ b/tools/testing/selftests/rseq/settings
@@ -0,0 +1 @@
+timeout=0
diff --git a/tools/testing/selftests/rtc/.gitignore b/tools/testing/selftests/rtc/.gitignore
new file mode 100644
index 000000000..d0ad44f62
--- /dev/null
+++ b/tools/testing/selftests/rtc/.gitignore
@@ -0,0 +1,2 @@
+rtctest
+setdate
diff --git a/tools/testing/selftests/rtc/Makefile b/tools/testing/selftests/rtc/Makefile
new file mode 100644
index 000000000..de9c85666
--- /dev/null
+++ b/tools/testing/selftests/rtc/Makefile
@@ -0,0 +1,9 @@
+# SPDX-License-Identifier: GPL-2.0
+CFLAGS += -O3 -Wl,-no-as-needed -Wall
+LDFLAGS += -lrt -lpthread -lm
+
+TEST_GEN_PROGS = rtctest
+
+TEST_GEN_PROGS_EXTENDED = setdate
+
+include ../lib.mk
diff --git a/tools/testing/selftests/rtc/rtctest.c b/tools/testing/selftests/rtc/rtctest.c
new file mode 100644
index 000000000..b2065536d
--- /dev/null
+++ b/tools/testing/selftests/rtc/rtctest.c
@@ -0,0 +1,337 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Real Time Clock Driver Test Program
+ *
+ * Copyright (c) 2018 Alexandre Belloni <alexandre.belloni@bootlin.com>
+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/rtc.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/ioctl.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <time.h>
+#include <unistd.h>
+
+#include "../kselftest_harness.h"
+
+#define NUM_UIE 3
+#define ALARM_DELTA 3
+
+static char *rtc_file = "/dev/rtc0";
+
+FIXTURE(rtc) {
+	int fd;
+};
+
+FIXTURE_SETUP(rtc) {
+	self->fd = open(rtc_file, O_RDONLY);
+	ASSERT_NE(-1, self->fd);
+}
+
+FIXTURE_TEARDOWN(rtc) {
+	close(self->fd);
+}
+
+TEST_F(rtc, date_read) {
+	int rc;
+	struct rtc_time rtc_tm;
+
+	/* Read the RTC time/date */
+	rc = ioctl(self->fd, RTC_RD_TIME, &rtc_tm);
+	ASSERT_NE(-1, rc);
+
+	TH_LOG("Current RTC date/time is %02d/%02d/%02d %02d:%02d:%02d.",
+	       rtc_tm.tm_mday, rtc_tm.tm_mon + 1, rtc_tm.tm_year + 1900,
+	       rtc_tm.tm_hour, rtc_tm.tm_min, rtc_tm.tm_sec);
+}
+
+TEST_F(rtc, uie_read) {
+	int i, rc, irq = 0;
+	unsigned long data;
+
+	/* Turn on update interrupts */
+	rc = ioctl(self->fd, RTC_UIE_ON, 0);
+	if (rc == -1) {
+		ASSERT_EQ(EINVAL, errno);
+		TH_LOG("skip update IRQs not supported.");
+		return;
+	}
+
+	for (i = 0; i < NUM_UIE; i++) {
+		/* This read will block */
+		rc = read(self->fd, &data, sizeof(data));
+		ASSERT_NE(-1, rc);
+		irq++;
+	}
+
+	EXPECT_EQ(NUM_UIE, irq);
+
+	rc = ioctl(self->fd, RTC_UIE_OFF, 0);
+	ASSERT_NE(-1, rc);
+}
+
+TEST_F(rtc, uie_select) {
+	int i, rc, irq = 0;
+	unsigned long data;
+
+	/* Turn on update interrupts */
+	rc = ioctl(self->fd, RTC_UIE_ON, 0);
+	if (rc == -1) {
+		ASSERT_EQ(EINVAL, errno);
+		TH_LOG("skip update IRQs not supported.");
+		return;
+	}
+
+	for (i = 0; i < NUM_UIE; i++) {
+		struct timeval tv = { .tv_sec = 2 };
+		fd_set readfds;
+
+		FD_ZERO(&readfds);
+		FD_SET(self->fd, &readfds);
+		/* The select will wait until an RTC interrupt happens. */
+		rc = select(self->fd + 1, &readfds, NULL, NULL, &tv);
+		ASSERT_NE(-1, rc);
+		ASSERT_NE(0, rc);
+
+		/* This read won't block */
+		rc = read(self->fd, &data, sizeof(unsigned long));
+		ASSERT_NE(-1, rc);
+		irq++;
+	}
+
+	EXPECT_EQ(NUM_UIE, irq);
+
+	rc = ioctl(self->fd, RTC_UIE_OFF, 0);
+	ASSERT_NE(-1, rc);
+}
+
+TEST_F(rtc, alarm_alm_set) {
+	struct timeval tv = { .tv_sec = ALARM_DELTA + 2 };
+	unsigned long data;
+	struct rtc_time tm;
+	fd_set readfds;
+	time_t secs, new;
+	int rc;
+
+	rc = ioctl(self->fd, RTC_RD_TIME, &tm);
+	ASSERT_NE(-1, rc);
+
+	secs = timegm((struct tm *)&tm) + ALARM_DELTA;
+	gmtime_r(&secs, (struct tm *)&tm);
+
+	rc = ioctl(self->fd, RTC_ALM_SET, &tm);
+	if (rc == -1) {
+		ASSERT_EQ(EINVAL, errno);
+		TH_LOG("skip alarms are not supported.");
+		return;
+	}
+
+	rc = ioctl(self->fd, RTC_ALM_READ, &tm);
+	ASSERT_NE(-1, rc);
+
+	TH_LOG("Alarm time now set to %02d:%02d:%02d.",
+	       tm.tm_hour, tm.tm_min, tm.tm_sec);
+
+	/* Enable alarm interrupts */
+	rc = ioctl(self->fd, RTC_AIE_ON, 0);
+	ASSERT_NE(-1, rc);
+
+	FD_ZERO(&readfds);
+	FD_SET(self->fd, &readfds);
+
+	rc = select(self->fd + 1, &readfds, NULL, NULL, &tv);
+	ASSERT_NE(-1, rc);
+	ASSERT_NE(0, rc);
+
+	/* Disable alarm interrupts */
+	rc = ioctl(self->fd, RTC_AIE_OFF, 0);
+	ASSERT_NE(-1, rc);
+
+	rc = read(self->fd, &data, sizeof(unsigned long));
+	ASSERT_NE(-1, rc);
+	TH_LOG("data: %lx", data);
+
+	rc = ioctl(self->fd, RTC_RD_TIME, &tm);
+	ASSERT_NE(-1, rc);
+
+	new = timegm((struct tm *)&tm);
+	ASSERT_EQ(new, secs);
+}
+
+TEST_F(rtc, alarm_wkalm_set) {
+	struct timeval tv = { .tv_sec = ALARM_DELTA + 2 };
+	struct rtc_wkalrm alarm = { 0 };
+	struct rtc_time tm;
+	unsigned long data;
+	fd_set readfds;
+	time_t secs, new;
+	int rc;
+
+	rc = ioctl(self->fd, RTC_RD_TIME, &alarm.time);
+	ASSERT_NE(-1, rc);
+
+	secs = timegm((struct tm *)&alarm.time) + ALARM_DELTA;
+	gmtime_r(&secs, (struct tm *)&alarm.time);
+
+	alarm.enabled = 1;
+
+	rc = ioctl(self->fd, RTC_WKALM_SET, &alarm);
+	if (rc == -1) {
+		ASSERT_EQ(EINVAL, errno);
+		TH_LOG("skip alarms are not supported.");
+		return;
+	}
+
+	rc = ioctl(self->fd, RTC_WKALM_RD, &alarm);
+	ASSERT_NE(-1, rc);
+
+	TH_LOG("Alarm time now set to %02d/%02d/%02d %02d:%02d:%02d.",
+	       alarm.time.tm_mday, alarm.time.tm_mon + 1,
+	       alarm.time.tm_year + 1900, alarm.time.tm_hour,
+	       alarm.time.tm_min, alarm.time.tm_sec);
+
+	FD_ZERO(&readfds);
+	FD_SET(self->fd, &readfds);
+
+	rc = select(self->fd + 1, &readfds, NULL, NULL, &tv);
+	ASSERT_NE(-1, rc);
+	ASSERT_NE(0, rc);
+
+	rc = read(self->fd, &data, sizeof(unsigned long));
+	ASSERT_NE(-1, rc);
+
+	rc = ioctl(self->fd, RTC_RD_TIME, &tm);
+	ASSERT_NE(-1, rc);
+
+	new = timegm((struct tm *)&tm);
+	ASSERT_EQ(new, secs);
+}
+
+TEST_F(rtc, alarm_alm_set_minute) {
+	struct timeval tv = { .tv_sec = 62 };
+	unsigned long data;
+	struct rtc_time tm;
+	fd_set readfds;
+	time_t secs, new;
+	int rc;
+
+	rc = ioctl(self->fd, RTC_RD_TIME, &tm);
+	ASSERT_NE(-1, rc);
+
+	secs = timegm((struct tm *)&tm) + 60 - tm.tm_sec;
+	gmtime_r(&secs, (struct tm *)&tm);
+
+	rc = ioctl(self->fd, RTC_ALM_SET, &tm);
+	if (rc == -1) {
+		ASSERT_EQ(EINVAL, errno);
+		TH_LOG("skip alarms are not supported.");
+		return;
+	}
+
+	rc = ioctl(self->fd, RTC_ALM_READ, &tm);
+	ASSERT_NE(-1, rc);
+
+	TH_LOG("Alarm time now set to %02d:%02d:%02d.",
+	       tm.tm_hour, tm.tm_min, tm.tm_sec);
+
+	/* Enable alarm interrupts */
+	rc = ioctl(self->fd, RTC_AIE_ON, 0);
+	ASSERT_NE(-1, rc);
+
+	FD_ZERO(&readfds);
+	FD_SET(self->fd, &readfds);
+
+	rc = select(self->fd + 1, &readfds, NULL, NULL, &tv);
+	ASSERT_NE(-1, rc);
+	ASSERT_NE(0, rc);
+
+	/* Disable alarm interrupts */
+	rc = ioctl(self->fd, RTC_AIE_OFF, 0);
+	ASSERT_NE(-1, rc);
+
+	rc = read(self->fd, &data, sizeof(unsigned long));
+	ASSERT_NE(-1, rc);
+	TH_LOG("data: %lx", data);
+
+	rc = ioctl(self->fd, RTC_RD_TIME, &tm);
+	ASSERT_NE(-1, rc);
+
+	new = timegm((struct tm *)&tm);
+	ASSERT_EQ(new, secs);
+}
+
+TEST_F(rtc, alarm_wkalm_set_minute) {
+	struct timeval tv = { .tv_sec = 62 };
+	struct rtc_wkalrm alarm = { 0 };
+	struct rtc_time tm;
+	unsigned long data;
+	fd_set readfds;
+	time_t secs, new;
+	int rc;
+
+	rc = ioctl(self->fd, RTC_RD_TIME, &alarm.time);
+	ASSERT_NE(-1, rc);
+
+	secs = timegm((struct tm *)&alarm.time) + 60 - alarm.time.tm_sec;
+	gmtime_r(&secs, (struct tm *)&alarm.time);
+
+	alarm.enabled = 1;
+
+	rc = ioctl(self->fd, RTC_WKALM_SET, &alarm);
+	if (rc == -1) {
+		ASSERT_EQ(EINVAL, errno);
+		TH_LOG("skip alarms are not supported.");
+		return;
+	}
+
+	rc = ioctl(self->fd, RTC_WKALM_RD, &alarm);
+	ASSERT_NE(-1, rc);
+
+	TH_LOG("Alarm time now set to %02d/%02d/%02d %02d:%02d:%02d.",
+	       alarm.time.tm_mday, alarm.time.tm_mon + 1,
+	       alarm.time.tm_year + 1900, alarm.time.tm_hour,
+	       alarm.time.tm_min, alarm.time.tm_sec);
+
+	FD_ZERO(&readfds);
+	FD_SET(self->fd, &readfds);
+
+	rc = select(self->fd + 1, &readfds, NULL, NULL, &tv);
+	ASSERT_NE(-1, rc);
+	ASSERT_NE(0, rc);
+
+	rc = read(self->fd, &data, sizeof(unsigned long));
+	ASSERT_NE(-1, rc);
+
+	rc = ioctl(self->fd, RTC_RD_TIME, &tm);
+	ASSERT_NE(-1, rc);
+
+	new = timegm((struct tm *)&tm);
+	ASSERT_EQ(new, secs);
+}
+
+static void __attribute__((constructor))
+__constructor_order_last(void)
+{
+	if (!__constructor_order)
+		__constructor_order = _CONSTRUCTOR_ORDER_BACKWARD;
+}
+
+int main(int argc, char **argv)
+{
+	switch (argc) {
+	case 2:
+		rtc_file = argv[1];
+		/* FALLTHROUGH */
+	case 1:
+		break;
+	default:
+		fprintf(stderr, "usage: %s [rtcdev]\n", argv[0]);
+		return 1;
+	}
+
+	return test_harness_run(argc, argv);
+}
diff --git a/tools/testing/selftests/rtc/setdate.c b/tools/testing/selftests/rtc/setdate.c
new file mode 100644
index 000000000..2cb78489e
--- /dev/null
+++ b/tools/testing/selftests/rtc/setdate.c
@@ -0,0 +1,86 @@
+/* Real Time Clock Driver Test
+ *	by: Benjamin Gaignard (benjamin.gaignard@linaro.org)
+ *
+ * To build
+ *	gcc rtctest_setdate.c -o rtctest_setdate
+ *
+ *   This program is free software: you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ */
+
+#include <stdio.h>
+#include <linux/rtc.h>
+#include <sys/ioctl.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <errno.h>
+
+static const char default_time[] = "00:00:00";
+
+int main(int argc, char **argv)
+{
+	int fd, retval;
+	struct rtc_time new, current;
+	const char *rtc, *date;
+	const char *time = default_time;
+
+	switch (argc) {
+	case 4:
+		time = argv[3];
+		/* FALLTHROUGH */
+	case 3:
+		date = argv[2];
+		rtc = argv[1];
+		break;
+	default:
+		fprintf(stderr, "usage: rtctest_setdate <rtcdev> <DD-MM-YYYY> [HH:MM:SS]\n");
+		return 1;
+	}
+
+	fd = open(rtc, O_RDONLY);
+	if (fd == -1) {
+		perror(rtc);
+		exit(errno);
+	}
+
+	sscanf(date, "%d-%d-%d", &new.tm_mday, &new.tm_mon, &new.tm_year);
+	new.tm_mon -= 1;
+	new.tm_year -= 1900;
+	sscanf(time, "%d:%d:%d", &new.tm_hour, &new.tm_min, &new.tm_sec);
+
+	fprintf(stderr, "Test will set RTC date/time to %d-%d-%d, %02d:%02d:%02d.\n",
+		new.tm_mday, new.tm_mon + 1, new.tm_year + 1900,
+		new.tm_hour, new.tm_min, new.tm_sec);
+
+	/* Write the new date in RTC */
+	retval = ioctl(fd, RTC_SET_TIME, &new);
+	if (retval == -1) {
+		perror("RTC_SET_TIME ioctl");
+		close(fd);
+		exit(errno);
+	}
+
+	/* Read back */
+	retval = ioctl(fd, RTC_RD_TIME, &current);
+	if (retval == -1) {
+		perror("RTC_RD_TIME ioctl");
+		exit(errno);
+	}
+
+	fprintf(stderr, "\n\nCurrent RTC date/time is %d-%d-%d, %02d:%02d:%02d.\n",
+		current.tm_mday, current.tm_mon + 1, current.tm_year + 1900,
+		current.tm_hour, current.tm_min, current.tm_sec);
+
+	close(fd);
+	return 0;
+}
diff --git a/tools/testing/selftests/seccomp/.gitignore b/tools/testing/selftests/seccomp/.gitignore
new file mode 100644
index 000000000..5af29d3a1
--- /dev/null
+++ b/tools/testing/selftests/seccomp/.gitignore
@@ -0,0 +1,2 @@
+seccomp_bpf
+seccomp_benchmark
diff --git a/tools/testing/selftests/seccomp/Makefile b/tools/testing/selftests/seccomp/Makefile
new file mode 100644
index 000000000..1760b3e39
--- /dev/null
+++ b/tools/testing/selftests/seccomp/Makefile
@@ -0,0 +1,17 @@
+# SPDX-License-Identifier: GPL-2.0
+all:
+
+include ../lib.mk
+
+.PHONY: all clean
+
+BINARIES := seccomp_bpf seccomp_benchmark
+CFLAGS += -Wl,-no-as-needed -Wall
+
+seccomp_bpf: seccomp_bpf.c ../kselftest_harness.h
+	$(CC) $(CFLAGS) $(LDFLAGS) $< -lpthread -o $@
+
+TEST_PROGS += $(BINARIES)
+EXTRA_CLEAN := $(BINARIES)
+
+all: $(BINARIES)
diff --git a/tools/testing/selftests/seccomp/config b/tools/testing/selftests/seccomp/config
new file mode 100644
index 000000000..db1e11b08
--- /dev/null
+++ b/tools/testing/selftests/seccomp/config
@@ -0,0 +1,2 @@
+CONFIG_SECCOMP=y
+CONFIG_SECCOMP_FILTER=y
diff --git a/tools/testing/selftests/seccomp/seccomp_benchmark.c b/tools/testing/selftests/seccomp/seccomp_benchmark.c
new file mode 100644
index 000000000..5838c8697
--- /dev/null
+++ b/tools/testing/selftests/seccomp/seccomp_benchmark.c
@@ -0,0 +1,99 @@
+/*
+ * Strictly speaking, this is not a test. But it can report during test
+ * runs so relative performace can be measured.
+ */
+#define _GNU_SOURCE
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+#include <unistd.h>
+#include <linux/filter.h>
+#include <linux/seccomp.h>
+#include <sys/prctl.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+
+#define ARRAY_SIZE(a)    (sizeof(a) / sizeof(a[0]))
+
+unsigned long long timing(clockid_t clk_id, unsigned long long samples)
+{
+	pid_t pid, ret;
+	unsigned long long i;
+	struct timespec start, finish;
+
+	pid = getpid();
+	assert(clock_gettime(clk_id, &start) == 0);
+	for (i = 0; i < samples; i++) {
+		ret = syscall(__NR_getpid);
+		assert(pid == ret);
+	}
+	assert(clock_gettime(clk_id, &finish) == 0);
+
+	i = finish.tv_sec - start.tv_sec;
+	i *= 1000000000;
+	i += finish.tv_nsec - start.tv_nsec;
+
+	printf("%lu.%09lu - %lu.%09lu = %llu\n",
+		finish.tv_sec, finish.tv_nsec,
+		start.tv_sec, start.tv_nsec,
+		i);
+
+	return i;
+}
+
+unsigned long long calibrate(void)
+{
+	unsigned long long i;
+
+	printf("Calibrating reasonable sample size...\n");
+
+	for (i = 5; ; i++) {
+		unsigned long long samples = 1 << i;
+
+		/* Find something that takes more than 5 seconds to run. */
+		if (timing(CLOCK_REALTIME, samples) / 1000000000ULL > 5)
+			return samples;
+	}
+}
+
+int main(int argc, char *argv[])
+{
+	struct sock_filter filter[] = {
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+	};
+	struct sock_fprog prog = {
+		.len = (unsigned short)ARRAY_SIZE(filter),
+		.filter = filter,
+	};
+	long ret;
+	unsigned long long samples;
+	unsigned long long native, filtered;
+
+	if (argc > 1)
+		samples = strtoull(argv[1], NULL, 0);
+	else
+		samples = calibrate();
+
+	printf("Benchmarking %llu samples...\n", samples);
+
+	native = timing(CLOCK_PROCESS_CPUTIME_ID, samples) / samples;
+	printf("getpid native: %llu ns\n", native);
+
+	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+	assert(ret == 0);
+
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
+	assert(ret == 0);
+
+	filtered = timing(CLOCK_PROCESS_CPUTIME_ID, samples) / samples;
+	printf("getpid RET_ALLOW: %llu ns\n", filtered);
+
+	printf("Estimated seccomp overhead per syscall: %llu ns\n",
+		filtered - native);
+
+	if (filtered == native)
+		printf("Trying running again with more samples.\n");
+
+	return 0;
+}
diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c
new file mode 100644
index 000000000..14cad657b
--- /dev/null
+++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
@@ -0,0 +1,2997 @@
+/*
+ * Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
+ * Use of this source code is governed by the GPLv2 license.
+ *
+ * Test code for seccomp bpf.
+ */
+
+#include <sys/types.h>
+
+/*
+ * glibc 2.26 and later have SIGSYS in siginfo_t. Before that,
+ * we need to use the kernel's siginfo.h file and trick glibc
+ * into accepting it.
+ */
+#if !__GLIBC_PREREQ(2, 26)
+# include <asm/siginfo.h>
+# define __have_siginfo_t 1
+# define __have_sigval_t 1
+# define __have_sigevent_t 1
+#endif
+
+#include <errno.h>
+#include <linux/filter.h>
+#include <sys/prctl.h>
+#include <sys/ptrace.h>
+#include <sys/user.h>
+#include <linux/prctl.h>
+#include <linux/ptrace.h>
+#include <linux/seccomp.h>
+#include <pthread.h>
+#include <semaphore.h>
+#include <signal.h>
+#include <stddef.h>
+#include <stdbool.h>
+#include <string.h>
+#include <time.h>
+#include <linux/elf.h>
+#include <sys/uio.h>
+#include <sys/utsname.h>
+#include <sys/fcntl.h>
+#include <sys/mman.h>
+#include <sys/times.h>
+
+#define _GNU_SOURCE
+#include <unistd.h>
+#include <sys/syscall.h>
+
+#include "../kselftest_harness.h"
+
+#ifndef PR_SET_PTRACER
+# define PR_SET_PTRACER 0x59616d61
+#endif
+
+#ifndef PR_SET_NO_NEW_PRIVS
+#define PR_SET_NO_NEW_PRIVS 38
+#define PR_GET_NO_NEW_PRIVS 39
+#endif
+
+#ifndef PR_SECCOMP_EXT
+#define PR_SECCOMP_EXT 43
+#endif
+
+#ifndef SECCOMP_EXT_ACT
+#define SECCOMP_EXT_ACT 1
+#endif
+
+#ifndef SECCOMP_EXT_ACT_TSYNC
+#define SECCOMP_EXT_ACT_TSYNC 1
+#endif
+
+#ifndef SECCOMP_MODE_STRICT
+#define SECCOMP_MODE_STRICT 1
+#endif
+
+#ifndef SECCOMP_MODE_FILTER
+#define SECCOMP_MODE_FILTER 2
+#endif
+
+#ifndef SECCOMP_RET_ALLOW
+struct seccomp_data {
+	int nr;
+	__u32 arch;
+	__u64 instruction_pointer;
+	__u64 args[6];
+};
+#endif
+
+#ifndef SECCOMP_RET_KILL_PROCESS
+#define SECCOMP_RET_KILL_PROCESS 0x80000000U /* kill the process */
+#define SECCOMP_RET_KILL_THREAD	 0x00000000U /* kill the thread */
+#endif
+#ifndef SECCOMP_RET_KILL
+#define SECCOMP_RET_KILL	 SECCOMP_RET_KILL_THREAD
+#define SECCOMP_RET_TRAP	 0x00030000U /* disallow and force a SIGSYS */
+#define SECCOMP_RET_ERRNO	 0x00050000U /* returns an errno */
+#define SECCOMP_RET_TRACE	 0x7ff00000U /* pass to a tracer or disallow */
+#define SECCOMP_RET_ALLOW	 0x7fff0000U /* allow */
+#endif
+#ifndef SECCOMP_RET_LOG
+#define SECCOMP_RET_LOG		 0x7ffc0000U /* allow after logging */
+#endif
+
+#ifndef __NR_seccomp
+# if defined(__i386__)
+#  define __NR_seccomp 354
+# elif defined(__x86_64__)
+#  define __NR_seccomp 317
+# elif defined(__arm__)
+#  define __NR_seccomp 383
+# elif defined(__aarch64__)
+#  define __NR_seccomp 277
+# elif defined(__hppa__)
+#  define __NR_seccomp 338
+# elif defined(__powerpc__)
+#  define __NR_seccomp 358
+# elif defined(__s390__)
+#  define __NR_seccomp 348
+# else
+#  warning "seccomp syscall number unknown for this architecture"
+#  define __NR_seccomp 0xffff
+# endif
+#endif
+
+#ifndef SECCOMP_SET_MODE_STRICT
+#define SECCOMP_SET_MODE_STRICT 0
+#endif
+
+#ifndef SECCOMP_SET_MODE_FILTER
+#define SECCOMP_SET_MODE_FILTER 1
+#endif
+
+#ifndef SECCOMP_GET_ACTION_AVAIL
+#define SECCOMP_GET_ACTION_AVAIL 2
+#endif
+
+#ifndef SECCOMP_FILTER_FLAG_TSYNC
+#define SECCOMP_FILTER_FLAG_TSYNC (1UL << 0)
+#endif
+
+#ifndef SECCOMP_FILTER_FLAG_LOG
+#define SECCOMP_FILTER_FLAG_LOG (1UL << 1)
+#endif
+
+#ifndef SECCOMP_FILTER_FLAG_SPEC_ALLOW
+#define SECCOMP_FILTER_FLAG_SPEC_ALLOW (1UL << 2)
+#endif
+
+#ifndef PTRACE_SECCOMP_GET_METADATA
+#define PTRACE_SECCOMP_GET_METADATA	0x420d
+
+struct seccomp_metadata {
+	__u64 filter_off;       /* Input: which filter */
+	__u64 flags;             /* Output: filter's flags */
+};
+#endif
+
+#ifndef seccomp
+int seccomp(unsigned int op, unsigned int flags, void *args)
+{
+	errno = 0;
+	return syscall(__NR_seccomp, op, flags, args);
+}
+#endif
+
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+#define syscall_arg(_n) (offsetof(struct seccomp_data, args[_n]))
+#elif __BYTE_ORDER == __BIG_ENDIAN
+#define syscall_arg(_n) (offsetof(struct seccomp_data, args[_n]) + sizeof(__u32))
+#else
+#error "wut? Unknown __BYTE_ORDER?!"
+#endif
+
+#define SIBLING_EXIT_UNKILLED	0xbadbeef
+#define SIBLING_EXIT_FAILURE	0xbadface
+#define SIBLING_EXIT_NEWPRIVS	0xbadfeed
+
+TEST(mode_strict_support)
+{
+	long ret;
+
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, NULL, NULL);
+	ASSERT_EQ(0, ret) {
+		TH_LOG("Kernel does not support CONFIG_SECCOMP");
+	}
+	syscall(__NR_exit, 0);
+}
+
+TEST_SIGNAL(mode_strict_cannot_call_prctl, SIGKILL)
+{
+	long ret;
+
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, NULL, NULL);
+	ASSERT_EQ(0, ret) {
+		TH_LOG("Kernel does not support CONFIG_SECCOMP");
+	}
+	syscall(__NR_prctl, PR_SET_SECCOMP, SECCOMP_MODE_FILTER,
+		NULL, NULL, NULL);
+	EXPECT_FALSE(true) {
+		TH_LOG("Unreachable!");
+	}
+}
+
+/* Note! This doesn't test no new privs behavior */
+TEST(no_new_privs_support)
+{
+	long ret;
+
+	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+	EXPECT_EQ(0, ret) {
+		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
+	}
+}
+
+/* Tests kernel support by checking for a copy_from_user() fault on NULL. */
+TEST(mode_filter_support)
+{
+	long ret;
+
+	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0);
+	ASSERT_EQ(0, ret) {
+		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
+	}
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, NULL, NULL, NULL);
+	EXPECT_EQ(-1, ret);
+	EXPECT_EQ(EFAULT, errno) {
+		TH_LOG("Kernel does not support CONFIG_SECCOMP_FILTER!");
+	}
+}
+
+TEST(mode_filter_without_nnp)
+{
+	struct sock_filter filter[] = {
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+	};
+	struct sock_fprog prog = {
+		.len = (unsigned short)ARRAY_SIZE(filter),
+		.filter = filter,
+	};
+	long ret;
+
+	ret = prctl(PR_GET_NO_NEW_PRIVS, 0, NULL, 0, 0);
+	ASSERT_LE(0, ret) {
+		TH_LOG("Expected 0 or unsupported for NO_NEW_PRIVS");
+	}
+	errno = 0;
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
+	/* Succeeds with CAP_SYS_ADMIN, fails without */
+	/* TODO(wad) check caps not euid */
+	if (geteuid()) {
+		EXPECT_EQ(-1, ret);
+		EXPECT_EQ(EACCES, errno);
+	} else {
+		EXPECT_EQ(0, ret);
+	}
+}
+
+#define MAX_INSNS_PER_PATH 32768
+
+TEST(filter_size_limits)
+{
+	int i;
+	int count = BPF_MAXINSNS + 1;
+	struct sock_filter allow[] = {
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+	};
+	struct sock_filter *filter;
+	struct sock_fprog prog = { };
+	long ret;
+
+	filter = calloc(count, sizeof(*filter));
+	ASSERT_NE(NULL, filter);
+
+	for (i = 0; i < count; i++)
+		filter[i] = allow[0];
+
+	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+	ASSERT_EQ(0, ret);
+
+	prog.filter = filter;
+	prog.len = count;
+
+	/* Too many filter instructions in a single filter. */
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
+	ASSERT_NE(0, ret) {
+		TH_LOG("Installing %d insn filter was allowed", prog.len);
+	}
+
+	/* One less is okay, though. */
+	prog.len -= 1;
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
+	ASSERT_EQ(0, ret) {
+		TH_LOG("Installing %d insn filter wasn't allowed", prog.len);
+	}
+}
+
+TEST(filter_chain_limits)
+{
+	int i;
+	int count = BPF_MAXINSNS;
+	struct sock_filter allow[] = {
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+	};
+	struct sock_filter *filter;
+	struct sock_fprog prog = { };
+	long ret;
+
+	filter = calloc(count, sizeof(*filter));
+	ASSERT_NE(NULL, filter);
+
+	for (i = 0; i < count; i++)
+		filter[i] = allow[0];
+
+	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+	ASSERT_EQ(0, ret);
+
+	prog.filter = filter;
+	prog.len = 1;
+
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
+	ASSERT_EQ(0, ret);
+
+	prog.len = count;
+
+	/* Too many total filter instructions. */
+	for (i = 0; i < MAX_INSNS_PER_PATH; i++) {
+		ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
+		if (ret != 0)
+			break;
+	}
+	ASSERT_NE(0, ret) {
+		TH_LOG("Allowed %d %d-insn filters (total with penalties:%d)",
+		       i, count, i * (count + 4));
+	}
+}
+
+TEST(mode_filter_cannot_move_to_strict)
+{
+	struct sock_filter filter[] = {
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+	};
+	struct sock_fprog prog = {
+		.len = (unsigned short)ARRAY_SIZE(filter),
+		.filter = filter,
+	};
+	long ret;
+
+	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+	ASSERT_EQ(0, ret);
+
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
+	ASSERT_EQ(0, ret);
+
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, 0, 0);
+	EXPECT_EQ(-1, ret);
+	EXPECT_EQ(EINVAL, errno);
+}
+
+
+TEST(mode_filter_get_seccomp)
+{
+	struct sock_filter filter[] = {
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+	};
+	struct sock_fprog prog = {
+		.len = (unsigned short)ARRAY_SIZE(filter),
+		.filter = filter,
+	};
+	long ret;
+
+	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+	ASSERT_EQ(0, ret);
+
+	ret = prctl(PR_GET_SECCOMP, 0, 0, 0, 0);
+	EXPECT_EQ(0, ret);
+
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
+	ASSERT_EQ(0, ret);
+
+	ret = prctl(PR_GET_SECCOMP, 0, 0, 0, 0);
+	EXPECT_EQ(2, ret);
+}
+
+
+TEST(ALLOW_all)
+{
+	struct sock_filter filter[] = {
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+	};
+	struct sock_fprog prog = {
+		.len = (unsigned short)ARRAY_SIZE(filter),
+		.filter = filter,
+	};
+	long ret;
+
+	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+	ASSERT_EQ(0, ret);
+
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
+	ASSERT_EQ(0, ret);
+}
+
+TEST(empty_prog)
+{
+	struct sock_filter filter[] = {
+	};
+	struct sock_fprog prog = {
+		.len = (unsigned short)ARRAY_SIZE(filter),
+		.filter = filter,
+	};
+	long ret;
+
+	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+	ASSERT_EQ(0, ret);
+
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
+	EXPECT_EQ(-1, ret);
+	EXPECT_EQ(EINVAL, errno);
+}
+
+TEST(log_all)
+{
+	struct sock_filter filter[] = {
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_LOG),
+	};
+	struct sock_fprog prog = {
+		.len = (unsigned short)ARRAY_SIZE(filter),
+		.filter = filter,
+	};
+	long ret;
+	pid_t parent = getppid();
+
+	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+	ASSERT_EQ(0, ret);
+
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
+	ASSERT_EQ(0, ret);
+
+	/* getppid() should succeed and be logged (no check for logging) */
+	EXPECT_EQ(parent, syscall(__NR_getppid));
+}
+
+TEST_SIGNAL(unknown_ret_is_kill_inside, SIGSYS)
+{
+	struct sock_filter filter[] = {
+		BPF_STMT(BPF_RET|BPF_K, 0x10000000U),
+	};
+	struct sock_fprog prog = {
+		.len = (unsigned short)ARRAY_SIZE(filter),
+		.filter = filter,
+	};
+	long ret;
+
+	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+	ASSERT_EQ(0, ret);
+
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
+	ASSERT_EQ(0, ret);
+	EXPECT_EQ(0, syscall(__NR_getpid)) {
+		TH_LOG("getpid() shouldn't ever return");
+	}
+}
+
+/* return code >= 0x80000000 is unused. */
+TEST_SIGNAL(unknown_ret_is_kill_above_allow, SIGSYS)
+{
+	struct sock_filter filter[] = {
+		BPF_STMT(BPF_RET|BPF_K, 0x90000000U),
+	};
+	struct sock_fprog prog = {
+		.len = (unsigned short)ARRAY_SIZE(filter),
+		.filter = filter,
+	};
+	long ret;
+
+	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+	ASSERT_EQ(0, ret);
+
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
+	ASSERT_EQ(0, ret);
+	EXPECT_EQ(0, syscall(__NR_getpid)) {
+		TH_LOG("getpid() shouldn't ever return");
+	}
+}
+
+TEST_SIGNAL(KILL_all, SIGSYS)
+{
+	struct sock_filter filter[] = {
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
+	};
+	struct sock_fprog prog = {
+		.len = (unsigned short)ARRAY_SIZE(filter),
+		.filter = filter,
+	};
+	long ret;
+
+	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+	ASSERT_EQ(0, ret);
+
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
+	ASSERT_EQ(0, ret);
+}
+
+TEST_SIGNAL(KILL_one, SIGSYS)
+{
+	struct sock_filter filter[] = {
+		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+			offsetof(struct seccomp_data, nr)),
+		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+	};
+	struct sock_fprog prog = {
+		.len = (unsigned short)ARRAY_SIZE(filter),
+		.filter = filter,
+	};
+	long ret;
+	pid_t parent = getppid();
+
+	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+	ASSERT_EQ(0, ret);
+
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
+	ASSERT_EQ(0, ret);
+
+	EXPECT_EQ(parent, syscall(__NR_getppid));
+	/* getpid() should never return. */
+	EXPECT_EQ(0, syscall(__NR_getpid));
+}
+
+TEST_SIGNAL(KILL_one_arg_one, SIGSYS)
+{
+	void *fatal_address;
+	struct sock_filter filter[] = {
+		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+			offsetof(struct seccomp_data, nr)),
+		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_times, 1, 0),
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+		/* Only both with lower 32-bit for now. */
+		BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(0)),
+		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K,
+			(unsigned long)&fatal_address, 0, 1),
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+	};
+	struct sock_fprog prog = {
+		.len = (unsigned short)ARRAY_SIZE(filter),
+		.filter = filter,
+	};
+	long ret;
+	pid_t parent = getppid();
+	struct tms timebuf;
+	clock_t clock = times(&timebuf);
+
+	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+	ASSERT_EQ(0, ret);
+
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
+	ASSERT_EQ(0, ret);
+
+	EXPECT_EQ(parent, syscall(__NR_getppid));
+	EXPECT_LE(clock, syscall(__NR_times, &timebuf));
+	/* times() should never return. */
+	EXPECT_EQ(0, syscall(__NR_times, &fatal_address));
+}
+
+TEST_SIGNAL(KILL_one_arg_six, SIGSYS)
+{
+#ifndef __NR_mmap2
+	int sysno = __NR_mmap;
+#else
+	int sysno = __NR_mmap2;
+#endif
+	struct sock_filter filter[] = {
+		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+			offsetof(struct seccomp_data, nr)),
+		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, sysno, 1, 0),
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+		/* Only both with lower 32-bit for now. */
+		BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(5)),
+		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, 0x0C0FFEE, 0, 1),
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+	};
+	struct sock_fprog prog = {
+		.len = (unsigned short)ARRAY_SIZE(filter),
+		.filter = filter,
+	};
+	long ret;
+	pid_t parent = getppid();
+	int fd;
+	void *map1, *map2;
+	int page_size = sysconf(_SC_PAGESIZE);
+
+	ASSERT_LT(0, page_size);
+
+	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+	ASSERT_EQ(0, ret);
+
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
+	ASSERT_EQ(0, ret);
+
+	fd = open("/dev/zero", O_RDONLY);
+	ASSERT_NE(-1, fd);
+
+	EXPECT_EQ(parent, syscall(__NR_getppid));
+	map1 = (void *)syscall(sysno,
+		NULL, page_size, PROT_READ, MAP_PRIVATE, fd, page_size);
+	EXPECT_NE(MAP_FAILED, map1);
+	/* mmap2() should never return. */
+	map2 = (void *)syscall(sysno,
+		 NULL, page_size, PROT_READ, MAP_PRIVATE, fd, 0x0C0FFEE);
+	EXPECT_EQ(MAP_FAILED, map2);
+
+	/* The test failed, so clean up the resources. */
+	munmap(map1, page_size);
+	munmap(map2, page_size);
+	close(fd);
+}
+
+/* This is a thread task to die via seccomp filter violation. */
+void *kill_thread(void *data)
+{
+	bool die = (bool)data;
+
+	if (die) {
+		prctl(PR_GET_SECCOMP, 0, 0, 0, 0);
+		return (void *)SIBLING_EXIT_FAILURE;
+	}
+
+	return (void *)SIBLING_EXIT_UNKILLED;
+}
+
+/* Prepare a thread that will kill itself or both of us. */
+void kill_thread_or_group(struct __test_metadata *_metadata, bool kill_process)
+{
+	pthread_t thread;
+	void *status;
+	/* Kill only when calling __NR_prctl. */
+	struct sock_filter filter_thread[] = {
+		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+			offsetof(struct seccomp_data, nr)),
+		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_prctl, 0, 1),
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL_THREAD),
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+	};
+	struct sock_fprog prog_thread = {
+		.len = (unsigned short)ARRAY_SIZE(filter_thread),
+		.filter = filter_thread,
+	};
+	struct sock_filter filter_process[] = {
+		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+			offsetof(struct seccomp_data, nr)),
+		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_prctl, 0, 1),
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL_PROCESS),
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+	};
+	struct sock_fprog prog_process = {
+		.len = (unsigned short)ARRAY_SIZE(filter_process),
+		.filter = filter_process,
+	};
+
+	ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
+		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
+	}
+
+	ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0,
+			     kill_process ? &prog_process : &prog_thread));
+
+	/*
+	 * Add the KILL_THREAD rule again to make sure that the KILL_PROCESS
+	 * flag cannot be downgraded by a new filter.
+	 */
+	ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog_thread));
+
+	/* Start a thread that will exit immediately. */
+	ASSERT_EQ(0, pthread_create(&thread, NULL, kill_thread, (void *)false));
+	ASSERT_EQ(0, pthread_join(thread, &status));
+	ASSERT_EQ(SIBLING_EXIT_UNKILLED, (unsigned long)status);
+
+	/* Start a thread that will die immediately. */
+	ASSERT_EQ(0, pthread_create(&thread, NULL, kill_thread, (void *)true));
+	ASSERT_EQ(0, pthread_join(thread, &status));
+	ASSERT_NE(SIBLING_EXIT_FAILURE, (unsigned long)status);
+
+	/*
+	 * If we get here, only the spawned thread died. Let the parent know
+	 * the whole process didn't die (i.e. this thread, the spawner,
+	 * stayed running).
+	 */
+	exit(42);
+}
+
+TEST(KILL_thread)
+{
+	int status;
+	pid_t child_pid;
+
+	child_pid = fork();
+	ASSERT_LE(0, child_pid);
+	if (child_pid == 0) {
+		kill_thread_or_group(_metadata, false);
+		_exit(38);
+	}
+
+	ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
+
+	/* If only the thread was killed, we'll see exit 42. */
+	ASSERT_TRUE(WIFEXITED(status));
+	ASSERT_EQ(42, WEXITSTATUS(status));
+}
+
+TEST(KILL_process)
+{
+	int status;
+	pid_t child_pid;
+
+	child_pid = fork();
+	ASSERT_LE(0, child_pid);
+	if (child_pid == 0) {
+		kill_thread_or_group(_metadata, true);
+		_exit(38);
+	}
+
+	ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
+
+	/* If the entire process was killed, we'll see SIGSYS. */
+	ASSERT_TRUE(WIFSIGNALED(status));
+	ASSERT_EQ(SIGSYS, WTERMSIG(status));
+}
+
+/* TODO(wad) add 64-bit versus 32-bit arg tests. */
+TEST(arg_out_of_range)
+{
+	struct sock_filter filter[] = {
+		BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(6)),
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+	};
+	struct sock_fprog prog = {
+		.len = (unsigned short)ARRAY_SIZE(filter),
+		.filter = filter,
+	};
+	long ret;
+
+	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+	ASSERT_EQ(0, ret);
+
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
+	EXPECT_EQ(-1, ret);
+	EXPECT_EQ(EINVAL, errno);
+}
+
+#define ERRNO_FILTER(name, errno)					\
+	struct sock_filter _read_filter_##name[] = {			\
+		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,				\
+			offsetof(struct seccomp_data, nr)),		\
+		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),	\
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | errno),	\
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),		\
+	};								\
+	struct sock_fprog prog_##name = {				\
+		.len = (unsigned short)ARRAY_SIZE(_read_filter_##name),	\
+		.filter = _read_filter_##name,				\
+	}
+
+/* Make sure basic errno values are correctly passed through a filter. */
+TEST(ERRNO_valid)
+{
+	ERRNO_FILTER(valid, E2BIG);
+	long ret;
+	pid_t parent = getppid();
+
+	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+	ASSERT_EQ(0, ret);
+
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_valid);
+	ASSERT_EQ(0, ret);
+
+	EXPECT_EQ(parent, syscall(__NR_getppid));
+	EXPECT_EQ(-1, read(0, NULL, 0));
+	EXPECT_EQ(E2BIG, errno);
+}
+
+/* Make sure an errno of zero is correctly handled by the arch code. */
+TEST(ERRNO_zero)
+{
+	ERRNO_FILTER(zero, 0);
+	long ret;
+	pid_t parent = getppid();
+
+	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+	ASSERT_EQ(0, ret);
+
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_zero);
+	ASSERT_EQ(0, ret);
+
+	EXPECT_EQ(parent, syscall(__NR_getppid));
+	/* "errno" of 0 is ok. */
+	EXPECT_EQ(0, read(0, NULL, 0));
+}
+
+/*
+ * The SECCOMP_RET_DATA mask is 16 bits wide, but errno is smaller.
+ * This tests that the errno value gets capped correctly, fixed by
+ * 580c57f10768 ("seccomp: cap SECCOMP_RET_ERRNO data to MAX_ERRNO").
+ */
+TEST(ERRNO_capped)
+{
+	ERRNO_FILTER(capped, 4096);
+	long ret;
+	pid_t parent = getppid();
+
+	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+	ASSERT_EQ(0, ret);
+
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_capped);
+	ASSERT_EQ(0, ret);
+
+	EXPECT_EQ(parent, syscall(__NR_getppid));
+	EXPECT_EQ(-1, read(0, NULL, 0));
+	EXPECT_EQ(4095, errno);
+}
+
+/*
+ * Filters are processed in reverse order: last applied is executed first.
+ * Since only the SECCOMP_RET_ACTION mask is tested for return values, the
+ * SECCOMP_RET_DATA mask results will follow the most recently applied
+ * matching filter return (and not the lowest or highest value).
+ */
+TEST(ERRNO_order)
+{
+	ERRNO_FILTER(first,  11);
+	ERRNO_FILTER(second, 13);
+	ERRNO_FILTER(third,  12);
+	long ret;
+	pid_t parent = getppid();
+
+	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+	ASSERT_EQ(0, ret);
+
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_first);
+	ASSERT_EQ(0, ret);
+
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_second);
+	ASSERT_EQ(0, ret);
+
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_third);
+	ASSERT_EQ(0, ret);
+
+	EXPECT_EQ(parent, syscall(__NR_getppid));
+	EXPECT_EQ(-1, read(0, NULL, 0));
+	EXPECT_EQ(12, errno);
+}
+
+FIXTURE_DATA(TRAP) {
+	struct sock_fprog prog;
+};
+
+FIXTURE_SETUP(TRAP)
+{
+	struct sock_filter filter[] = {
+		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+			offsetof(struct seccomp_data, nr)),
+		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRAP),
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+	};
+
+	memset(&self->prog, 0, sizeof(self->prog));
+	self->prog.filter = malloc(sizeof(filter));
+	ASSERT_NE(NULL, self->prog.filter);
+	memcpy(self->prog.filter, filter, sizeof(filter));
+	self->prog.len = (unsigned short)ARRAY_SIZE(filter);
+}
+
+FIXTURE_TEARDOWN(TRAP)
+{
+	if (self->prog.filter)
+		free(self->prog.filter);
+}
+
+TEST_F_SIGNAL(TRAP, dfl, SIGSYS)
+{
+	long ret;
+
+	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+	ASSERT_EQ(0, ret);
+
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog);
+	ASSERT_EQ(0, ret);
+	syscall(__NR_getpid);
+}
+
+/* Ensure that SIGSYS overrides SIG_IGN */
+TEST_F_SIGNAL(TRAP, ign, SIGSYS)
+{
+	long ret;
+
+	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+	ASSERT_EQ(0, ret);
+
+	signal(SIGSYS, SIG_IGN);
+
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog);
+	ASSERT_EQ(0, ret);
+	syscall(__NR_getpid);
+}
+
+static siginfo_t TRAP_info;
+static volatile int TRAP_nr;
+static void TRAP_action(int nr, siginfo_t *info, void *void_context)
+{
+	memcpy(&TRAP_info, info, sizeof(TRAP_info));
+	TRAP_nr = nr;
+}
+
+TEST_F(TRAP, handler)
+{
+	int ret, test;
+	struct sigaction act;
+	sigset_t mask;
+
+	memset(&act, 0, sizeof(act));
+	sigemptyset(&mask);
+	sigaddset(&mask, SIGSYS);
+
+	act.sa_sigaction = &TRAP_action;
+	act.sa_flags = SA_SIGINFO;
+	ret = sigaction(SIGSYS, &act, NULL);
+	ASSERT_EQ(0, ret) {
+		TH_LOG("sigaction failed");
+	}
+	ret = sigprocmask(SIG_UNBLOCK, &mask, NULL);
+	ASSERT_EQ(0, ret) {
+		TH_LOG("sigprocmask failed");
+	}
+
+	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+	ASSERT_EQ(0, ret);
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog);
+	ASSERT_EQ(0, ret);
+	TRAP_nr = 0;
+	memset(&TRAP_info, 0, sizeof(TRAP_info));
+	/* Expect the registers to be rolled back. (nr = error) may vary
+	 * based on arch. */
+	ret = syscall(__NR_getpid);
+	/* Silence gcc warning about volatile. */
+	test = TRAP_nr;
+	EXPECT_EQ(SIGSYS, test);
+	struct local_sigsys {
+		void *_call_addr;	/* calling user insn */
+		int _syscall;		/* triggering system call number */
+		unsigned int _arch;	/* AUDIT_ARCH_* of syscall */
+	} *sigsys = (struct local_sigsys *)
+#ifdef si_syscall
+		&(TRAP_info.si_call_addr);
+#else
+		&TRAP_info.si_pid;
+#endif
+	EXPECT_EQ(__NR_getpid, sigsys->_syscall);
+	/* Make sure arch is non-zero. */
+	EXPECT_NE(0, sigsys->_arch);
+	EXPECT_NE(0, (unsigned long)sigsys->_call_addr);
+}
+
+FIXTURE_DATA(precedence) {
+	struct sock_fprog allow;
+	struct sock_fprog log;
+	struct sock_fprog trace;
+	struct sock_fprog error;
+	struct sock_fprog trap;
+	struct sock_fprog kill;
+};
+
+FIXTURE_SETUP(precedence)
+{
+	struct sock_filter allow_insns[] = {
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+	};
+	struct sock_filter log_insns[] = {
+		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+			offsetof(struct seccomp_data, nr)),
+		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_LOG),
+	};
+	struct sock_filter trace_insns[] = {
+		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+			offsetof(struct seccomp_data, nr)),
+		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE),
+	};
+	struct sock_filter error_insns[] = {
+		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+			offsetof(struct seccomp_data, nr)),
+		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO),
+	};
+	struct sock_filter trap_insns[] = {
+		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+			offsetof(struct seccomp_data, nr)),
+		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRAP),
+	};
+	struct sock_filter kill_insns[] = {
+		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+			offsetof(struct seccomp_data, nr)),
+		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
+	};
+
+	memset(self, 0, sizeof(*self));
+#define FILTER_ALLOC(_x) \
+	self->_x.filter = malloc(sizeof(_x##_insns)); \
+	ASSERT_NE(NULL, self->_x.filter); \
+	memcpy(self->_x.filter, &_x##_insns, sizeof(_x##_insns)); \
+	self->_x.len = (unsigned short)ARRAY_SIZE(_x##_insns)
+	FILTER_ALLOC(allow);
+	FILTER_ALLOC(log);
+	FILTER_ALLOC(trace);
+	FILTER_ALLOC(error);
+	FILTER_ALLOC(trap);
+	FILTER_ALLOC(kill);
+}
+
+FIXTURE_TEARDOWN(precedence)
+{
+#define FILTER_FREE(_x) if (self->_x.filter) free(self->_x.filter)
+	FILTER_FREE(allow);
+	FILTER_FREE(log);
+	FILTER_FREE(trace);
+	FILTER_FREE(error);
+	FILTER_FREE(trap);
+	FILTER_FREE(kill);
+}
+
+TEST_F(precedence, allow_ok)
+{
+	pid_t parent, res = 0;
+	long ret;
+
+	parent = getppid();
+	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+	ASSERT_EQ(0, ret);
+
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
+	ASSERT_EQ(0, ret);
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
+	ASSERT_EQ(0, ret);
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
+	ASSERT_EQ(0, ret);
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
+	ASSERT_EQ(0, ret);
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
+	ASSERT_EQ(0, ret);
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->kill);
+	ASSERT_EQ(0, ret);
+	/* Should work just fine. */
+	res = syscall(__NR_getppid);
+	EXPECT_EQ(parent, res);
+}
+
+TEST_F_SIGNAL(precedence, kill_is_highest, SIGSYS)
+{
+	pid_t parent, res = 0;
+	long ret;
+
+	parent = getppid();
+	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+	ASSERT_EQ(0, ret);
+
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
+	ASSERT_EQ(0, ret);
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
+	ASSERT_EQ(0, ret);
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
+	ASSERT_EQ(0, ret);
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
+	ASSERT_EQ(0, ret);
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
+	ASSERT_EQ(0, ret);
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->kill);
+	ASSERT_EQ(0, ret);
+	/* Should work just fine. */
+	res = syscall(__NR_getppid);
+	EXPECT_EQ(parent, res);
+	/* getpid() should never return. */
+	res = syscall(__NR_getpid);
+	EXPECT_EQ(0, res);
+}
+
+TEST_F_SIGNAL(precedence, kill_is_highest_in_any_order, SIGSYS)
+{
+	pid_t parent;
+	long ret;
+
+	parent = getppid();
+	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+	ASSERT_EQ(0, ret);
+
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
+	ASSERT_EQ(0, ret);
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->kill);
+	ASSERT_EQ(0, ret);
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
+	ASSERT_EQ(0, ret);
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
+	ASSERT_EQ(0, ret);
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
+	ASSERT_EQ(0, ret);
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
+	ASSERT_EQ(0, ret);
+	/* Should work just fine. */
+	EXPECT_EQ(parent, syscall(__NR_getppid));
+	/* getpid() should never return. */
+	EXPECT_EQ(0, syscall(__NR_getpid));
+}
+
+TEST_F_SIGNAL(precedence, trap_is_second, SIGSYS)
+{
+	pid_t parent;
+	long ret;
+
+	parent = getppid();
+	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+	ASSERT_EQ(0, ret);
+
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
+	ASSERT_EQ(0, ret);
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
+	ASSERT_EQ(0, ret);
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
+	ASSERT_EQ(0, ret);
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
+	ASSERT_EQ(0, ret);
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
+	ASSERT_EQ(0, ret);
+	/* Should work just fine. */
+	EXPECT_EQ(parent, syscall(__NR_getppid));
+	/* getpid() should never return. */
+	EXPECT_EQ(0, syscall(__NR_getpid));
+}
+
+TEST_F_SIGNAL(precedence, trap_is_second_in_any_order, SIGSYS)
+{
+	pid_t parent;
+	long ret;
+
+	parent = getppid();
+	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+	ASSERT_EQ(0, ret);
+
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
+	ASSERT_EQ(0, ret);
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
+	ASSERT_EQ(0, ret);
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
+	ASSERT_EQ(0, ret);
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
+	ASSERT_EQ(0, ret);
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
+	ASSERT_EQ(0, ret);
+	/* Should work just fine. */
+	EXPECT_EQ(parent, syscall(__NR_getppid));
+	/* getpid() should never return. */
+	EXPECT_EQ(0, syscall(__NR_getpid));
+}
+
+TEST_F(precedence, errno_is_third)
+{
+	pid_t parent;
+	long ret;
+
+	parent = getppid();
+	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+	ASSERT_EQ(0, ret);
+
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
+	ASSERT_EQ(0, ret);
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
+	ASSERT_EQ(0, ret);
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
+	ASSERT_EQ(0, ret);
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
+	ASSERT_EQ(0, ret);
+	/* Should work just fine. */
+	EXPECT_EQ(parent, syscall(__NR_getppid));
+	EXPECT_EQ(0, syscall(__NR_getpid));
+}
+
+TEST_F(precedence, errno_is_third_in_any_order)
+{
+	pid_t parent;
+	long ret;
+
+	parent = getppid();
+	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+	ASSERT_EQ(0, ret);
+
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
+	ASSERT_EQ(0, ret);
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
+	ASSERT_EQ(0, ret);
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
+	ASSERT_EQ(0, ret);
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
+	ASSERT_EQ(0, ret);
+	/* Should work just fine. */
+	EXPECT_EQ(parent, syscall(__NR_getppid));
+	EXPECT_EQ(0, syscall(__NR_getpid));
+}
+
+TEST_F(precedence, trace_is_fourth)
+{
+	pid_t parent;
+	long ret;
+
+	parent = getppid();
+	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+	ASSERT_EQ(0, ret);
+
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
+	ASSERT_EQ(0, ret);
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
+	ASSERT_EQ(0, ret);
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
+	ASSERT_EQ(0, ret);
+	/* Should work just fine. */
+	EXPECT_EQ(parent, syscall(__NR_getppid));
+	/* No ptracer */
+	EXPECT_EQ(-1, syscall(__NR_getpid));
+}
+
+TEST_F(precedence, trace_is_fourth_in_any_order)
+{
+	pid_t parent;
+	long ret;
+
+	parent = getppid();
+	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+	ASSERT_EQ(0, ret);
+
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
+	ASSERT_EQ(0, ret);
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
+	ASSERT_EQ(0, ret);
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
+	ASSERT_EQ(0, ret);
+	/* Should work just fine. */
+	EXPECT_EQ(parent, syscall(__NR_getppid));
+	/* No ptracer */
+	EXPECT_EQ(-1, syscall(__NR_getpid));
+}
+
+TEST_F(precedence, log_is_fifth)
+{
+	pid_t mypid, parent;
+	long ret;
+
+	mypid = getpid();
+	parent = getppid();
+	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+	ASSERT_EQ(0, ret);
+
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
+	ASSERT_EQ(0, ret);
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
+	ASSERT_EQ(0, ret);
+	/* Should work just fine. */
+	EXPECT_EQ(parent, syscall(__NR_getppid));
+	/* Should also work just fine */
+	EXPECT_EQ(mypid, syscall(__NR_getpid));
+}
+
+TEST_F(precedence, log_is_fifth_in_any_order)
+{
+	pid_t mypid, parent;
+	long ret;
+
+	mypid = getpid();
+	parent = getppid();
+	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+	ASSERT_EQ(0, ret);
+
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
+	ASSERT_EQ(0, ret);
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
+	ASSERT_EQ(0, ret);
+	/* Should work just fine. */
+	EXPECT_EQ(parent, syscall(__NR_getppid));
+	/* Should also work just fine */
+	EXPECT_EQ(mypid, syscall(__NR_getpid));
+}
+
+#ifndef PTRACE_O_TRACESECCOMP
+#define PTRACE_O_TRACESECCOMP	0x00000080
+#endif
+
+/* Catch the Ubuntu 12.04 value error. */
+#if PTRACE_EVENT_SECCOMP != 7
+#undef PTRACE_EVENT_SECCOMP
+#endif
+
+#ifndef PTRACE_EVENT_SECCOMP
+#define PTRACE_EVENT_SECCOMP 7
+#endif
+
+#define IS_SECCOMP_EVENT(status) ((status >> 16) == PTRACE_EVENT_SECCOMP)
+bool tracer_running;
+void tracer_stop(int sig)
+{
+	tracer_running = false;
+}
+
+typedef void tracer_func_t(struct __test_metadata *_metadata,
+			   pid_t tracee, int status, void *args);
+
+void start_tracer(struct __test_metadata *_metadata, int fd, pid_t tracee,
+	    tracer_func_t tracer_func, void *args, bool ptrace_syscall)
+{
+	int ret = -1;
+	struct sigaction action = {
+		.sa_handler = tracer_stop,
+	};
+
+	/* Allow external shutdown. */
+	tracer_running = true;
+	ASSERT_EQ(0, sigaction(SIGUSR1, &action, NULL));
+
+	errno = 0;
+	while (ret == -1 && errno != EINVAL)
+		ret = ptrace(PTRACE_ATTACH, tracee, NULL, 0);
+	ASSERT_EQ(0, ret) {
+		kill(tracee, SIGKILL);
+	}
+	/* Wait for attach stop */
+	wait(NULL);
+
+	ret = ptrace(PTRACE_SETOPTIONS, tracee, NULL, ptrace_syscall ?
+						      PTRACE_O_TRACESYSGOOD :
+						      PTRACE_O_TRACESECCOMP);
+	ASSERT_EQ(0, ret) {
+		TH_LOG("Failed to set PTRACE_O_TRACESECCOMP");
+		kill(tracee, SIGKILL);
+	}
+	ret = ptrace(ptrace_syscall ? PTRACE_SYSCALL : PTRACE_CONT,
+		     tracee, NULL, 0);
+	ASSERT_EQ(0, ret);
+
+	/* Unblock the tracee */
+	ASSERT_EQ(1, write(fd, "A", 1));
+	ASSERT_EQ(0, close(fd));
+
+	/* Run until we're shut down. Must assert to stop execution. */
+	while (tracer_running) {
+		int status;
+
+		if (wait(&status) != tracee)
+			continue;
+		if (WIFSIGNALED(status) || WIFEXITED(status))
+			/* Child is dead. Time to go. */
+			return;
+
+		/* Check if this is a seccomp event. */
+		ASSERT_EQ(!ptrace_syscall, IS_SECCOMP_EVENT(status));
+
+		tracer_func(_metadata, tracee, status, args);
+
+		ret = ptrace(ptrace_syscall ? PTRACE_SYSCALL : PTRACE_CONT,
+			     tracee, NULL, 0);
+		ASSERT_EQ(0, ret);
+	}
+	/* Directly report the status of our test harness results. */
+	syscall(__NR_exit, _metadata->passed ? EXIT_SUCCESS : EXIT_FAILURE);
+}
+
+/* Common tracer setup/teardown functions. */
+void cont_handler(int num)
+{ }
+pid_t setup_trace_fixture(struct __test_metadata *_metadata,
+			  tracer_func_t func, void *args, bool ptrace_syscall)
+{
+	char sync;
+	int pipefd[2];
+	pid_t tracer_pid;
+	pid_t tracee = getpid();
+
+	/* Setup a pipe for clean synchronization. */
+	ASSERT_EQ(0, pipe(pipefd));
+
+	/* Fork a child which we'll promote to tracer */
+	tracer_pid = fork();
+	ASSERT_LE(0, tracer_pid);
+	signal(SIGALRM, cont_handler);
+	if (tracer_pid == 0) {
+		close(pipefd[0]);
+		start_tracer(_metadata, pipefd[1], tracee, func, args,
+			     ptrace_syscall);
+		syscall(__NR_exit, 0);
+	}
+	close(pipefd[1]);
+	prctl(PR_SET_PTRACER, tracer_pid, 0, 0, 0);
+	read(pipefd[0], &sync, 1);
+	close(pipefd[0]);
+
+	return tracer_pid;
+}
+void teardown_trace_fixture(struct __test_metadata *_metadata,
+			    pid_t tracer)
+{
+	if (tracer) {
+		int status;
+		/*
+		 * Extract the exit code from the other process and
+		 * adopt it for ourselves in case its asserts failed.
+		 */
+		ASSERT_EQ(0, kill(tracer, SIGUSR1));
+		ASSERT_EQ(tracer, waitpid(tracer, &status, 0));
+		if (WEXITSTATUS(status))
+			_metadata->passed = 0;
+	}
+}
+
+/* "poke" tracer arguments and function. */
+struct tracer_args_poke_t {
+	unsigned long poke_addr;
+};
+
+void tracer_poke(struct __test_metadata *_metadata, pid_t tracee, int status,
+		 void *args)
+{
+	int ret;
+	unsigned long msg;
+	struct tracer_args_poke_t *info = (struct tracer_args_poke_t *)args;
+
+	ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg);
+	EXPECT_EQ(0, ret);
+	/* If this fails, don't try to recover. */
+	ASSERT_EQ(0x1001, msg) {
+		kill(tracee, SIGKILL);
+	}
+	/*
+	 * Poke in the message.
+	 * Registers are not touched to try to keep this relatively arch
+	 * agnostic.
+	 */
+	ret = ptrace(PTRACE_POKEDATA, tracee, info->poke_addr, 0x1001);
+	EXPECT_EQ(0, ret);
+}
+
+FIXTURE_DATA(TRACE_poke) {
+	struct sock_fprog prog;
+	pid_t tracer;
+	long poked;
+	struct tracer_args_poke_t tracer_args;
+};
+
+FIXTURE_SETUP(TRACE_poke)
+{
+	struct sock_filter filter[] = {
+		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+			offsetof(struct seccomp_data, nr)),
+		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1001),
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+	};
+
+	self->poked = 0;
+	memset(&self->prog, 0, sizeof(self->prog));
+	self->prog.filter = malloc(sizeof(filter));
+	ASSERT_NE(NULL, self->prog.filter);
+	memcpy(self->prog.filter, filter, sizeof(filter));
+	self->prog.len = (unsigned short)ARRAY_SIZE(filter);
+
+	/* Set up tracer args. */
+	self->tracer_args.poke_addr = (unsigned long)&self->poked;
+
+	/* Launch tracer. */
+	self->tracer = setup_trace_fixture(_metadata, tracer_poke,
+					   &self->tracer_args, false);
+}
+
+FIXTURE_TEARDOWN(TRACE_poke)
+{
+	teardown_trace_fixture(_metadata, self->tracer);
+	if (self->prog.filter)
+		free(self->prog.filter);
+}
+
+TEST_F(TRACE_poke, read_has_side_effects)
+{
+	ssize_t ret;
+
+	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+	ASSERT_EQ(0, ret);
+
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
+	ASSERT_EQ(0, ret);
+
+	EXPECT_EQ(0, self->poked);
+	ret = read(-1, NULL, 0);
+	EXPECT_EQ(-1, ret);
+	EXPECT_EQ(0x1001, self->poked);
+}
+
+TEST_F(TRACE_poke, getpid_runs_normally)
+{
+	long ret;
+
+	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+	ASSERT_EQ(0, ret);
+
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
+	ASSERT_EQ(0, ret);
+
+	EXPECT_EQ(0, self->poked);
+	EXPECT_NE(0, syscall(__NR_getpid));
+	EXPECT_EQ(0, self->poked);
+}
+
+#if defined(__x86_64__)
+# define ARCH_REGS	struct user_regs_struct
+# define SYSCALL_NUM	orig_rax
+# define SYSCALL_RET	rax
+#elif defined(__i386__)
+# define ARCH_REGS	struct user_regs_struct
+# define SYSCALL_NUM	orig_eax
+# define SYSCALL_RET	eax
+#elif defined(__arm__)
+# define ARCH_REGS	struct pt_regs
+# define SYSCALL_NUM	ARM_r7
+# define SYSCALL_RET	ARM_r0
+#elif defined(__aarch64__)
+# define ARCH_REGS	struct user_pt_regs
+# define SYSCALL_NUM	regs[8]
+# define SYSCALL_RET	regs[0]
+#elif defined(__hppa__)
+# define ARCH_REGS	struct user_regs_struct
+# define SYSCALL_NUM	gr[20]
+# define SYSCALL_RET	gr[28]
+#elif defined(__powerpc__)
+# define ARCH_REGS	struct pt_regs
+# define SYSCALL_NUM	gpr[0]
+# define SYSCALL_RET	gpr[3]
+#elif defined(__s390__)
+# define ARCH_REGS     s390_regs
+# define SYSCALL_NUM   gprs[2]
+# define SYSCALL_RET   gprs[2]
+#elif defined(__mips__)
+# define ARCH_REGS	struct pt_regs
+# define SYSCALL_NUM	regs[2]
+# define SYSCALL_SYSCALL_NUM regs[4]
+# define SYSCALL_RET	regs[2]
+# define SYSCALL_NUM_RET_SHARE_REG
+#else
+# error "Do not know how to find your architecture's registers and syscalls"
+#endif
+
+/* When the syscall return can't be changed, stub out the tests for it. */
+#ifdef SYSCALL_NUM_RET_SHARE_REG
+# define EXPECT_SYSCALL_RETURN(val, action)	EXPECT_EQ(-1, action)
+#else
+# define EXPECT_SYSCALL_RETURN(val, action)		\
+	do {						\
+		errno = 0;				\
+		if (val < 0) {				\
+			EXPECT_EQ(-1, action);		\
+			EXPECT_EQ(-(val), errno);	\
+		} else {				\
+			EXPECT_EQ(val, action);		\
+		}					\
+	} while (0)
+#endif
+
+/* Use PTRACE_GETREGS and PTRACE_SETREGS when available. This is useful for
+ * architectures without HAVE_ARCH_TRACEHOOK (e.g. User-mode Linux).
+ */
+#if defined(__x86_64__) || defined(__i386__) || defined(__mips__)
+#define HAVE_GETREGS
+#endif
+
+/* Architecture-specific syscall fetching routine. */
+int get_syscall(struct __test_metadata *_metadata, pid_t tracee)
+{
+	ARCH_REGS regs;
+#ifdef HAVE_GETREGS
+	EXPECT_EQ(0, ptrace(PTRACE_GETREGS, tracee, 0, &regs)) {
+		TH_LOG("PTRACE_GETREGS failed");
+		return -1;
+	}
+#else
+	struct iovec iov;
+
+	iov.iov_base = &regs;
+	iov.iov_len = sizeof(regs);
+	EXPECT_EQ(0, ptrace(PTRACE_GETREGSET, tracee, NT_PRSTATUS, &iov)) {
+		TH_LOG("PTRACE_GETREGSET failed");
+		return -1;
+	}
+#endif
+
+#if defined(__mips__)
+	if (regs.SYSCALL_NUM == __NR_O32_Linux)
+		return regs.SYSCALL_SYSCALL_NUM;
+#endif
+	return regs.SYSCALL_NUM;
+}
+
+/* Architecture-specific syscall changing routine. */
+void change_syscall(struct __test_metadata *_metadata,
+		    pid_t tracee, int syscall, int result)
+{
+	int ret;
+	ARCH_REGS regs;
+#ifdef HAVE_GETREGS
+	ret = ptrace(PTRACE_GETREGS, tracee, 0, &regs);
+#else
+	struct iovec iov;
+	iov.iov_base = &regs;
+	iov.iov_len = sizeof(regs);
+	ret = ptrace(PTRACE_GETREGSET, tracee, NT_PRSTATUS, &iov);
+#endif
+	EXPECT_EQ(0, ret) {}
+
+#if defined(__x86_64__) || defined(__i386__) || defined(__powerpc__) || \
+    defined(__s390__) || defined(__hppa__)
+	{
+		regs.SYSCALL_NUM = syscall;
+	}
+#elif defined(__mips__)
+	{
+		if (regs.SYSCALL_NUM == __NR_O32_Linux)
+			regs.SYSCALL_SYSCALL_NUM = syscall;
+		else
+			regs.SYSCALL_NUM = syscall;
+	}
+
+#elif defined(__arm__)
+# ifndef PTRACE_SET_SYSCALL
+#  define PTRACE_SET_SYSCALL   23
+# endif
+	{
+		ret = ptrace(PTRACE_SET_SYSCALL, tracee, NULL, syscall);
+		EXPECT_EQ(0, ret);
+	}
+
+#elif defined(__aarch64__)
+# ifndef NT_ARM_SYSTEM_CALL
+#  define NT_ARM_SYSTEM_CALL 0x404
+# endif
+	{
+		iov.iov_base = &syscall;
+		iov.iov_len = sizeof(syscall);
+		ret = ptrace(PTRACE_SETREGSET, tracee, NT_ARM_SYSTEM_CALL,
+			     &iov);
+		EXPECT_EQ(0, ret);
+	}
+
+#else
+	ASSERT_EQ(1, 0) {
+		TH_LOG("How is the syscall changed on this architecture?");
+	}
+#endif
+
+	/* If syscall is skipped, change return value. */
+	if (syscall == -1)
+#ifdef SYSCALL_NUM_RET_SHARE_REG
+		TH_LOG("Can't modify syscall return on this architecture");
+#else
+		regs.SYSCALL_RET = result;
+#endif
+
+#ifdef HAVE_GETREGS
+	ret = ptrace(PTRACE_SETREGS, tracee, 0, &regs);
+#else
+	iov.iov_base = &regs;
+	iov.iov_len = sizeof(regs);
+	ret = ptrace(PTRACE_SETREGSET, tracee, NT_PRSTATUS, &iov);
+#endif
+	EXPECT_EQ(0, ret);
+}
+
+void tracer_syscall(struct __test_metadata *_metadata, pid_t tracee,
+		    int status, void *args)
+{
+	int ret;
+	unsigned long msg;
+
+	/* Make sure we got the right message. */
+	ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg);
+	EXPECT_EQ(0, ret);
+
+	/* Validate and take action on expected syscalls. */
+	switch (msg) {
+	case 0x1002:
+		/* change getpid to getppid. */
+		EXPECT_EQ(__NR_getpid, get_syscall(_metadata, tracee));
+		change_syscall(_metadata, tracee, __NR_getppid, 0);
+		break;
+	case 0x1003:
+		/* skip gettid with valid return code. */
+		EXPECT_EQ(__NR_gettid, get_syscall(_metadata, tracee));
+		change_syscall(_metadata, tracee, -1, 45000);
+		break;
+	case 0x1004:
+		/* skip openat with error. */
+		EXPECT_EQ(__NR_openat, get_syscall(_metadata, tracee));
+		change_syscall(_metadata, tracee, -1, -ESRCH);
+		break;
+	case 0x1005:
+		/* do nothing (allow getppid) */
+		EXPECT_EQ(__NR_getppid, get_syscall(_metadata, tracee));
+		break;
+	default:
+		EXPECT_EQ(0, msg) {
+			TH_LOG("Unknown PTRACE_GETEVENTMSG: 0x%lx", msg);
+			kill(tracee, SIGKILL);
+		}
+	}
+
+}
+
+void tracer_ptrace(struct __test_metadata *_metadata, pid_t tracee,
+		   int status, void *args)
+{
+	int ret, nr;
+	unsigned long msg;
+	static bool entry;
+
+	/* Make sure we got an empty message. */
+	ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg);
+	EXPECT_EQ(0, ret);
+	EXPECT_EQ(0, msg);
+
+	/* The only way to tell PTRACE_SYSCALL entry/exit is by counting. */
+	entry = !entry;
+	if (!entry)
+		return;
+
+	nr = get_syscall(_metadata, tracee);
+
+	if (nr == __NR_getpid)
+		change_syscall(_metadata, tracee, __NR_getppid, 0);
+	if (nr == __NR_gettid)
+		change_syscall(_metadata, tracee, -1, 45000);
+	if (nr == __NR_openat)
+		change_syscall(_metadata, tracee, -1, -ESRCH);
+}
+
+FIXTURE_DATA(TRACE_syscall) {
+	struct sock_fprog prog;
+	pid_t tracer, mytid, mypid, parent;
+};
+
+FIXTURE_SETUP(TRACE_syscall)
+{
+	struct sock_filter filter[] = {
+		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+			offsetof(struct seccomp_data, nr)),
+		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1002),
+		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_gettid, 0, 1),
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1003),
+		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_openat, 0, 1),
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1004),
+		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1005),
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+	};
+
+	memset(&self->prog, 0, sizeof(self->prog));
+	self->prog.filter = malloc(sizeof(filter));
+	ASSERT_NE(NULL, self->prog.filter);
+	memcpy(self->prog.filter, filter, sizeof(filter));
+	self->prog.len = (unsigned short)ARRAY_SIZE(filter);
+
+	/* Prepare some testable syscall results. */
+	self->mytid = syscall(__NR_gettid);
+	ASSERT_GT(self->mytid, 0);
+	ASSERT_NE(self->mytid, 1) {
+		TH_LOG("Running this test as init is not supported. :)");
+	}
+
+	self->mypid = getpid();
+	ASSERT_GT(self->mypid, 0);
+	ASSERT_EQ(self->mytid, self->mypid);
+
+	self->parent = getppid();
+	ASSERT_GT(self->parent, 0);
+	ASSERT_NE(self->parent, self->mypid);
+
+	/* Launch tracer. */
+	self->tracer = setup_trace_fixture(_metadata, tracer_syscall, NULL,
+					   false);
+}
+
+FIXTURE_TEARDOWN(TRACE_syscall)
+{
+	teardown_trace_fixture(_metadata, self->tracer);
+	if (self->prog.filter)
+		free(self->prog.filter);
+}
+
+TEST_F(TRACE_syscall, ptrace_syscall_redirected)
+{
+	/* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */
+	teardown_trace_fixture(_metadata, self->tracer);
+	self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL,
+					   true);
+
+	/* Tracer will redirect getpid to getppid. */
+	EXPECT_NE(self->mypid, syscall(__NR_getpid));
+}
+
+TEST_F(TRACE_syscall, ptrace_syscall_errno)
+{
+	/* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */
+	teardown_trace_fixture(_metadata, self->tracer);
+	self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL,
+					   true);
+
+	/* Tracer should skip the open syscall, resulting in ESRCH. */
+	EXPECT_SYSCALL_RETURN(-ESRCH, syscall(__NR_openat));
+}
+
+TEST_F(TRACE_syscall, ptrace_syscall_faked)
+{
+	/* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */
+	teardown_trace_fixture(_metadata, self->tracer);
+	self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL,
+					   true);
+
+	/* Tracer should skip the gettid syscall, resulting fake pid. */
+	EXPECT_SYSCALL_RETURN(45000, syscall(__NR_gettid));
+}
+
+TEST_F(TRACE_syscall, syscall_allowed)
+{
+	long ret;
+
+	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+	ASSERT_EQ(0, ret);
+
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
+	ASSERT_EQ(0, ret);
+
+	/* getppid works as expected (no changes). */
+	EXPECT_EQ(self->parent, syscall(__NR_getppid));
+	EXPECT_NE(self->mypid, syscall(__NR_getppid));
+}
+
+TEST_F(TRACE_syscall, syscall_redirected)
+{
+	long ret;
+
+	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+	ASSERT_EQ(0, ret);
+
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
+	ASSERT_EQ(0, ret);
+
+	/* getpid has been redirected to getppid as expected. */
+	EXPECT_EQ(self->parent, syscall(__NR_getpid));
+	EXPECT_NE(self->mypid, syscall(__NR_getpid));
+}
+
+TEST_F(TRACE_syscall, syscall_errno)
+{
+	long ret;
+
+	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+	ASSERT_EQ(0, ret);
+
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
+	ASSERT_EQ(0, ret);
+
+	/* openat has been skipped and an errno return. */
+	EXPECT_SYSCALL_RETURN(-ESRCH, syscall(__NR_openat));
+}
+
+TEST_F(TRACE_syscall, syscall_faked)
+{
+	long ret;
+
+	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+	ASSERT_EQ(0, ret);
+
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
+	ASSERT_EQ(0, ret);
+
+	/* gettid has been skipped and an altered return value stored. */
+	EXPECT_SYSCALL_RETURN(45000, syscall(__NR_gettid));
+}
+
+TEST_F(TRACE_syscall, skip_after_RET_TRACE)
+{
+	struct sock_filter filter[] = {
+		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+			offsetof(struct seccomp_data, nr)),
+		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | EPERM),
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+	};
+	struct sock_fprog prog = {
+		.len = (unsigned short)ARRAY_SIZE(filter),
+		.filter = filter,
+	};
+	long ret;
+
+	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+	ASSERT_EQ(0, ret);
+
+	/* Install fixture filter. */
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
+	ASSERT_EQ(0, ret);
+
+	/* Install "errno on getppid" filter. */
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
+	ASSERT_EQ(0, ret);
+
+	/* Tracer will redirect getpid to getppid, and we should see EPERM. */
+	errno = 0;
+	EXPECT_EQ(-1, syscall(__NR_getpid));
+	EXPECT_EQ(EPERM, errno);
+}
+
+TEST_F_SIGNAL(TRACE_syscall, kill_after_RET_TRACE, SIGSYS)
+{
+	struct sock_filter filter[] = {
+		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+			offsetof(struct seccomp_data, nr)),
+		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+	};
+	struct sock_fprog prog = {
+		.len = (unsigned short)ARRAY_SIZE(filter),
+		.filter = filter,
+	};
+	long ret;
+
+	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+	ASSERT_EQ(0, ret);
+
+	/* Install fixture filter. */
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
+	ASSERT_EQ(0, ret);
+
+	/* Install "death on getppid" filter. */
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
+	ASSERT_EQ(0, ret);
+
+	/* Tracer will redirect getpid to getppid, and we should die. */
+	EXPECT_NE(self->mypid, syscall(__NR_getpid));
+}
+
+TEST_F(TRACE_syscall, skip_after_ptrace)
+{
+	struct sock_filter filter[] = {
+		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+			offsetof(struct seccomp_data, nr)),
+		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | EPERM),
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+	};
+	struct sock_fprog prog = {
+		.len = (unsigned short)ARRAY_SIZE(filter),
+		.filter = filter,
+	};
+	long ret;
+
+	/* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */
+	teardown_trace_fixture(_metadata, self->tracer);
+	self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL,
+					   true);
+
+	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+	ASSERT_EQ(0, ret);
+
+	/* Install "errno on getppid" filter. */
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
+	ASSERT_EQ(0, ret);
+
+	/* Tracer will redirect getpid to getppid, and we should see EPERM. */
+	EXPECT_EQ(-1, syscall(__NR_getpid));
+	EXPECT_EQ(EPERM, errno);
+}
+
+TEST_F_SIGNAL(TRACE_syscall, kill_after_ptrace, SIGSYS)
+{
+	struct sock_filter filter[] = {
+		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+			offsetof(struct seccomp_data, nr)),
+		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+	};
+	struct sock_fprog prog = {
+		.len = (unsigned short)ARRAY_SIZE(filter),
+		.filter = filter,
+	};
+	long ret;
+
+	/* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */
+	teardown_trace_fixture(_metadata, self->tracer);
+	self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL,
+					   true);
+
+	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+	ASSERT_EQ(0, ret);
+
+	/* Install "death on getppid" filter. */
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
+	ASSERT_EQ(0, ret);
+
+	/* Tracer will redirect getpid to getppid, and we should die. */
+	EXPECT_NE(self->mypid, syscall(__NR_getpid));
+}
+
+TEST(seccomp_syscall)
+{
+	struct sock_filter filter[] = {
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+	};
+	struct sock_fprog prog = {
+		.len = (unsigned short)ARRAY_SIZE(filter),
+		.filter = filter,
+	};
+	long ret;
+
+	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+	ASSERT_EQ(0, ret) {
+		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
+	}
+
+	/* Reject insane operation. */
+	ret = seccomp(-1, 0, &prog);
+	ASSERT_NE(ENOSYS, errno) {
+		TH_LOG("Kernel does not support seccomp syscall!");
+	}
+	EXPECT_EQ(EINVAL, errno) {
+		TH_LOG("Did not reject crazy op value!");
+	}
+
+	/* Reject strict with flags or pointer. */
+	ret = seccomp(SECCOMP_SET_MODE_STRICT, -1, NULL);
+	EXPECT_EQ(EINVAL, errno) {
+		TH_LOG("Did not reject mode strict with flags!");
+	}
+	ret = seccomp(SECCOMP_SET_MODE_STRICT, 0, &prog);
+	EXPECT_EQ(EINVAL, errno) {
+		TH_LOG("Did not reject mode strict with uargs!");
+	}
+
+	/* Reject insane args for filter. */
+	ret = seccomp(SECCOMP_SET_MODE_FILTER, -1, &prog);
+	EXPECT_EQ(EINVAL, errno) {
+		TH_LOG("Did not reject crazy filter flags!");
+	}
+	ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, NULL);
+	EXPECT_EQ(EFAULT, errno) {
+		TH_LOG("Did not reject NULL filter!");
+	}
+
+	ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog);
+	EXPECT_EQ(0, errno) {
+		TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER: %s",
+			strerror(errno));
+	}
+}
+
+TEST(seccomp_syscall_mode_lock)
+{
+	struct sock_filter filter[] = {
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+	};
+	struct sock_fprog prog = {
+		.len = (unsigned short)ARRAY_SIZE(filter),
+		.filter = filter,
+	};
+	long ret;
+
+	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0);
+	ASSERT_EQ(0, ret) {
+		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
+	}
+
+	ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog);
+	ASSERT_NE(ENOSYS, errno) {
+		TH_LOG("Kernel does not support seccomp syscall!");
+	}
+	EXPECT_EQ(0, ret) {
+		TH_LOG("Could not install filter!");
+	}
+
+	/* Make sure neither entry point will switch to strict. */
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, 0, 0, 0);
+	EXPECT_EQ(EINVAL, errno) {
+		TH_LOG("Switched to mode strict!");
+	}
+
+	ret = seccomp(SECCOMP_SET_MODE_STRICT, 0, NULL);
+	EXPECT_EQ(EINVAL, errno) {
+		TH_LOG("Switched to mode strict!");
+	}
+}
+
+/*
+ * Test detection of known and unknown filter flags. Userspace needs to be able
+ * to check if a filter flag is supported by the current kernel and a good way
+ * of doing that is by attempting to enter filter mode, with the flag bit in
+ * question set, and a NULL pointer for the _args_ parameter. EFAULT indicates
+ * that the flag is valid and EINVAL indicates that the flag is invalid.
+ */
+TEST(detect_seccomp_filter_flags)
+{
+	unsigned int flags[] = { SECCOMP_FILTER_FLAG_TSYNC,
+				 SECCOMP_FILTER_FLAG_LOG,
+				 SECCOMP_FILTER_FLAG_SPEC_ALLOW };
+	unsigned int flag, all_flags;
+	int i;
+	long ret;
+
+	/* Test detection of known-good filter flags */
+	for (i = 0, all_flags = 0; i < ARRAY_SIZE(flags); i++) {
+		int bits = 0;
+
+		flag = flags[i];
+		/* Make sure the flag is a single bit! */
+		while (flag) {
+			if (flag & 0x1)
+				bits ++;
+			flag >>= 1;
+		}
+		ASSERT_EQ(1, bits);
+		flag = flags[i];
+
+		ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL);
+		ASSERT_NE(ENOSYS, errno) {
+			TH_LOG("Kernel does not support seccomp syscall!");
+		}
+		EXPECT_EQ(-1, ret);
+		EXPECT_EQ(EFAULT, errno) {
+			TH_LOG("Failed to detect that a known-good filter flag (0x%X) is supported!",
+			       flag);
+		}
+
+		all_flags |= flag;
+	}
+
+	/* Test detection of all known-good filter flags */
+	ret = seccomp(SECCOMP_SET_MODE_FILTER, all_flags, NULL);
+	EXPECT_EQ(-1, ret);
+	EXPECT_EQ(EFAULT, errno) {
+		TH_LOG("Failed to detect that all known-good filter flags (0x%X) are supported!",
+		       all_flags);
+	}
+
+	/* Test detection of an unknown filter flag */
+	flag = -1;
+	ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL);
+	EXPECT_EQ(-1, ret);
+	EXPECT_EQ(EINVAL, errno) {
+		TH_LOG("Failed to detect that an unknown filter flag (0x%X) is unsupported!",
+		       flag);
+	}
+
+	/*
+	 * Test detection of an unknown filter flag that may simply need to be
+	 * added to this test
+	 */
+	flag = flags[ARRAY_SIZE(flags) - 1] << 1;
+	ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL);
+	EXPECT_EQ(-1, ret);
+	EXPECT_EQ(EINVAL, errno) {
+		TH_LOG("Failed to detect that an unknown filter flag (0x%X) is unsupported! Does a new flag need to be added to this test?",
+		       flag);
+	}
+}
+
+TEST(TSYNC_first)
+{
+	struct sock_filter filter[] = {
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+	};
+	struct sock_fprog prog = {
+		.len = (unsigned short)ARRAY_SIZE(filter),
+		.filter = filter,
+	};
+	long ret;
+
+	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0);
+	ASSERT_EQ(0, ret) {
+		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
+	}
+
+	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
+		      &prog);
+	ASSERT_NE(ENOSYS, errno) {
+		TH_LOG("Kernel does not support seccomp syscall!");
+	}
+	EXPECT_EQ(0, ret) {
+		TH_LOG("Could not install initial filter with TSYNC!");
+	}
+}
+
+#define TSYNC_SIBLINGS 2
+struct tsync_sibling {
+	pthread_t tid;
+	pid_t system_tid;
+	sem_t *started;
+	pthread_cond_t *cond;
+	pthread_mutex_t *mutex;
+	int diverge;
+	int num_waits;
+	struct sock_fprog *prog;
+	struct __test_metadata *metadata;
+};
+
+/*
+ * To avoid joining joined threads (which is not allowed by Bionic),
+ * make sure we both successfully join and clear the tid to skip a
+ * later join attempt during fixture teardown. Any remaining threads
+ * will be directly killed during teardown.
+ */
+#define PTHREAD_JOIN(tid, status)					\
+	do {								\
+		int _rc = pthread_join(tid, status);			\
+		if (_rc) {						\
+			TH_LOG("pthread_join of tid %u failed: %d\n",	\
+				(unsigned int)tid, _rc);		\
+		} else {						\
+			tid = 0;					\
+		}							\
+	} while (0)
+
+FIXTURE_DATA(TSYNC) {
+	struct sock_fprog root_prog, apply_prog;
+	struct tsync_sibling sibling[TSYNC_SIBLINGS];
+	sem_t started;
+	pthread_cond_t cond;
+	pthread_mutex_t mutex;
+	int sibling_count;
+};
+
+FIXTURE_SETUP(TSYNC)
+{
+	struct sock_filter root_filter[] = {
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+	};
+	struct sock_filter apply_filter[] = {
+		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+			offsetof(struct seccomp_data, nr)),
+		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+	};
+
+	memset(&self->root_prog, 0, sizeof(self->root_prog));
+	memset(&self->apply_prog, 0, sizeof(self->apply_prog));
+	memset(&self->sibling, 0, sizeof(self->sibling));
+	self->root_prog.filter = malloc(sizeof(root_filter));
+	ASSERT_NE(NULL, self->root_prog.filter);
+	memcpy(self->root_prog.filter, &root_filter, sizeof(root_filter));
+	self->root_prog.len = (unsigned short)ARRAY_SIZE(root_filter);
+
+	self->apply_prog.filter = malloc(sizeof(apply_filter));
+	ASSERT_NE(NULL, self->apply_prog.filter);
+	memcpy(self->apply_prog.filter, &apply_filter, sizeof(apply_filter));
+	self->apply_prog.len = (unsigned short)ARRAY_SIZE(apply_filter);
+
+	self->sibling_count = 0;
+	pthread_mutex_init(&self->mutex, NULL);
+	pthread_cond_init(&self->cond, NULL);
+	sem_init(&self->started, 0, 0);
+	self->sibling[0].tid = 0;
+	self->sibling[0].cond = &self->cond;
+	self->sibling[0].started = &self->started;
+	self->sibling[0].mutex = &self->mutex;
+	self->sibling[0].diverge = 0;
+	self->sibling[0].num_waits = 1;
+	self->sibling[0].prog = &self->root_prog;
+	self->sibling[0].metadata = _metadata;
+	self->sibling[1].tid = 0;
+	self->sibling[1].cond = &self->cond;
+	self->sibling[1].started = &self->started;
+	self->sibling[1].mutex = &self->mutex;
+	self->sibling[1].diverge = 0;
+	self->sibling[1].prog = &self->root_prog;
+	self->sibling[1].num_waits = 1;
+	self->sibling[1].metadata = _metadata;
+}
+
+FIXTURE_TEARDOWN(TSYNC)
+{
+	int sib = 0;
+
+	if (self->root_prog.filter)
+		free(self->root_prog.filter);
+	if (self->apply_prog.filter)
+		free(self->apply_prog.filter);
+
+	for ( ; sib < self->sibling_count; ++sib) {
+		struct tsync_sibling *s = &self->sibling[sib];
+
+		if (!s->tid)
+			continue;
+		/*
+		 * If a thread is still running, it may be stuck, so hit
+		 * it over the head really hard.
+		 */
+		pthread_kill(s->tid, 9);
+	}
+	pthread_mutex_destroy(&self->mutex);
+	pthread_cond_destroy(&self->cond);
+	sem_destroy(&self->started);
+}
+
+void *tsync_sibling(void *data)
+{
+	long ret = 0;
+	struct tsync_sibling *me = data;
+
+	me->system_tid = syscall(__NR_gettid);
+
+	pthread_mutex_lock(me->mutex);
+	if (me->diverge) {
+		/* Just re-apply the root prog to fork the tree */
+		ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER,
+				me->prog, 0, 0);
+	}
+	sem_post(me->started);
+	/* Return outside of started so parent notices failures. */
+	if (ret) {
+		pthread_mutex_unlock(me->mutex);
+		return (void *)SIBLING_EXIT_FAILURE;
+	}
+	do {
+		pthread_cond_wait(me->cond, me->mutex);
+		me->num_waits = me->num_waits - 1;
+	} while (me->num_waits);
+	pthread_mutex_unlock(me->mutex);
+
+	ret = prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0);
+	if (!ret)
+		return (void *)SIBLING_EXIT_NEWPRIVS;
+	read(0, NULL, 0);
+	return (void *)SIBLING_EXIT_UNKILLED;
+}
+
+void tsync_start_sibling(struct tsync_sibling *sibling)
+{
+	pthread_create(&sibling->tid, NULL, tsync_sibling, (void *)sibling);
+}
+
+TEST_F(TSYNC, siblings_fail_prctl)
+{
+	long ret;
+	void *status;
+	struct sock_filter filter[] = {
+		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+			offsetof(struct seccomp_data, nr)),
+		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_prctl, 0, 1),
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | EINVAL),
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+	};
+	struct sock_fprog prog = {
+		.len = (unsigned short)ARRAY_SIZE(filter),
+		.filter = filter,
+	};
+
+	ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
+		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
+	}
+
+	/* Check prctl failure detection by requesting sib 0 diverge. */
+	ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog);
+	ASSERT_NE(ENOSYS, errno) {
+		TH_LOG("Kernel does not support seccomp syscall!");
+	}
+	ASSERT_EQ(0, ret) {
+		TH_LOG("setting filter failed");
+	}
+
+	self->sibling[0].diverge = 1;
+	tsync_start_sibling(&self->sibling[0]);
+	tsync_start_sibling(&self->sibling[1]);
+
+	while (self->sibling_count < TSYNC_SIBLINGS) {
+		sem_wait(&self->started);
+		self->sibling_count++;
+	}
+
+	/* Signal the threads to clean up*/
+	pthread_mutex_lock(&self->mutex);
+	ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
+		TH_LOG("cond broadcast non-zero");
+	}
+	pthread_mutex_unlock(&self->mutex);
+
+	/* Ensure diverging sibling failed to call prctl. */
+	PTHREAD_JOIN(self->sibling[0].tid, &status);
+	EXPECT_EQ(SIBLING_EXIT_FAILURE, (long)status);
+	PTHREAD_JOIN(self->sibling[1].tid, &status);
+	EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
+}
+
+TEST_F(TSYNC, two_siblings_with_ancestor)
+{
+	long ret;
+	void *status;
+
+	ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
+		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
+	}
+
+	ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog);
+	ASSERT_NE(ENOSYS, errno) {
+		TH_LOG("Kernel does not support seccomp syscall!");
+	}
+	ASSERT_EQ(0, ret) {
+		TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!");
+	}
+	tsync_start_sibling(&self->sibling[0]);
+	tsync_start_sibling(&self->sibling[1]);
+
+	while (self->sibling_count < TSYNC_SIBLINGS) {
+		sem_wait(&self->started);
+		self->sibling_count++;
+	}
+
+	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
+		      &self->apply_prog);
+	ASSERT_EQ(0, ret) {
+		TH_LOG("Could install filter on all threads!");
+	}
+	/* Tell the siblings to test the policy */
+	pthread_mutex_lock(&self->mutex);
+	ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
+		TH_LOG("cond broadcast non-zero");
+	}
+	pthread_mutex_unlock(&self->mutex);
+	/* Ensure they are both killed and don't exit cleanly. */
+	PTHREAD_JOIN(self->sibling[0].tid, &status);
+	EXPECT_EQ(0x0, (long)status);
+	PTHREAD_JOIN(self->sibling[1].tid, &status);
+	EXPECT_EQ(0x0, (long)status);
+}
+
+TEST_F(TSYNC, two_sibling_want_nnp)
+{
+	void *status;
+
+	/* start siblings before any prctl() operations */
+	tsync_start_sibling(&self->sibling[0]);
+	tsync_start_sibling(&self->sibling[1]);
+	while (self->sibling_count < TSYNC_SIBLINGS) {
+		sem_wait(&self->started);
+		self->sibling_count++;
+	}
+
+	/* Tell the siblings to test no policy */
+	pthread_mutex_lock(&self->mutex);
+	ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
+		TH_LOG("cond broadcast non-zero");
+	}
+	pthread_mutex_unlock(&self->mutex);
+
+	/* Ensure they are both upset about lacking nnp. */
+	PTHREAD_JOIN(self->sibling[0].tid, &status);
+	EXPECT_EQ(SIBLING_EXIT_NEWPRIVS, (long)status);
+	PTHREAD_JOIN(self->sibling[1].tid, &status);
+	EXPECT_EQ(SIBLING_EXIT_NEWPRIVS, (long)status);
+}
+
+TEST_F(TSYNC, two_siblings_with_no_filter)
+{
+	long ret;
+	void *status;
+
+	/* start siblings before any prctl() operations */
+	tsync_start_sibling(&self->sibling[0]);
+	tsync_start_sibling(&self->sibling[1]);
+	while (self->sibling_count < TSYNC_SIBLINGS) {
+		sem_wait(&self->started);
+		self->sibling_count++;
+	}
+
+	ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
+		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
+	}
+
+	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
+		      &self->apply_prog);
+	ASSERT_NE(ENOSYS, errno) {
+		TH_LOG("Kernel does not support seccomp syscall!");
+	}
+	ASSERT_EQ(0, ret) {
+		TH_LOG("Could install filter on all threads!");
+	}
+
+	/* Tell the siblings to test the policy */
+	pthread_mutex_lock(&self->mutex);
+	ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
+		TH_LOG("cond broadcast non-zero");
+	}
+	pthread_mutex_unlock(&self->mutex);
+
+	/* Ensure they are both killed and don't exit cleanly. */
+	PTHREAD_JOIN(self->sibling[0].tid, &status);
+	EXPECT_EQ(0x0, (long)status);
+	PTHREAD_JOIN(self->sibling[1].tid, &status);
+	EXPECT_EQ(0x0, (long)status);
+}
+
+TEST_F(TSYNC, two_siblings_with_one_divergence)
+{
+	long ret;
+	void *status;
+
+	ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
+		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
+	}
+
+	ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog);
+	ASSERT_NE(ENOSYS, errno) {
+		TH_LOG("Kernel does not support seccomp syscall!");
+	}
+	ASSERT_EQ(0, ret) {
+		TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!");
+	}
+	self->sibling[0].diverge = 1;
+	tsync_start_sibling(&self->sibling[0]);
+	tsync_start_sibling(&self->sibling[1]);
+
+	while (self->sibling_count < TSYNC_SIBLINGS) {
+		sem_wait(&self->started);
+		self->sibling_count++;
+	}
+
+	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
+		      &self->apply_prog);
+	ASSERT_EQ(self->sibling[0].system_tid, ret) {
+		TH_LOG("Did not fail on diverged sibling.");
+	}
+
+	/* Wake the threads */
+	pthread_mutex_lock(&self->mutex);
+	ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
+		TH_LOG("cond broadcast non-zero");
+	}
+	pthread_mutex_unlock(&self->mutex);
+
+	/* Ensure they are both unkilled. */
+	PTHREAD_JOIN(self->sibling[0].tid, &status);
+	EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
+	PTHREAD_JOIN(self->sibling[1].tid, &status);
+	EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
+}
+
+TEST_F(TSYNC, two_siblings_not_under_filter)
+{
+	long ret, sib;
+	void *status;
+
+	ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
+		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
+	}
+
+	/*
+	 * Sibling 0 will have its own seccomp policy
+	 * and Sibling 1 will not be under seccomp at
+	 * all. Sibling 1 will enter seccomp and 0
+	 * will cause failure.
+	 */
+	self->sibling[0].diverge = 1;
+	tsync_start_sibling(&self->sibling[0]);
+	tsync_start_sibling(&self->sibling[1]);
+
+	while (self->sibling_count < TSYNC_SIBLINGS) {
+		sem_wait(&self->started);
+		self->sibling_count++;
+	}
+
+	ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog);
+	ASSERT_NE(ENOSYS, errno) {
+		TH_LOG("Kernel does not support seccomp syscall!");
+	}
+	ASSERT_EQ(0, ret) {
+		TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!");
+	}
+
+	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
+		      &self->apply_prog);
+	ASSERT_EQ(ret, self->sibling[0].system_tid) {
+		TH_LOG("Did not fail on diverged sibling.");
+	}
+	sib = 1;
+	if (ret == self->sibling[0].system_tid)
+		sib = 0;
+
+	pthread_mutex_lock(&self->mutex);
+
+	/* Increment the other siblings num_waits so we can clean up
+	 * the one we just saw.
+	 */
+	self->sibling[!sib].num_waits += 1;
+
+	/* Signal the thread to clean up*/
+	ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
+		TH_LOG("cond broadcast non-zero");
+	}
+	pthread_mutex_unlock(&self->mutex);
+	PTHREAD_JOIN(self->sibling[sib].tid, &status);
+	EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
+	/* Poll for actual task death. pthread_join doesn't guarantee it. */
+	while (!kill(self->sibling[sib].system_tid, 0))
+		sleep(0.1);
+	/* Switch to the remaining sibling */
+	sib = !sib;
+
+	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
+		      &self->apply_prog);
+	ASSERT_EQ(0, ret) {
+		TH_LOG("Expected the remaining sibling to sync");
+	};
+
+	pthread_mutex_lock(&self->mutex);
+
+	/* If remaining sibling didn't have a chance to wake up during
+	 * the first broadcast, manually reduce the num_waits now.
+	 */
+	if (self->sibling[sib].num_waits > 1)
+		self->sibling[sib].num_waits = 1;
+	ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
+		TH_LOG("cond broadcast non-zero");
+	}
+	pthread_mutex_unlock(&self->mutex);
+	PTHREAD_JOIN(self->sibling[sib].tid, &status);
+	EXPECT_EQ(0, (long)status);
+	/* Poll for actual task death. pthread_join doesn't guarantee it. */
+	while (!kill(self->sibling[sib].system_tid, 0))
+		sleep(0.1);
+
+	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
+		      &self->apply_prog);
+	ASSERT_EQ(0, ret);  /* just us chickens */
+}
+
+/* Make sure restarted syscalls are seen directly as "restart_syscall". */
+TEST(syscall_restart)
+{
+	long ret;
+	unsigned long msg;
+	pid_t child_pid;
+	int pipefd[2];
+	int status;
+	siginfo_t info = { };
+	struct sock_filter filter[] = {
+		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+			 offsetof(struct seccomp_data, nr)),
+
+#ifdef __NR_sigreturn
+		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_sigreturn, 6, 0),
+#endif
+		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 5, 0),
+		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_exit, 4, 0),
+		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_rt_sigreturn, 3, 0),
+		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_nanosleep, 4, 0),
+		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_restart_syscall, 4, 0),
+
+		/* Allow __NR_write for easy logging. */
+		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_write, 0, 1),
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
+		/* The nanosleep jump target. */
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE|0x100),
+		/* The restart_syscall jump target. */
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE|0x200),
+	};
+	struct sock_fprog prog = {
+		.len = (unsigned short)ARRAY_SIZE(filter),
+		.filter = filter,
+	};
+#if defined(__arm__)
+	struct utsname utsbuf;
+#endif
+
+	ASSERT_EQ(0, pipe(pipefd));
+
+	child_pid = fork();
+	ASSERT_LE(0, child_pid);
+	if (child_pid == 0) {
+		/* Child uses EXPECT not ASSERT to deliver status correctly. */
+		char buf = ' ';
+		struct timespec timeout = { };
+
+		/* Attach parent as tracer and stop. */
+		EXPECT_EQ(0, ptrace(PTRACE_TRACEME));
+		EXPECT_EQ(0, raise(SIGSTOP));
+
+		EXPECT_EQ(0, close(pipefd[1]));
+
+		EXPECT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
+			TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
+		}
+
+		ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
+		EXPECT_EQ(0, ret) {
+			TH_LOG("Failed to install filter!");
+		}
+
+		EXPECT_EQ(1, read(pipefd[0], &buf, 1)) {
+			TH_LOG("Failed to read() sync from parent");
+		}
+		EXPECT_EQ('.', buf) {
+			TH_LOG("Failed to get sync data from read()");
+		}
+
+		/* Start nanosleep to be interrupted. */
+		timeout.tv_sec = 1;
+		errno = 0;
+		EXPECT_EQ(0, nanosleep(&timeout, NULL)) {
+			TH_LOG("Call to nanosleep() failed (errno %d)", errno);
+		}
+
+		/* Read final sync from parent. */
+		EXPECT_EQ(1, read(pipefd[0], &buf, 1)) {
+			TH_LOG("Failed final read() from parent");
+		}
+		EXPECT_EQ('!', buf) {
+			TH_LOG("Failed to get final data from read()");
+		}
+
+		/* Directly report the status of our test harness results. */
+		syscall(__NR_exit, _metadata->passed ? EXIT_SUCCESS
+						     : EXIT_FAILURE);
+	}
+	EXPECT_EQ(0, close(pipefd[0]));
+
+	/* Attach to child, setup options, and release. */
+	ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
+	ASSERT_EQ(true, WIFSTOPPED(status));
+	ASSERT_EQ(0, ptrace(PTRACE_SETOPTIONS, child_pid, NULL,
+			    PTRACE_O_TRACESECCOMP));
+	ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
+	ASSERT_EQ(1, write(pipefd[1], ".", 1));
+
+	/* Wait for nanosleep() to start. */
+	ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
+	ASSERT_EQ(true, WIFSTOPPED(status));
+	ASSERT_EQ(SIGTRAP, WSTOPSIG(status));
+	ASSERT_EQ(PTRACE_EVENT_SECCOMP, (status >> 16));
+	ASSERT_EQ(0, ptrace(PTRACE_GETEVENTMSG, child_pid, NULL, &msg));
+	ASSERT_EQ(0x100, msg);
+	EXPECT_EQ(__NR_nanosleep, get_syscall(_metadata, child_pid));
+
+	/* Might as well check siginfo for sanity while we're here. */
+	ASSERT_EQ(0, ptrace(PTRACE_GETSIGINFO, child_pid, NULL, &info));
+	ASSERT_EQ(SIGTRAP, info.si_signo);
+	ASSERT_EQ(SIGTRAP | (PTRACE_EVENT_SECCOMP << 8), info.si_code);
+	EXPECT_EQ(0, info.si_errno);
+	EXPECT_EQ(getuid(), info.si_uid);
+	/* Verify signal delivery came from child (seccomp-triggered). */
+	EXPECT_EQ(child_pid, info.si_pid);
+
+	/* Interrupt nanosleep with SIGSTOP (which we'll need to handle). */
+	ASSERT_EQ(0, kill(child_pid, SIGSTOP));
+	ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
+	ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
+	ASSERT_EQ(true, WIFSTOPPED(status));
+	ASSERT_EQ(SIGSTOP, WSTOPSIG(status));
+	/* Verify signal delivery came from parent now. */
+	ASSERT_EQ(0, ptrace(PTRACE_GETSIGINFO, child_pid, NULL, &info));
+	EXPECT_EQ(getpid(), info.si_pid);
+
+	/* Restart nanosleep with SIGCONT, which triggers restart_syscall. */
+	ASSERT_EQ(0, kill(child_pid, SIGCONT));
+	ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
+	ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
+	ASSERT_EQ(true, WIFSTOPPED(status));
+	ASSERT_EQ(SIGCONT, WSTOPSIG(status));
+	ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
+
+	/* Wait for restart_syscall() to start. */
+	ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
+	ASSERT_EQ(true, WIFSTOPPED(status));
+	ASSERT_EQ(SIGTRAP, WSTOPSIG(status));
+	ASSERT_EQ(PTRACE_EVENT_SECCOMP, (status >> 16));
+	ASSERT_EQ(0, ptrace(PTRACE_GETEVENTMSG, child_pid, NULL, &msg));
+
+	ASSERT_EQ(0x200, msg);
+	ret = get_syscall(_metadata, child_pid);
+#if defined(__arm__)
+	/*
+	 * FIXME:
+	 * - native ARM registers do NOT expose true syscall.
+	 * - compat ARM registers on ARM64 DO expose true syscall.
+	 */
+	ASSERT_EQ(0, uname(&utsbuf));
+	if (strncmp(utsbuf.machine, "arm", 3) == 0) {
+		EXPECT_EQ(__NR_nanosleep, ret);
+	} else
+#endif
+	{
+		EXPECT_EQ(__NR_restart_syscall, ret);
+	}
+
+	/* Write again to end test. */
+	ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
+	ASSERT_EQ(1, write(pipefd[1], "!", 1));
+	EXPECT_EQ(0, close(pipefd[1]));
+
+	ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
+	if (WIFSIGNALED(status) || WEXITSTATUS(status))
+		_metadata->passed = 0;
+}
+
+TEST_SIGNAL(filter_flag_log, SIGSYS)
+{
+	struct sock_filter allow_filter[] = {
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+	};
+	struct sock_filter kill_filter[] = {
+		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+			offsetof(struct seccomp_data, nr)),
+		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+	};
+	struct sock_fprog allow_prog = {
+		.len = (unsigned short)ARRAY_SIZE(allow_filter),
+		.filter = allow_filter,
+	};
+	struct sock_fprog kill_prog = {
+		.len = (unsigned short)ARRAY_SIZE(kill_filter),
+		.filter = kill_filter,
+	};
+	long ret;
+	pid_t parent = getppid();
+
+	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+	ASSERT_EQ(0, ret);
+
+	/* Verify that the FILTER_FLAG_LOG flag isn't accepted in strict mode */
+	ret = seccomp(SECCOMP_SET_MODE_STRICT, SECCOMP_FILTER_FLAG_LOG,
+		      &allow_prog);
+	ASSERT_NE(ENOSYS, errno) {
+		TH_LOG("Kernel does not support seccomp syscall!");
+	}
+	EXPECT_NE(0, ret) {
+		TH_LOG("Kernel accepted FILTER_FLAG_LOG flag in strict mode!");
+	}
+	EXPECT_EQ(EINVAL, errno) {
+		TH_LOG("Kernel returned unexpected errno for FILTER_FLAG_LOG flag in strict mode!");
+	}
+
+	/* Verify that a simple, permissive filter can be added with no flags */
+	ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &allow_prog);
+	EXPECT_EQ(0, ret);
+
+	/* See if the same filter can be added with the FILTER_FLAG_LOG flag */
+	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_LOG,
+		      &allow_prog);
+	ASSERT_NE(EINVAL, errno) {
+		TH_LOG("Kernel does not support the FILTER_FLAG_LOG flag!");
+	}
+	EXPECT_EQ(0, ret);
+
+	/* Ensure that the kill filter works with the FILTER_FLAG_LOG flag */
+	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_LOG,
+		      &kill_prog);
+	EXPECT_EQ(0, ret);
+
+	EXPECT_EQ(parent, syscall(__NR_getppid));
+	/* getpid() should never return. */
+	EXPECT_EQ(0, syscall(__NR_getpid));
+}
+
+TEST(get_action_avail)
+{
+	__u32 actions[] = { SECCOMP_RET_KILL_THREAD, SECCOMP_RET_TRAP,
+			    SECCOMP_RET_ERRNO, SECCOMP_RET_TRACE,
+			    SECCOMP_RET_LOG,   SECCOMP_RET_ALLOW };
+	__u32 unknown_action = 0x10000000U;
+	int i;
+	long ret;
+
+	ret = seccomp(SECCOMP_GET_ACTION_AVAIL, 0, &actions[0]);
+	ASSERT_NE(ENOSYS, errno) {
+		TH_LOG("Kernel does not support seccomp syscall!");
+	}
+	ASSERT_NE(EINVAL, errno) {
+		TH_LOG("Kernel does not support SECCOMP_GET_ACTION_AVAIL operation!");
+	}
+	EXPECT_EQ(ret, 0);
+
+	for (i = 0; i < ARRAY_SIZE(actions); i++) {
+		ret = seccomp(SECCOMP_GET_ACTION_AVAIL, 0, &actions[i]);
+		EXPECT_EQ(ret, 0) {
+			TH_LOG("Expected action (0x%X) not available!",
+			       actions[i]);
+		}
+	}
+
+	/* Check that an unknown action is handled properly (EOPNOTSUPP) */
+	ret = seccomp(SECCOMP_GET_ACTION_AVAIL, 0, &unknown_action);
+	EXPECT_EQ(ret, -1);
+	EXPECT_EQ(errno, EOPNOTSUPP);
+}
+
+TEST(get_metadata)
+{
+	pid_t pid;
+	int pipefd[2];
+	char buf;
+	struct seccomp_metadata md;
+	long ret;
+
+	/* Only real root can get metadata. */
+	if (geteuid()) {
+		XFAIL(return, "get_metadata requires real root");
+		return;
+	}
+
+	ASSERT_EQ(0, pipe(pipefd));
+
+	pid = fork();
+	ASSERT_GE(pid, 0);
+	if (pid == 0) {
+		struct sock_filter filter[] = {
+			BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+		};
+		struct sock_fprog prog = {
+			.len = (unsigned short)ARRAY_SIZE(filter),
+			.filter = filter,
+		};
+
+		/* one with log, one without */
+		ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER,
+				     SECCOMP_FILTER_FLAG_LOG, &prog));
+		ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog));
+
+		ASSERT_EQ(0, close(pipefd[0]));
+		ASSERT_EQ(1, write(pipefd[1], "1", 1));
+		ASSERT_EQ(0, close(pipefd[1]));
+
+		while (1)
+			sleep(100);
+	}
+
+	ASSERT_EQ(0, close(pipefd[1]));
+	ASSERT_EQ(1, read(pipefd[0], &buf, 1));
+
+	ASSERT_EQ(0, ptrace(PTRACE_ATTACH, pid));
+	ASSERT_EQ(pid, waitpid(pid, NULL, 0));
+
+	/* Past here must not use ASSERT or child process is never killed. */
+
+	md.filter_off = 0;
+	errno = 0;
+	ret = ptrace(PTRACE_SECCOMP_GET_METADATA, pid, sizeof(md), &md);
+	EXPECT_EQ(sizeof(md), ret) {
+		if (errno == EINVAL)
+			XFAIL(goto skip, "Kernel does not support PTRACE_SECCOMP_GET_METADATA (missing CONFIG_CHECKPOINT_RESTORE?)");
+	}
+
+	EXPECT_EQ(md.flags, SECCOMP_FILTER_FLAG_LOG);
+	EXPECT_EQ(md.filter_off, 0);
+
+	md.filter_off = 1;
+	ret = ptrace(PTRACE_SECCOMP_GET_METADATA, pid, sizeof(md), &md);
+	EXPECT_EQ(sizeof(md), ret);
+	EXPECT_EQ(md.flags, 0);
+	EXPECT_EQ(md.filter_off, 1);
+
+skip:
+	ASSERT_EQ(0, kill(pid, SIGKILL));
+}
+
+/*
+ * TODO:
+ * - add microbenchmarks
+ * - expand NNP testing
+ * - better arch-specific TRACE and TRAP handlers.
+ * - endianness checking when appropriate
+ * - 64-bit arg prodding
+ * - arch value testing (x86 modes especially)
+ * - verify that FILTER_FLAG_LOG filters generate log messages
+ * - verify that RET_LOG generates log messages
+ * - ...
+ */
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/sigaltstack/.gitignore b/tools/testing/selftests/sigaltstack/.gitignore
new file mode 100644
index 000000000..35897b0a3
--- /dev/null
+++ b/tools/testing/selftests/sigaltstack/.gitignore
@@ -0,0 +1 @@
+sas
diff --git a/tools/testing/selftests/sigaltstack/Makefile b/tools/testing/selftests/sigaltstack/Makefile
new file mode 100644
index 000000000..f68fbf80d
--- /dev/null
+++ b/tools/testing/selftests/sigaltstack/Makefile
@@ -0,0 +1,5 @@
+CFLAGS = -Wall
+TEST_GEN_PROGS = sas
+
+include ../lib.mk
+
diff --git a/tools/testing/selftests/sigaltstack/sas.c b/tools/testing/selftests/sigaltstack/sas.c
new file mode 100644
index 000000000..228c2ae47
--- /dev/null
+++ b/tools/testing/selftests/sigaltstack/sas.c
@@ -0,0 +1,191 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Stas Sergeev <stsp@users.sourceforge.net>
+ *
+ * test sigaltstack(SS_ONSTACK | SS_AUTODISARM)
+ * If that succeeds, then swapcontext() can be used inside sighandler safely.
+ *
+ */
+
+#define _GNU_SOURCE
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+#include <ucontext.h>
+#include <alloca.h>
+#include <string.h>
+#include <assert.h>
+#include <errno.h>
+
+#include "../kselftest.h"
+
+#ifndef SS_AUTODISARM
+#define SS_AUTODISARM  (1U << 31)
+#endif
+
+static void *sstack, *ustack;
+static ucontext_t uc, sc;
+static const char *msg = "[OK]\tStack preserved";
+static const char *msg2 = "[FAIL]\tStack corrupted";
+struct stk_data {
+	char msg[128];
+	int flag;
+};
+
+void my_usr1(int sig, siginfo_t *si, void *u)
+{
+	char *aa;
+	int err;
+	stack_t stk;
+	struct stk_data *p;
+
+#if __s390x__
+	register unsigned long sp asm("%15");
+#else
+	register unsigned long sp asm("sp");
+#endif
+
+	if (sp < (unsigned long)sstack ||
+			sp >= (unsigned long)sstack + SIGSTKSZ) {
+		ksft_exit_fail_msg("SP is not on sigaltstack\n");
+	}
+	/* put some data on stack. other sighandler will try to overwrite it */
+	aa = alloca(1024);
+	assert(aa);
+	p = (struct stk_data *)(aa + 512);
+	strcpy(p->msg, msg);
+	p->flag = 1;
+	ksft_print_msg("[RUN]\tsignal USR1\n");
+	err = sigaltstack(NULL, &stk);
+	if (err) {
+		ksft_exit_fail_msg("sigaltstack() - %s\n", strerror(errno));
+		exit(EXIT_FAILURE);
+	}
+	if (stk.ss_flags != SS_DISABLE)
+		ksft_test_result_fail("tss_flags=%x, should be SS_DISABLE\n",
+				stk.ss_flags);
+	else
+		ksft_test_result_pass(
+				"sigaltstack is disabled in sighandler\n");
+	swapcontext(&sc, &uc);
+	ksft_print_msg("%s\n", p->msg);
+	if (!p->flag) {
+		ksft_exit_skip("[RUN]\tAborting\n");
+		exit(EXIT_FAILURE);
+	}
+}
+
+void my_usr2(int sig, siginfo_t *si, void *u)
+{
+	char *aa;
+	struct stk_data *p;
+
+	ksft_print_msg("[RUN]\tsignal USR2\n");
+	aa = alloca(1024);
+	/* dont run valgrind on this */
+	/* try to find the data stored by previous sighandler */
+	p = memmem(aa, 1024, msg, strlen(msg));
+	if (p) {
+		ksft_test_result_fail("sigaltstack re-used\n");
+		/* corrupt the data */
+		strcpy(p->msg, msg2);
+		/* tell other sighandler that his data is corrupted */
+		p->flag = 0;
+	}
+}
+
+static void switch_fn(void)
+{
+	ksft_print_msg("[RUN]\tswitched to user ctx\n");
+	raise(SIGUSR2);
+	setcontext(&sc);
+}
+
+int main(void)
+{
+	struct sigaction act;
+	stack_t stk;
+	int err;
+
+	ksft_print_header();
+
+	sigemptyset(&act.sa_mask);
+	act.sa_flags = SA_ONSTACK | SA_SIGINFO;
+	act.sa_sigaction = my_usr1;
+	sigaction(SIGUSR1, &act, NULL);
+	act.sa_sigaction = my_usr2;
+	sigaction(SIGUSR2, &act, NULL);
+	sstack = mmap(NULL, SIGSTKSZ, PROT_READ | PROT_WRITE,
+		      MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0);
+	if (sstack == MAP_FAILED) {
+		ksft_exit_fail_msg("mmap() - %s\n", strerror(errno));
+		return EXIT_FAILURE;
+	}
+
+	err = sigaltstack(NULL, &stk);
+	if (err) {
+		ksft_exit_fail_msg("sigaltstack() - %s\n", strerror(errno));
+		exit(EXIT_FAILURE);
+	}
+	if (stk.ss_flags == SS_DISABLE) {
+		ksft_test_result_pass(
+				"Initial sigaltstack state was SS_DISABLE\n");
+	} else {
+		ksft_exit_fail_msg("Initial sigaltstack state was %x; "
+		       "should have been SS_DISABLE\n", stk.ss_flags);
+		return EXIT_FAILURE;
+	}
+
+	stk.ss_sp = sstack;
+	stk.ss_size = SIGSTKSZ;
+	stk.ss_flags = SS_ONSTACK | SS_AUTODISARM;
+	err = sigaltstack(&stk, NULL);
+	if (err) {
+		if (errno == EINVAL) {
+			ksft_exit_skip(
+				"[NOTE]\tThe running kernel doesn't support SS_AUTODISARM\n");
+			/*
+			 * If test cases for the !SS_AUTODISARM variant were
+			 * added, we could still run them.  We don't have any
+			 * test cases like that yet, so just exit and report
+			 * success.
+			 */
+			return 0;
+		} else {
+			ksft_exit_fail_msg(
+				"sigaltstack(SS_ONSTACK | SS_AUTODISARM)  %s\n",
+					strerror(errno));
+			return EXIT_FAILURE;
+		}
+	}
+
+	ustack = mmap(NULL, SIGSTKSZ, PROT_READ | PROT_WRITE,
+		      MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0);
+	if (ustack == MAP_FAILED) {
+		ksft_exit_fail_msg("mmap() - %s\n", strerror(errno));
+		return EXIT_FAILURE;
+	}
+	getcontext(&uc);
+	uc.uc_link = NULL;
+	uc.uc_stack.ss_sp = ustack;
+	uc.uc_stack.ss_size = SIGSTKSZ;
+	makecontext(&uc, switch_fn, 0);
+	raise(SIGUSR1);
+
+	err = sigaltstack(NULL, &stk);
+	if (err) {
+		ksft_exit_fail_msg("sigaltstack() - %s\n", strerror(errno));
+		exit(EXIT_FAILURE);
+	}
+	if (stk.ss_flags != SS_AUTODISARM) {
+		ksft_exit_fail_msg("ss_flags=%x, should be SS_AUTODISARM\n",
+				stk.ss_flags);
+		exit(EXIT_FAILURE);
+	}
+	ksft_test_result_pass(
+			"sigaltstack is still SS_AUTODISARM after signal\n");
+
+	ksft_exit_pass();
+	return 0;
+}
diff --git a/tools/testing/selftests/size/.gitignore b/tools/testing/selftests/size/.gitignore
new file mode 100644
index 000000000..189b7818d
--- /dev/null
+++ b/tools/testing/selftests/size/.gitignore
@@ -0,0 +1 @@
+get_size
diff --git a/tools/testing/selftests/size/Makefile b/tools/testing/selftests/size/Makefile
new file mode 100644
index 000000000..4685b3e42
--- /dev/null
+++ b/tools/testing/selftests/size/Makefile
@@ -0,0 +1,5 @@
+CFLAGS := -static -ffreestanding -nostartfiles -s
+
+TEST_GEN_PROGS := get_size
+
+include ../lib.mk
diff --git a/tools/testing/selftests/size/get_size.c b/tools/testing/selftests/size/get_size.c
new file mode 100644
index 000000000..f55943b6d
--- /dev/null
+++ b/tools/testing/selftests/size/get_size.c
@@ -0,0 +1,117 @@
+/*
+ * Copyright 2014 Sony Mobile Communications Inc.
+ *
+ * Licensed under the terms of the GNU GPL License version 2
+ *
+ * Selftest for runtime system size
+ *
+ * Prints the amount of RAM that the currently running system is using.
+ *
+ * This program tries to be as small as possible itself, to
+ * avoid perturbing the system memory utilization with its
+ * own execution.  It also attempts to have as few dependencies
+ * on kernel features as possible.
+ *
+ * It should be statically linked, with startup libs avoided.  It uses
+ * no library calls except the syscall() function for the following 3
+ * syscalls:
+ *   sysinfo(), write(), and _exit()
+ *
+ * For output, it avoids printf (which in some C libraries
+ * has large external dependencies) by  implementing it's own
+ * number output and print routines, and using __builtin_strlen()
+ *
+ * The test may crash if any of the above syscalls fails because in some
+ * libc implementations (e.g. the GNU C Library) errno is saved in
+ * thread-local storage, which does not get initialized due to avoiding
+ * startup libs.
+ */
+
+#include <sys/sysinfo.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+
+#define STDOUT_FILENO 1
+
+static int print(const char *s)
+{
+	size_t len = 0;
+
+	while (s[len] != '\0')
+		len++;
+
+	return syscall(SYS_write, STDOUT_FILENO, s, len);
+}
+
+static inline char *num_to_str(unsigned long num, char *buf, int len)
+{
+	unsigned int digit;
+
+	/* put digits in buffer from back to front */
+	buf += len - 1;
+	*buf = 0;
+	do {
+		digit = num % 10;
+		*(--buf) = digit + '0';
+		num /= 10;
+	} while (num > 0);
+
+	return buf;
+}
+
+static int print_num(unsigned long num)
+{
+	char num_buf[30];
+
+	return print(num_to_str(num, num_buf, sizeof(num_buf)));
+}
+
+static int print_k_value(const char *s, unsigned long num, unsigned long units)
+{
+	unsigned long long temp;
+	int ccode;
+
+	print(s);
+
+	temp = num;
+	temp = (temp * units)/1024;
+	num = temp;
+	ccode = print_num(num);
+	print("\n");
+	return ccode;
+}
+
+/* this program has no main(), as startup libraries are not used */
+void _start(void)
+{
+	int ccode;
+	struct sysinfo info;
+	unsigned long used;
+	static const char *test_name = " get runtime memory use\n";
+
+	print("TAP version 13\n");
+	print("# Testing system size.\n");
+
+	ccode = syscall(SYS_sysinfo, &info);
+	if (ccode < 0) {
+		print("not ok 1");
+		print(test_name);
+		print(" ---\n reason: \"could not get sysinfo\"\n ...\n");
+		syscall(SYS_exit, ccode);
+	}
+	print("ok 1");
+	print(test_name);
+
+	/* ignore cache complexities for now */
+	used = info.totalram - info.freeram - info.bufferram;
+	print("# System runtime memory report (units in Kilobytes):\n");
+	print(" ---\n");
+	print_k_value(" Total:  ", info.totalram, info.mem_unit);
+	print_k_value(" Free:   ", info.freeram, info.mem_unit);
+	print_k_value(" Buffer: ", info.bufferram, info.mem_unit);
+	print_k_value(" In use: ", used, info.mem_unit);
+	print(" ...\n");
+	print("1..1\n");
+
+	syscall(SYS_exit, 0);
+}
diff --git a/tools/testing/selftests/sparc64/Makefile b/tools/testing/selftests/sparc64/Makefile
new file mode 100644
index 000000000..a19531dba
--- /dev/null
+++ b/tools/testing/selftests/sparc64/Makefile
@@ -0,0 +1,50 @@
+# SPDX-License-Identifier: GPL-2.0
+uname_M := $(shell uname -m 2>/dev/null || echo not)
+ARCH ?= $(shell echo $(uname_M) | sed -e s/x86_64/x86/)
+
+ifneq ($(ARCH),sparc64)
+nothing:
+.PHONY: all clean run_tests install
+.SILENT:
+else
+
+SUBDIRS := drivers
+
+TEST_PROGS := run.sh
+
+
+.PHONY: all clean
+
+include ../lib.mk
+
+all:
+	@for DIR in $(SUBDIRS); do		\
+		BUILD_TARGET=$(OUTPUT)/$$DIR;	\
+		mkdir $$BUILD_TARGET  -p;	\
+		make OUTPUT=$$BUILD_TARGET -C $$DIR $@;\
+		#SUBDIR test prog name should be in the form: SUBDIR_test.sh \
+		TEST=$$DIR"_test.sh"; \
+		if [ -e $$DIR/$$TEST ]; then \
+			rsync -a $$DIR/$$TEST $$BUILD_TARGET/; \
+		fi \
+	done
+
+override define INSTALL_RULE
+	mkdir -p $(INSTALL_PATH)
+	install -t $(INSTALL_PATH) $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES)
+
+	@for SUBDIR in $(SUBDIRS); do \
+		BUILD_TARGET=$(OUTPUT)/$$SUBDIR;	\
+		mkdir $$BUILD_TARGET  -p;	\
+		$(MAKE) OUTPUT=$$BUILD_TARGET -C $$SUBDIR INSTALL_PATH=$(INSTALL_PATH)/$$SUBDIR install; \
+	done;
+endef
+
+override define CLEAN
+	@for DIR in $(SUBDIRS); do		\
+		BUILD_TARGET=$(OUTPUT)/$$DIR;	\
+		mkdir $$BUILD_TARGET  -p;	\
+		make OUTPUT=$$BUILD_TARGET -C $$DIR $@;\
+	done
+endef
+endif
diff --git a/tools/testing/selftests/sparc64/drivers/.gitignore b/tools/testing/selftests/sparc64/drivers/.gitignore
new file mode 100644
index 000000000..90e835ed7
--- /dev/null
+++ b/tools/testing/selftests/sparc64/drivers/.gitignore
@@ -0,0 +1 @@
+adi-test
diff --git a/tools/testing/selftests/sparc64/drivers/Makefile b/tools/testing/selftests/sparc64/drivers/Makefile
new file mode 100644
index 000000000..deb0df415
--- /dev/null
+++ b/tools/testing/selftests/sparc64/drivers/Makefile
@@ -0,0 +1,15 @@
+# SPDX-License-Identifier: GPL-2.0
+INCLUDEDIR := -I.
+CFLAGS := $(CFLAGS) $(INCLUDEDIR) -Wall -O2 -g
+
+TEST_GEN_FILES := adi-test
+
+all: $(TEST_GEN_FILES)
+
+$(TEST_GEN_FILES): adi-test.c
+
+TEST_PROGS := drivers_test.sh
+
+include ../../lib.mk
+
+$(OUTPUT)/adi-test: adi-test.c
diff --git a/tools/testing/selftests/sparc64/drivers/adi-test.c b/tools/testing/selftests/sparc64/drivers/adi-test.c
new file mode 100644
index 000000000..95d93c6a8
--- /dev/null
+++ b/tools/testing/selftests/sparc64/drivers/adi-test.c
@@ -0,0 +1,721 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * selftest for sparc64's privileged ADI driver
+ *
+ * Author: Tom Hromatka <tom.hromatka@oracle.com>
+ */
+#include <linux/kernel.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include "../../kselftest.h"
+
+#define DEBUG_LEVEL_1_BIT	(0x0001)
+#define DEBUG_LEVEL_2_BIT	(0x0002)
+#define DEBUG_LEVEL_3_BIT	(0x0004)
+#define DEBUG_LEVEL_4_BIT	(0x0008)
+#define DEBUG_TIMING_BIT	(0x1000)
+
+#ifndef ARRAY_SIZE
+# define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+#endif
+
+/* bit mask of enabled bits to print */
+#define DEBUG 0x0001
+
+#define DEBUG_PRINT_L1(...)	debug_print(DEBUG_LEVEL_1_BIT, __VA_ARGS__)
+#define DEBUG_PRINT_L2(...)	debug_print(DEBUG_LEVEL_2_BIT, __VA_ARGS__)
+#define DEBUG_PRINT_L3(...)	debug_print(DEBUG_LEVEL_3_BIT, __VA_ARGS__)
+#define DEBUG_PRINT_L4(...)	debug_print(DEBUG_LEVEL_4_BIT, __VA_ARGS__)
+#define DEBUG_PRINT_T(...)	debug_print(DEBUG_TIMING_BIT, __VA_ARGS__)
+
+static void debug_print(int level, const char *s, ...)
+{
+	va_list args;
+
+	va_start(args, s);
+
+	if (DEBUG & level)
+		vfprintf(stdout, s, args);
+	va_end(args);
+}
+
+#ifndef min
+#define min(x, y) ((x) < (y) ? x : y)
+#endif
+
+#define RETURN_FROM_TEST(_ret) \
+	do { \
+		DEBUG_PRINT_L1( \
+			"\tTest %s returned %d\n", __func__, _ret); \
+		return _ret; \
+	} while (0)
+
+#define ADI_BLKSZ	64
+#define ADI_MAX_VERSION	15
+
+#define TEST_STEP_FAILURE(_ret) \
+	do { \
+		fprintf(stderr, "\tTest step failure: %d at %s:%d\n", \
+			_ret, __func__, __LINE__); \
+		goto out; \
+	} while (0)
+
+#define RDTICK(_x) \
+	asm volatile(" rd %%tick, %0\n" : "=r" (_x))
+
+static int random_version(void)
+{
+	long tick;
+
+	RDTICK(tick);
+
+	return tick % (ADI_MAX_VERSION + 1);
+}
+
+#define MAX_RANGES_SUPPORTED	5
+static const char system_ram_str[] = "System RAM\n";
+static int range_count;
+static unsigned long long int start_addr[MAX_RANGES_SUPPORTED];
+static unsigned long long int   end_addr[MAX_RANGES_SUPPORTED];
+
+struct stats {
+	char		name[16];
+	unsigned long	total;
+	unsigned long	count;
+	unsigned long	bytes;
+};
+
+static struct stats read_stats = {
+	.name = "read", .total = 0, .count = 0, .bytes = 0};
+static struct stats pread_stats = {
+	.name = "pread", .total = 0, .count = 0, .bytes = 0};
+static struct stats write_stats = {
+	.name = "write", .total = 0, .count = 0, .bytes = 0};
+static struct stats pwrite_stats = {
+	.name = "pwrite", .total = 0, .count = 0, .bytes = 0};
+static struct stats seek_stats = {
+	.name = "seek", .total = 0, .count = 0, .bytes = 0};
+
+static void update_stats(struct stats * const ustats,
+			 unsigned long measurement, unsigned long bytes)
+{
+	ustats->total += measurement;
+	ustats->bytes += bytes;
+	ustats->count++;
+}
+
+static void print_ustats(const struct stats * const ustats)
+{
+	DEBUG_PRINT_L1("%s\t%7d\t%7.0f\t%7.0f\n",
+		       ustats->name, ustats->count,
+		       (float)ustats->total / (float)ustats->count,
+		       (float)ustats->bytes / (float)ustats->count);
+}
+
+static void print_stats(void)
+{
+	DEBUG_PRINT_L1("\nSyscall\tCall\tAvgTime\tAvgSize\n"
+		       "\tCount\t(ticks)\t(bytes)\n"
+		       "-------------------------------\n");
+
+	print_ustats(&read_stats);
+	print_ustats(&pread_stats);
+	print_ustats(&write_stats);
+	print_ustats(&pwrite_stats);
+	print_ustats(&seek_stats);
+}
+
+static int build_memory_map(void)
+{
+	char line[256];
+	FILE *fp;
+	int i;
+
+	range_count = 0;
+
+	fp = fopen("/proc/iomem", "r");
+	if (!fp) {
+		fprintf(stderr, "/proc/iomem: error %d: %s\n",
+			errno, strerror(errno));
+		return -errno;
+	}
+
+	while (fgets(line, sizeof(line), fp) != 0) {
+		if (strstr(line, system_ram_str)) {
+			char *dash, *end_ptr;
+
+			/* Given a line like this:
+			 * d0400000-10ffaffff : System RAM
+			 * replace the "-" with a space
+			 */
+			dash = strstr(line, "-");
+			dash[0] = 0x20;
+
+			start_addr[range_count] = strtoull(line, &end_ptr, 16);
+			end_addr[range_count] = strtoull(end_ptr, NULL, 16);
+			range_count++;
+		}
+	}
+
+	fclose(fp);
+
+	DEBUG_PRINT_L1("RAM Ranges\n");
+	for (i = 0; i < range_count; i++)
+		DEBUG_PRINT_L1("\trange %d: 0x%llx\t- 0x%llx\n",
+			       i, start_addr[i], end_addr[i]);
+
+	if (range_count == 0) {
+		fprintf(stderr, "No valid address ranges found.  Error.\n");
+		return -1;
+	}
+
+	return 0;
+}
+
+static int read_adi(int fd, unsigned char *buf, int buf_sz)
+{
+	int ret, bytes_read = 0;
+	long start, end, elapsed_time = 0;
+
+	do {
+		RDTICK(start);
+		ret = read(fd, buf + bytes_read, buf_sz - bytes_read);
+		RDTICK(end);
+		if (ret < 0)
+			return -errno;
+
+		elapsed_time += end - start;
+		update_stats(&read_stats, elapsed_time, buf_sz);
+		bytes_read += ret;
+
+	} while (bytes_read < buf_sz);
+
+	DEBUG_PRINT_T("\tread elapsed timed = %ld\n", elapsed_time);
+	DEBUG_PRINT_L3("\tRead  %d bytes\n", bytes_read);
+
+	return bytes_read;
+}
+
+static int pread_adi(int fd, unsigned char *buf,
+		     int buf_sz, unsigned long offset)
+{
+	int ret, i, bytes_read = 0;
+	unsigned long cur_offset;
+	long start, end, elapsed_time = 0;
+
+	cur_offset = offset;
+	do {
+		RDTICK(start);
+		ret = pread(fd, buf + bytes_read, buf_sz - bytes_read,
+			    cur_offset);
+		RDTICK(end);
+		if (ret < 0)
+			return -errno;
+
+		elapsed_time += end - start;
+		update_stats(&pread_stats, elapsed_time, buf_sz);
+		bytes_read += ret;
+		cur_offset += ret;
+
+	} while (bytes_read < buf_sz);
+
+	DEBUG_PRINT_T("\tpread elapsed timed = %ld\n", elapsed_time);
+	DEBUG_PRINT_L3("\tRead  %d bytes starting at offset 0x%lx\n",
+		       bytes_read, offset);
+	for (i = 0; i < bytes_read; i++)
+		DEBUG_PRINT_L4("\t\t0x%lx\t%d\n", offset + i, buf[i]);
+
+	return bytes_read;
+}
+
+static int write_adi(int fd, const unsigned char * const buf, int buf_sz)
+{
+	int ret, bytes_written = 0;
+	long start, end, elapsed_time = 0;
+
+	do {
+		RDTICK(start);
+		ret = write(fd, buf + bytes_written, buf_sz - bytes_written);
+		RDTICK(end);
+		if (ret < 0)
+			return -errno;
+
+		elapsed_time += (end - start);
+		update_stats(&write_stats, elapsed_time, buf_sz);
+		bytes_written += ret;
+	} while (bytes_written < buf_sz);
+
+	DEBUG_PRINT_T("\twrite elapsed timed = %ld\n", elapsed_time);
+	DEBUG_PRINT_L3("\tWrote %d of %d bytes\n", bytes_written, buf_sz);
+
+	return bytes_written;
+}
+
+static int pwrite_adi(int fd, const unsigned char * const buf,
+		      int buf_sz, unsigned long offset)
+{
+	int ret, bytes_written = 0;
+	unsigned long cur_offset;
+	long start, end, elapsed_time = 0;
+
+	cur_offset = offset;
+
+	do {
+		RDTICK(start);
+		ret = pwrite(fd, buf + bytes_written,
+			     buf_sz - bytes_written, cur_offset);
+		RDTICK(end);
+		if (ret < 0) {
+			fprintf(stderr, "pwrite(): error %d: %s\n",
+				errno, strerror(errno));
+			return -errno;
+		}
+
+		elapsed_time += (end - start);
+		update_stats(&pwrite_stats, elapsed_time, buf_sz);
+		bytes_written += ret;
+		cur_offset += ret;
+
+	} while (bytes_written < buf_sz);
+
+	DEBUG_PRINT_T("\tpwrite elapsed timed = %ld\n", elapsed_time);
+	DEBUG_PRINT_L3("\tWrote %d of %d bytes starting at address 0x%lx\n",
+		       bytes_written, buf_sz, offset);
+
+	return bytes_written;
+}
+
+static off_t seek_adi(int fd, off_t offset, int whence)
+{
+	long start, end;
+	off_t ret;
+
+	RDTICK(start);
+	ret = lseek(fd, offset, whence);
+	RDTICK(end);
+	DEBUG_PRINT_L2("\tlseek ret = 0x%llx\n", ret);
+	if (ret < 0)
+		goto out;
+
+	DEBUG_PRINT_T("\tlseek elapsed timed = %ld\n", end - start);
+	update_stats(&seek_stats, end - start, 0);
+
+out:
+	(void)lseek(fd, 0, SEEK_END);
+	return ret;
+}
+
+static int test0_prpw_aligned_1byte(int fd)
+{
+	/* somewhat arbitrarily chosen address */
+	unsigned long paddr =
+		(end_addr[range_count - 1] - 0x1000) & ~(ADI_BLKSZ - 1);
+	unsigned char version[1], expected_version;
+	loff_t offset;
+	int ret;
+
+	version[0] = random_version();
+	expected_version = version[0];
+
+	offset = paddr / ADI_BLKSZ;
+
+	ret = pwrite_adi(fd, version, sizeof(version), offset);
+	if (ret != sizeof(version))
+		TEST_STEP_FAILURE(ret);
+
+	ret = pread_adi(fd, version, sizeof(version), offset);
+	if (ret != sizeof(version))
+		TEST_STEP_FAILURE(ret);
+
+	if (expected_version != version[0]) {
+		DEBUG_PRINT_L2("\tExpected version %d but read version %d\n",
+			       expected_version, version[0]);
+		TEST_STEP_FAILURE(-expected_version);
+	}
+
+	ret = 0;
+out:
+	RETURN_FROM_TEST(ret);
+}
+
+#define TEST1_VERSION_SZ	4096
+static int test1_prpw_aligned_4096bytes(int fd)
+{
+	/* somewhat arbitrarily chosen address */
+	unsigned long paddr =
+		(end_addr[range_count - 1] - 0x6000) & ~(ADI_BLKSZ - 1);
+	unsigned char version[TEST1_VERSION_SZ],
+		expected_version[TEST1_VERSION_SZ];
+	loff_t offset;
+	int ret, i;
+
+	for (i = 0; i < TEST1_VERSION_SZ; i++) {
+		version[i] = random_version();
+		expected_version[i] = version[i];
+	}
+
+	offset = paddr / ADI_BLKSZ;
+
+	ret = pwrite_adi(fd, version, sizeof(version), offset);
+	if (ret != sizeof(version))
+		TEST_STEP_FAILURE(ret);
+
+	ret = pread_adi(fd, version, sizeof(version), offset);
+	if (ret != sizeof(version))
+		TEST_STEP_FAILURE(ret);
+
+	for (i = 0; i < TEST1_VERSION_SZ; i++) {
+		if (expected_version[i] != version[i]) {
+			DEBUG_PRINT_L2(
+				"\tExpected version %d but read version %d\n",
+				expected_version, version[0]);
+			TEST_STEP_FAILURE(-expected_version[i]);
+		}
+	}
+
+	ret = 0;
+out:
+	RETURN_FROM_TEST(ret);
+}
+
+#define TEST2_VERSION_SZ	10327
+static int test2_prpw_aligned_10327bytes(int fd)
+{
+	/* somewhat arbitrarily chosen address */
+	unsigned long paddr =
+		(start_addr[0] + 0x6000) & ~(ADI_BLKSZ - 1);
+	unsigned char version[TEST2_VERSION_SZ],
+		expected_version[TEST2_VERSION_SZ];
+	loff_t offset;
+	int ret, i;
+
+	for (i = 0; i < TEST2_VERSION_SZ; i++) {
+		version[i] = random_version();
+		expected_version[i] = version[i];
+	}
+
+	offset = paddr / ADI_BLKSZ;
+
+	ret = pwrite_adi(fd, version, sizeof(version), offset);
+	if (ret != sizeof(version))
+		TEST_STEP_FAILURE(ret);
+
+	ret = pread_adi(fd, version, sizeof(version), offset);
+	if (ret != sizeof(version))
+		TEST_STEP_FAILURE(ret);
+
+	for (i = 0; i < TEST2_VERSION_SZ; i++) {
+		if (expected_version[i] != version[i]) {
+			DEBUG_PRINT_L2(
+				"\tExpected version %d but read version %d\n",
+				expected_version, version[0]);
+			TEST_STEP_FAILURE(-expected_version[i]);
+		}
+	}
+
+	ret = 0;
+out:
+	RETURN_FROM_TEST(ret);
+}
+
+#define TEST3_VERSION_SZ	12541
+static int test3_prpw_unaligned_12541bytes(int fd)
+{
+	/* somewhat arbitrarily chosen address */
+	unsigned long paddr =
+		((start_addr[0] + 0xC000) & ~(ADI_BLKSZ - 1)) + 17;
+	unsigned char version[TEST3_VERSION_SZ],
+		expected_version[TEST3_VERSION_SZ];
+	loff_t offset;
+	int ret, i;
+
+	for (i = 0; i < TEST3_VERSION_SZ; i++) {
+		version[i] = random_version();
+		expected_version[i] = version[i];
+	}
+
+	offset = paddr / ADI_BLKSZ;
+
+	ret = pwrite_adi(fd, version, sizeof(version), offset);
+	if (ret != sizeof(version))
+		TEST_STEP_FAILURE(ret);
+
+	ret = pread_adi(fd, version, sizeof(version), offset);
+	if (ret != sizeof(version))
+		TEST_STEP_FAILURE(ret);
+
+	for (i = 0; i < TEST3_VERSION_SZ; i++) {
+		if (expected_version[i] != version[i]) {
+			DEBUG_PRINT_L2(
+				"\tExpected version %d but read version %d\n",
+				expected_version, version[0]);
+			TEST_STEP_FAILURE(-expected_version[i]);
+		}
+	}
+
+	ret = 0;
+out:
+	RETURN_FROM_TEST(ret);
+}
+
+static int test4_lseek(int fd)
+{
+#define	OFFSET_ADD	(0x100)
+#define OFFSET_SUBTRACT	(0xFFFFFFF000000000)
+
+	off_t offset_out, offset_in;
+	int ret;
+
+
+	offset_in = 0x123456789abcdef0;
+	offset_out = seek_adi(fd, offset_in, SEEK_SET);
+	if (offset_out != offset_in) {
+		ret = -1;
+		TEST_STEP_FAILURE(ret);
+	}
+
+	/* seek to the current offset.  this should return EINVAL */
+	offset_out = seek_adi(fd, offset_in, SEEK_SET);
+	if (offset_out < 0 && errno == EINVAL)
+		DEBUG_PRINT_L2(
+			"\tSEEK_SET failed as designed. Not an error\n");
+	else {
+		ret = -2;
+		TEST_STEP_FAILURE(ret);
+	}
+
+	offset_out = seek_adi(fd, 0, SEEK_CUR);
+	if (offset_out != offset_in) {
+		ret = -3;
+		TEST_STEP_FAILURE(ret);
+	}
+
+	offset_out = seek_adi(fd, OFFSET_ADD, SEEK_CUR);
+	if (offset_out != (offset_in + OFFSET_ADD)) {
+		ret = -4;
+		TEST_STEP_FAILURE(ret);
+	}
+
+	offset_out = seek_adi(fd, OFFSET_SUBTRACT, SEEK_CUR);
+	if (offset_out != (offset_in + OFFSET_ADD + OFFSET_SUBTRACT)) {
+		ret = -5;
+		TEST_STEP_FAILURE(ret);
+	}
+
+	ret = 0;
+out:
+	RETURN_FROM_TEST(ret);
+}
+
+static int test5_rw_aligned_1byte(int fd)
+{
+	/* somewhat arbitrarily chosen address */
+	unsigned long paddr =
+		(end_addr[range_count - 1] - 0xF000) & ~(ADI_BLKSZ - 1);
+	unsigned char version, expected_version;
+	loff_t offset;
+	off_t oret;
+	int ret;
+
+	offset = paddr / ADI_BLKSZ;
+	version = expected_version = random_version();
+
+	oret = seek_adi(fd, offset, SEEK_SET);
+	if (oret != offset) {
+		ret = -1;
+		TEST_STEP_FAILURE(ret);
+	}
+
+	ret = write_adi(fd, &version, sizeof(version));
+	if (ret != sizeof(version))
+		TEST_STEP_FAILURE(ret);
+
+	oret = seek_adi(fd, offset, SEEK_SET);
+	if (oret != offset) {
+		ret = -1;
+		TEST_STEP_FAILURE(ret);
+	}
+
+	ret = read_adi(fd, &version, sizeof(version));
+	if (ret != sizeof(version))
+		TEST_STEP_FAILURE(ret);
+
+	if (expected_version != version) {
+		DEBUG_PRINT_L2("\tExpected version %d but read version %d\n",
+			       expected_version, version);
+		TEST_STEP_FAILURE(-expected_version);
+	}
+
+	ret = 0;
+out:
+	RETURN_FROM_TEST(ret);
+}
+
+#define TEST6_VERSION_SZ        9434
+static int test6_rw_aligned_9434bytes(int fd)
+{
+	/* somewhat arbitrarily chosen address */
+	unsigned long paddr =
+		(end_addr[range_count - 1] - 0x5F000) & ~(ADI_BLKSZ - 1);
+	unsigned char version[TEST6_VERSION_SZ],
+		      expected_version[TEST6_VERSION_SZ];
+	loff_t offset;
+	off_t oret;
+	int ret, i;
+
+	offset = paddr / ADI_BLKSZ;
+	for (i = 0; i < TEST6_VERSION_SZ; i++)
+		version[i] = expected_version[i] = random_version();
+
+	oret = seek_adi(fd, offset, SEEK_SET);
+	if (oret != offset) {
+		ret = -1;
+		TEST_STEP_FAILURE(ret);
+	}
+
+	ret = write_adi(fd, version, sizeof(version));
+	if (ret != sizeof(version))
+		TEST_STEP_FAILURE(ret);
+
+	memset(version, 0, TEST6_VERSION_SZ);
+
+	oret = seek_adi(fd, offset, SEEK_SET);
+	if (oret != offset) {
+		ret = -1;
+		TEST_STEP_FAILURE(ret);
+	}
+
+	ret = read_adi(fd, version, sizeof(version));
+	if (ret != sizeof(version))
+		TEST_STEP_FAILURE(ret);
+
+	for (i = 0; i < TEST6_VERSION_SZ; i++) {
+		if (expected_version[i] != version[i]) {
+			DEBUG_PRINT_L2(
+				"\tExpected version %d but read version %d\n",
+				expected_version[i], version[i]);
+			TEST_STEP_FAILURE(-expected_version[i]);
+		}
+	}
+
+	ret = 0;
+out:
+	RETURN_FROM_TEST(ret);
+}
+
+#define TEST7_VERSION_SZ        14963
+static int test7_rw_aligned_14963bytes(int fd)
+{
+	/* somewhat arbitrarily chosen address */
+	unsigned long paddr =
+	  ((start_addr[range_count - 1] + 0xF000) & ~(ADI_BLKSZ - 1)) + 39;
+	unsigned char version[TEST7_VERSION_SZ],
+		      expected_version[TEST7_VERSION_SZ];
+	loff_t offset;
+	off_t oret;
+	int ret, i;
+
+	offset = paddr / ADI_BLKSZ;
+	for (i = 0; i < TEST7_VERSION_SZ; i++) {
+		version[i] = random_version();
+		expected_version[i] = version[i];
+	}
+
+	oret = seek_adi(fd, offset, SEEK_SET);
+	if (oret != offset) {
+		ret = -1;
+		TEST_STEP_FAILURE(ret);
+	}
+
+	ret = write_adi(fd, version, sizeof(version));
+	if (ret != sizeof(version))
+		TEST_STEP_FAILURE(ret);
+
+	memset(version, 0, TEST7_VERSION_SZ);
+
+	oret = seek_adi(fd, offset, SEEK_SET);
+	if (oret != offset) {
+		ret = -1;
+		TEST_STEP_FAILURE(ret);
+	}
+
+	ret = read_adi(fd, version, sizeof(version));
+	if (ret != sizeof(version))
+		TEST_STEP_FAILURE(ret);
+
+	for (i = 0; i < TEST7_VERSION_SZ; i++) {
+		if (expected_version[i] != version[i]) {
+			DEBUG_PRINT_L2(
+				"\tExpected version %d but read version %d\n",
+				expected_version[i], version[i]);
+			TEST_STEP_FAILURE(-expected_version[i]);
+		}
+
+		paddr += ADI_BLKSZ;
+	}
+
+	ret = 0;
+out:
+	RETURN_FROM_TEST(ret);
+}
+
+static int (*tests[])(int fd) = {
+	test0_prpw_aligned_1byte,
+	test1_prpw_aligned_4096bytes,
+	test2_prpw_aligned_10327bytes,
+	test3_prpw_unaligned_12541bytes,
+	test4_lseek,
+	test5_rw_aligned_1byte,
+	test6_rw_aligned_9434bytes,
+	test7_rw_aligned_14963bytes,
+};
+#define TEST_COUNT	ARRAY_SIZE(tests)
+
+int main(int argc, char *argv[])
+{
+	int fd, ret, test;
+
+	ret = build_memory_map();
+	if (ret < 0)
+		return ret;
+
+	fd = open("/dev/adi", O_RDWR);
+	if (fd < 0) {
+		fprintf(stderr, "open: error %d: %s\n",
+			errno, strerror(errno));
+		return -errno;
+	}
+
+	for (test = 0; test < TEST_COUNT; test++) {
+		DEBUG_PRINT_L1("Running test #%d\n", test);
+
+		ret = (*tests[test])(fd);
+		if (ret != 0)
+			ksft_test_result_fail("Test #%d failed: error %d\n",
+					      test, ret);
+		else
+			ksft_test_result_pass("Test #%d passed\n", test);
+	}
+
+	print_stats();
+	close(fd);
+
+	if (ksft_get_fail_cnt() > 0)
+		ksft_exit_fail();
+	else
+		ksft_exit_pass();
+
+	/* it's impossible to get here, but the compiler throws a warning
+	 * about control reaching the end of non-void function.  bah.
+	 */
+	return 0;
+}
diff --git a/tools/testing/selftests/sparc64/drivers/drivers_test.sh b/tools/testing/selftests/sparc64/drivers/drivers_test.sh
new file mode 100755
index 000000000..6d08273b7
--- /dev/null
+++ b/tools/testing/selftests/sparc64/drivers/drivers_test.sh
@@ -0,0 +1,30 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+SRC_TREE=../../../../
+
+test_run()
+{
+	if [ -f ${SRC_TREE}/drivers/char/adi.ko ]; then
+		insmod ${SRC_TREE}/drivers/char/adi.ko 2> /dev/null
+		if [ $? -ne 0 ]; then
+			rc=1
+		fi
+	else
+		# Use modprobe dry run to check for missing adi module
+		if ! /sbin/modprobe -q -n adi; then
+			echo "adi: [SKIP]"
+		elif /sbin/modprobe -q adi; then
+			echo "adi: ok"
+		else
+			echo "adi: [FAIL]"
+			rc=1
+		fi
+	fi
+	./adi-test
+	rmmod adi 2> /dev/null
+}
+
+rc=0
+test_run
+exit $rc
diff --git a/tools/testing/selftests/sparc64/run.sh b/tools/testing/selftests/sparc64/run.sh
new file mode 100755
index 000000000..38ad61f93
--- /dev/null
+++ b/tools/testing/selftests/sparc64/run.sh
@@ -0,0 +1,3 @@
+#!/bin/sh
+
+(cd drivers; ./drivers_test.sh)
diff --git a/tools/testing/selftests/splice/.gitignore b/tools/testing/selftests/splice/.gitignore
new file mode 100644
index 000000000..1e23fefd6
--- /dev/null
+++ b/tools/testing/selftests/splice/.gitignore
@@ -0,0 +1 @@
+default_file_splice_read
diff --git a/tools/testing/selftests/splice/Makefile b/tools/testing/selftests/splice/Makefile
new file mode 100644
index 000000000..e519b159b
--- /dev/null
+++ b/tools/testing/selftests/splice/Makefile
@@ -0,0 +1,5 @@
+# SPDX-License-Identifier: GPL-2.0
+TEST_PROGS := default_file_splice_read.sh
+TEST_GEN_PROGS_EXTENDED := default_file_splice_read
+
+include ../lib.mk
diff --git a/tools/testing/selftests/splice/default_file_splice_read.c b/tools/testing/selftests/splice/default_file_splice_read.c
new file mode 100644
index 000000000..a3c6e5672
--- /dev/null
+++ b/tools/testing/selftests/splice/default_file_splice_read.c
@@ -0,0 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <fcntl.h>
+
+int main(int argc, char **argv)
+{
+        splice(0, 0, 1, 0, 1<<30, 0);
+	return 0;
+}
diff --git a/tools/testing/selftests/splice/default_file_splice_read.sh b/tools/testing/selftests/splice/default_file_splice_read.sh
new file mode 100755
index 000000000..490db5a2e
--- /dev/null
+++ b/tools/testing/selftests/splice/default_file_splice_read.sh
@@ -0,0 +1,8 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+n=`./default_file_splice_read </dev/null | wc -c`
+
+test "$n" = 0 && exit 0
+
+echo "default_file_splice_read broken: leaked $n"
+exit 1
diff --git a/tools/testing/selftests/static_keys/Makefile b/tools/testing/selftests/static_keys/Makefile
new file mode 100644
index 000000000..9cdadf37f
--- /dev/null
+++ b/tools/testing/selftests/static_keys/Makefile
@@ -0,0 +1,8 @@
+# Makefile for static keys selftests
+
+# No binaries, but make sure arg-less "make" doesn't trigger "run_tests"
+all:
+
+TEST_PROGS := test_static_keys.sh
+
+include ../lib.mk
diff --git a/tools/testing/selftests/static_keys/config b/tools/testing/selftests/static_keys/config
new file mode 100644
index 000000000..d538fb774
--- /dev/null
+++ b/tools/testing/selftests/static_keys/config
@@ -0,0 +1 @@
+CONFIG_TEST_STATIC_KEYS=m
diff --git a/tools/testing/selftests/static_keys/test_static_keys.sh b/tools/testing/selftests/static_keys/test_static_keys.sh
new file mode 100755
index 000000000..fc9f8cde7
--- /dev/null
+++ b/tools/testing/selftests/static_keys/test_static_keys.sh
@@ -0,0 +1,30 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# Runs static keys kernel module tests
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+if ! /sbin/modprobe -q -n test_static_key_base; then
+	echo "static_key: module test_static_key_base is not found [SKIP]"
+	exit $ksft_skip
+fi
+
+if ! /sbin/modprobe -q -n test_static_keys; then
+	echo "static_key: module test_static_keys is not found [SKIP]"
+	exit $ksft_skip
+fi
+
+if /sbin/modprobe -q test_static_key_base; then
+	if /sbin/modprobe -q test_static_keys; then
+		echo "static_key: ok"
+		/sbin/modprobe -q -r test_static_keys
+		/sbin/modprobe -q -r test_static_key_base
+	else
+		echo "static_keys: [FAIL]"
+		/sbin/modprobe -q -r test_static_key_base
+	fi
+else
+	echo "static_key: [FAIL]"
+	exit 1
+fi
diff --git a/tools/testing/selftests/sync/.gitignore b/tools/testing/selftests/sync/.gitignore
new file mode 100644
index 000000000..f5091e779
--- /dev/null
+++ b/tools/testing/selftests/sync/.gitignore
@@ -0,0 +1 @@
+sync_test
diff --git a/tools/testing/selftests/sync/Makefile b/tools/testing/selftests/sync/Makefile
new file mode 100644
index 000000000..d0121a8a3
--- /dev/null
+++ b/tools/testing/selftests/sync/Makefile
@@ -0,0 +1,38 @@
+# SPDX-License-Identifier: GPL-2.0
+CFLAGS += -O2 -g -std=gnu89 -pthread -Wall -Wextra
+CFLAGS += -I../../../../usr/include/
+LDFLAGS += -pthread
+
+.PHONY: all clean
+
+include ../lib.mk
+
+# lib.mk TEST_CUSTOM_PROGS var is for custom tests that need special
+# build rules. lib.mk will run and install them.
+
+TEST_CUSTOM_PROGS := $(OUTPUT)/sync_test
+all: $(TEST_CUSTOM_PROGS)
+
+OBJS = sync_test.o sync.o
+
+TESTS += sync_alloc.o
+TESTS += sync_fence.o
+TESTS += sync_merge.o
+TESTS += sync_wait.o
+TESTS += sync_stress_parallelism.o
+TESTS += sync_stress_consumer.o
+TESTS += sync_stress_merge.o
+
+OBJS := $(patsubst %,$(OUTPUT)/%,$(OBJS))
+TESTS := $(patsubst %,$(OUTPUT)/%,$(TESTS))
+
+$(TEST_CUSTOM_PROGS): $(TESTS) $(OBJS)
+	$(CC) -o $(TEST_CUSTOM_PROGS) $(OBJS) $(TESTS) $(CFLAGS) $(LDFLAGS)
+
+$(OBJS): $(OUTPUT)/%.o: %.c
+	$(CC) -c $^ -o $@ $(CFLAGS)
+
+$(TESTS): $(OUTPUT)/%.o: %.c
+	$(CC) -c $^ -o $@
+
+EXTRA_CLEAN := $(TEST_CUSTOM_PROGS) $(OBJS) $(TESTS)
diff --git a/tools/testing/selftests/sync/config b/tools/testing/selftests/sync/config
new file mode 100644
index 000000000..1ab7e8130
--- /dev/null
+++ b/tools/testing/selftests/sync/config
@@ -0,0 +1,4 @@
+CONFIG_STAGING=y
+CONFIG_ANDROID=y
+CONFIG_SYNC=y
+CONFIG_SW_SYNC=y
diff --git a/tools/testing/selftests/sync/sw_sync.h b/tools/testing/selftests/sync/sw_sync.h
new file mode 100644
index 000000000..e2cfc6bad
--- /dev/null
+++ b/tools/testing/selftests/sync/sw_sync.h
@@ -0,0 +1,46 @@
+/*
+ *  sw_sync abstraction
+ *
+ *  Copyright 2015-2016 Collabora Ltd.
+ *
+ *  Based on the implementation from the Android Open Source Project,
+ *
+ *  Copyright 2013 Google, Inc
+ *
+ *  Permission is hereby granted, free of charge, to any person obtaining a
+ *  copy of this software and associated documentation files (the "Software"),
+ *  to deal in the Software without restriction, including without limitation
+ *  the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ *  and/or sell copies of the Software, and to permit persons to whom the
+ *  Software is furnished to do so, subject to the following conditions:
+ *
+ *  The above copyright notice and this permission notice shall be included in
+ *  all copies or substantial portions of the Software.
+ *
+ *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ *  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ *  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ *  THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ *  OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ *  ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ *  OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef SELFTESTS_SW_SYNC_H
+#define SELFTESTS_SW_SYNC_H
+
+/*
+ * sw_sync is mainly intended for testing and should not be compiled into
+ * production kernels
+ */
+
+int sw_sync_timeline_create(void);
+int sw_sync_timeline_is_valid(int fd);
+int sw_sync_timeline_inc(int fd, unsigned int count);
+void sw_sync_timeline_destroy(int fd);
+
+int sw_sync_fence_create(int fd, const char *name, unsigned int value);
+int sw_sync_fence_is_valid(int fd);
+void sw_sync_fence_destroy(int fd);
+
+#endif
diff --git a/tools/testing/selftests/sync/sync.c b/tools/testing/selftests/sync/sync.c
new file mode 100644
index 000000000..f3d599f24
--- /dev/null
+++ b/tools/testing/selftests/sync/sync.c
@@ -0,0 +1,221 @@
+/*
+ *  sync / sw_sync abstraction
+ *  Copyright 2015-2016 Collabora Ltd.
+ *
+ *  Based on the implementation from the Android Open Source Project,
+ *
+ *  Copyright 2012 Google, Inc
+ *
+ *  Permission is hereby granted, free of charge, to any person obtaining a
+ *  copy of this software and associated documentation files (the "Software"),
+ *  to deal in the Software without restriction, including without limitation
+ *  the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ *  and/or sell copies of the Software, and to permit persons to whom the
+ *  Software is furnished to do so, subject to the following conditions:
+ *
+ *  The above copyright notice and this permission notice shall be included in
+ *  all copies or substantial portions of the Software.
+ *
+ *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ *  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ *  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ *  THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ *  OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ *  ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ *  OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <fcntl.h>
+#include <malloc.h>
+#include <poll.h>
+#include <stdint.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include "sync.h"
+#include "sw_sync.h"
+
+#include <linux/sync_file.h>
+
+
+/* SW_SYNC ioctls */
+struct sw_sync_create_fence_data {
+	__u32	value;
+	char	name[32];
+	__s32	fence;
+};
+
+#define SW_SYNC_IOC_MAGIC		'W'
+#define SW_SYNC_IOC_CREATE_FENCE	_IOWR(SW_SYNC_IOC_MAGIC, 0,\
+					      struct sw_sync_create_fence_data)
+#define SW_SYNC_IOC_INC			_IOW(SW_SYNC_IOC_MAGIC, 1, __u32)
+
+
+int sync_wait(int fd, int timeout)
+{
+	struct pollfd fds;
+
+	fds.fd = fd;
+	fds.events = POLLIN | POLLERR;
+
+	return poll(&fds, 1, timeout);
+}
+
+int sync_merge(const char *name, int fd1, int fd2)
+{
+	struct sync_merge_data data = {};
+	int err;
+
+	data.fd2 = fd2;
+	strncpy(data.name, name, sizeof(data.name) - 1);
+	data.name[sizeof(data.name) - 1] = '\0';
+
+	err = ioctl(fd1, SYNC_IOC_MERGE, &data);
+	if (err < 0)
+		return err;
+
+	return data.fence;
+}
+
+static struct sync_file_info *sync_file_info(int fd)
+{
+	struct sync_file_info *info;
+	struct sync_fence_info *fence_info;
+	int err, num_fences;
+
+	info = calloc(1, sizeof(*info));
+	if (info == NULL)
+		return NULL;
+
+	err = ioctl(fd, SYNC_IOC_FILE_INFO, info);
+	if (err < 0) {
+		free(info);
+		return NULL;
+	}
+
+	num_fences = info->num_fences;
+
+	if (num_fences) {
+		info->flags = 0;
+		info->num_fences = num_fences;
+
+		fence_info = calloc(num_fences, sizeof(*fence_info));
+		if (!fence_info) {
+			free(info);
+			return NULL;
+		}
+
+		info->sync_fence_info = (uint64_t)fence_info;
+
+		err = ioctl(fd, SYNC_IOC_FILE_INFO, info);
+		if (err < 0) {
+			free(fence_info);
+			free(info);
+			return NULL;
+		}
+	}
+
+	return info;
+}
+
+static void sync_file_info_free(struct sync_file_info *info)
+{
+	free((void *)info->sync_fence_info);
+	free(info);
+}
+
+int sync_fence_size(int fd)
+{
+	int count;
+	struct sync_file_info *info = sync_file_info(fd);
+
+	if (!info)
+		return 0;
+
+	count = info->num_fences;
+
+	sync_file_info_free(info);
+
+	return count;
+}
+
+int sync_fence_count_with_status(int fd, int status)
+{
+	unsigned int i, count = 0;
+	struct sync_fence_info *fence_info = NULL;
+	struct sync_file_info *info = sync_file_info(fd);
+
+	if (!info)
+		return -1;
+
+	fence_info = (struct sync_fence_info *)info->sync_fence_info;
+	for (i = 0 ; i < info->num_fences ; i++) {
+		if (fence_info[i].status == status)
+			count++;
+	}
+
+	sync_file_info_free(info);
+
+	return count;
+}
+
+int sw_sync_timeline_create(void)
+{
+	return open("/sys/kernel/debug/sync/sw_sync", O_RDWR);
+}
+
+int sw_sync_timeline_inc(int fd, unsigned int count)
+{
+	__u32 arg = count;
+
+	return ioctl(fd, SW_SYNC_IOC_INC, &arg);
+}
+
+int sw_sync_timeline_is_valid(int fd)
+{
+	int status;
+
+	if (fd == -1)
+		return 0;
+
+	status = fcntl(fd, F_GETFD, 0);
+	return (status >= 0);
+}
+
+void sw_sync_timeline_destroy(int fd)
+{
+	if (sw_sync_timeline_is_valid(fd))
+		close(fd);
+}
+
+int sw_sync_fence_create(int fd, const char *name, unsigned int value)
+{
+	struct sw_sync_create_fence_data data = {};
+	int err;
+
+	data.value = value;
+	strncpy(data.name, name, sizeof(data.name) - 1);
+	data.name[sizeof(data.name) - 1] = '\0';
+
+	err = ioctl(fd, SW_SYNC_IOC_CREATE_FENCE, &data);
+	if (err < 0)
+		return err;
+
+	return data.fence;
+}
+
+int sw_sync_fence_is_valid(int fd)
+{
+	/* Same code! */
+	return sw_sync_timeline_is_valid(fd);
+}
+
+void sw_sync_fence_destroy(int fd)
+{
+	if (sw_sync_fence_is_valid(fd))
+		close(fd);
+}
diff --git a/tools/testing/selftests/sync/sync.h b/tools/testing/selftests/sync/sync.h
new file mode 100644
index 000000000..fb7156148
--- /dev/null
+++ b/tools/testing/selftests/sync/sync.h
@@ -0,0 +1,40 @@
+/*
+ *  sync abstraction
+ *  Copyright 2015-2016 Collabora Ltd.
+ *
+ *  Based on the implementation from the Android Open Source Project,
+ *
+ *  Copyright 2012 Google, Inc
+ *
+ *  Permission is hereby granted, free of charge, to any person obtaining a
+ *  copy of this software and associated documentation files (the "Software"),
+ *  to deal in the Software without restriction, including without limitation
+ *  the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ *  and/or sell copies of the Software, and to permit persons to whom the
+ *  Software is furnished to do so, subject to the following conditions:
+ *
+ *  The above copyright notice and this permission notice shall be included in
+ *  all copies or substantial portions of the Software.
+ *
+ *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ *  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ *  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ *  THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ *  OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ *  ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ *  OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef SELFTESTS_SYNC_H
+#define SELFTESTS_SYNC_H
+
+#define FENCE_STATUS_ERROR	(-1)
+#define FENCE_STATUS_ACTIVE	(0)
+#define FENCE_STATUS_SIGNALED	(1)
+
+int sync_wait(int fd, int timeout);
+int sync_merge(const char *name, int fd1, int fd2);
+int sync_fence_size(int fd);
+int sync_fence_count_with_status(int fd, int status);
+
+#endif
diff --git a/tools/testing/selftests/sync/sync_alloc.c b/tools/testing/selftests/sync/sync_alloc.c
new file mode 100644
index 000000000..66a28afc0
--- /dev/null
+++ b/tools/testing/selftests/sync/sync_alloc.c
@@ -0,0 +1,74 @@
+/*
+ *  sync allocation tests
+ *  Copyright 2015-2016 Collabora Ltd.
+ *
+ *  Based on the implementation from the Android Open Source Project,
+ *
+ *  Copyright 2012 Google, Inc
+ *
+ *  Permission is hereby granted, free of charge, to any person obtaining a
+ *  copy of this software and associated documentation files (the "Software"),
+ *  to deal in the Software without restriction, including without limitation
+ *  the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ *  and/or sell copies of the Software, and to permit persons to whom the
+ *  Software is furnished to do so, subject to the following conditions:
+ *
+ *  The above copyright notice and this permission notice shall be included in
+ *  all copies or substantial portions of the Software.
+ *
+ *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ *  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ *  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ *  THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ *  OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ *  ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ *  OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "sync.h"
+#include "sw_sync.h"
+#include "synctest.h"
+
+int test_alloc_timeline(void)
+{
+	int timeline, valid;
+
+	timeline = sw_sync_timeline_create();
+	valid = sw_sync_timeline_is_valid(timeline);
+	ASSERT(valid, "Failure allocating timeline\n");
+
+	sw_sync_timeline_destroy(timeline);
+	return 0;
+}
+
+int test_alloc_fence(void)
+{
+	int timeline, fence, valid;
+
+	timeline = sw_sync_timeline_create();
+	valid = sw_sync_timeline_is_valid(timeline);
+	ASSERT(valid, "Failure allocating timeline\n");
+
+	fence = sw_sync_fence_create(timeline, "allocFence", 1);
+	valid = sw_sync_fence_is_valid(fence);
+	ASSERT(valid, "Failure allocating fence\n");
+
+	sw_sync_fence_destroy(fence);
+	sw_sync_timeline_destroy(timeline);
+	return 0;
+}
+
+int test_alloc_fence_negative(void)
+{
+	int fence, timeline;
+
+	timeline = sw_sync_timeline_create();
+	ASSERT(timeline > 0, "Failure allocating timeline\n");
+
+	fence = sw_sync_fence_create(-1, "fence", 1);
+	ASSERT(fence < 0, "Success allocating negative fence\n");
+
+	sw_sync_fence_destroy(fence);
+	sw_sync_timeline_destroy(timeline);
+	return 0;
+}
diff --git a/tools/testing/selftests/sync/sync_fence.c b/tools/testing/selftests/sync/sync_fence.c
new file mode 100644
index 000000000..13f175287
--- /dev/null
+++ b/tools/testing/selftests/sync/sync_fence.c
@@ -0,0 +1,132 @@
+/*
+ *  sync fence tests with one timeline
+ *  Copyright 2015-2016 Collabora Ltd.
+ *
+ *  Based on the implementation from the Android Open Source Project,
+ *
+ *  Copyright 2012 Google, Inc
+ *
+ *  Permission is hereby granted, free of charge, to any person obtaining a
+ *  copy of this software and associated documentation files (the "Software"),
+ *  to deal in the Software without restriction, including without limitation
+ *  the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ *  and/or sell copies of the Software, and to permit persons to whom the
+ *  Software is furnished to do so, subject to the following conditions:
+ *
+ *  The above copyright notice and this permission notice shall be included in
+ *  all copies or substantial portions of the Software.
+ *
+ *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ *  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ *  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ *  THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ *  OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ *  ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ *  OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "sync.h"
+#include "sw_sync.h"
+#include "synctest.h"
+
+int test_fence_one_timeline_wait(void)
+{
+	int fence, valid, ret;
+	int timeline = sw_sync_timeline_create();
+
+	valid = sw_sync_timeline_is_valid(timeline);
+	ASSERT(valid, "Failure allocating timeline\n");
+
+	fence = sw_sync_fence_create(timeline, "allocFence", 5);
+	valid = sw_sync_fence_is_valid(fence);
+	ASSERT(valid, "Failure allocating fence\n");
+
+	/* Wait on fence until timeout */
+	ret = sync_wait(fence, 0);
+	ASSERT(ret == 0, "Failure waiting on fence until timeout\n");
+
+	/* Advance timeline from 0 -> 1 */
+	ret = sw_sync_timeline_inc(timeline, 1);
+	ASSERT(ret == 0, "Failure advancing timeline\n");
+
+	/* Wait on fence until timeout */
+	ret = sync_wait(fence, 0);
+	ASSERT(ret == 0, "Failure waiting on fence until timeout\n");
+
+	/* Signal the fence */
+	ret = sw_sync_timeline_inc(timeline, 4);
+	ASSERT(ret == 0, "Failure signaling the fence\n");
+
+	/* Wait successfully */
+	ret = sync_wait(fence, 0);
+	ASSERT(ret > 0, "Failure waiting on fence\n");
+
+	/* Go even further, and confirm wait still succeeds */
+	ret = sw_sync_timeline_inc(timeline, 10);
+	ASSERT(ret == 0, "Failure going further\n");
+	ret = sync_wait(fence, 0);
+	ASSERT(ret > 0, "Failure waiting ahead\n");
+
+	sw_sync_fence_destroy(fence);
+	sw_sync_timeline_destroy(timeline);
+
+	return 0;
+}
+
+int test_fence_one_timeline_merge(void)
+{
+	int a, b, c, d, valid;
+	int timeline = sw_sync_timeline_create();
+
+	/* create fence a,b,c and then merge them all into fence d */
+	a = sw_sync_fence_create(timeline, "allocFence", 1);
+	b = sw_sync_fence_create(timeline, "allocFence", 2);
+	c = sw_sync_fence_create(timeline, "allocFence", 3);
+
+	valid = sw_sync_fence_is_valid(a) &&
+		sw_sync_fence_is_valid(b) &&
+		sw_sync_fence_is_valid(c);
+	ASSERT(valid, "Failure allocating fences\n");
+
+	d = sync_merge("mergeFence", b, a);
+	d = sync_merge("mergeFence", c, d);
+	valid = sw_sync_fence_is_valid(d);
+	ASSERT(valid, "Failure merging fences\n");
+
+	/* confirm all fences have one active point (even d) */
+	ASSERT(sync_fence_count_with_status(a, FENCE_STATUS_ACTIVE) == 1,
+	       "a has too many active fences!\n");
+	ASSERT(sync_fence_count_with_status(a, FENCE_STATUS_ACTIVE) == 1,
+	       "b has too many active fences!\n");
+	ASSERT(sync_fence_count_with_status(a, FENCE_STATUS_ACTIVE) == 1,
+	       "c has too many active fences!\n");
+	ASSERT(sync_fence_count_with_status(a, FENCE_STATUS_ACTIVE) == 1,
+	       "d has too many active fences!\n");
+
+	/* confirm that d is not signaled until the max of a,b,c */
+	sw_sync_timeline_inc(timeline, 1);
+	ASSERT(sync_fence_count_with_status(a, FENCE_STATUS_SIGNALED) == 1,
+	       "a did not signal!\n");
+	ASSERT(sync_fence_count_with_status(d, FENCE_STATUS_ACTIVE) == 1,
+	       "d signaled too early!\n");
+
+	sw_sync_timeline_inc(timeline, 1);
+	ASSERT(sync_fence_count_with_status(b, FENCE_STATUS_SIGNALED) == 1,
+	       "b did not signal!\n");
+	ASSERT(sync_fence_count_with_status(d, FENCE_STATUS_ACTIVE) == 1,
+	       "d signaled too early!\n");
+
+	sw_sync_timeline_inc(timeline, 1);
+	ASSERT(sync_fence_count_with_status(c, FENCE_STATUS_SIGNALED) == 1,
+	       "c did not signal!\n");
+	ASSERT(sync_fence_count_with_status(d, FENCE_STATUS_ACTIVE) == 0 &&
+	       sync_fence_count_with_status(d, FENCE_STATUS_SIGNALED) == 1,
+	       "d did not signal!\n");
+
+	sw_sync_fence_destroy(d);
+	sw_sync_fence_destroy(c);
+	sw_sync_fence_destroy(b);
+	sw_sync_fence_destroy(a);
+	sw_sync_timeline_destroy(timeline);
+	return 0;
+}
diff --git a/tools/testing/selftests/sync/sync_merge.c b/tools/testing/selftests/sync/sync_merge.c
new file mode 100644
index 000000000..8914d4339
--- /dev/null
+++ b/tools/testing/selftests/sync/sync_merge.c
@@ -0,0 +1,60 @@
+/*
+ *  sync fence merge tests
+ *  Copyright 2015-2016 Collabora Ltd.
+ *
+ *  Based on the implementation from the Android Open Source Project,
+ *
+ *  Copyright 2012 Google, Inc
+ *
+ *  Permission is hereby granted, free of charge, to any person obtaining a
+ *  copy of this software and associated documentation files (the "Software"),
+ *  to deal in the Software without restriction, including without limitation
+ *  the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ *  and/or sell copies of the Software, and to permit persons to whom the
+ *  Software is furnished to do so, subject to the following conditions:
+ *
+ *  The above copyright notice and this permission notice shall be included in
+ *  all copies or substantial portions of the Software.
+ *
+ *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ *  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ *  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ *  THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ *  OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ *  ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ *  OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "sync.h"
+#include "sw_sync.h"
+#include "synctest.h"
+
+int test_fence_merge_same_fence(void)
+{
+	int fence, valid, merged;
+	int timeline = sw_sync_timeline_create();
+
+	valid = sw_sync_timeline_is_valid(timeline);
+	ASSERT(valid, "Failure allocating timeline\n");
+
+	fence = sw_sync_fence_create(timeline, "allocFence", 5);
+	valid = sw_sync_fence_is_valid(fence);
+	ASSERT(valid, "Failure allocating fence\n");
+
+	merged = sync_merge("mergeFence", fence, fence);
+	valid = sw_sync_fence_is_valid(fence);
+	ASSERT(valid, "Failure merging fence\n");
+
+	ASSERT(sync_fence_count_with_status(merged, FENCE_STATUS_SIGNALED) == 0,
+	       "fence signaled too early!\n");
+
+	sw_sync_timeline_inc(timeline, 5);
+	ASSERT(sync_fence_count_with_status(merged, FENCE_STATUS_SIGNALED) == 1,
+	       "fence did not signal!\n");
+
+	sw_sync_fence_destroy(merged);
+	sw_sync_fence_destroy(fence);
+	sw_sync_timeline_destroy(timeline);
+
+	return 0;
+}
diff --git a/tools/testing/selftests/sync/sync_stress_consumer.c b/tools/testing/selftests/sync/sync_stress_consumer.c
new file mode 100644
index 000000000..d9eff8d52
--- /dev/null
+++ b/tools/testing/selftests/sync/sync_stress_consumer.c
@@ -0,0 +1,185 @@
+/*
+ *  sync stress test: producer/consumer
+ *  Copyright 2015-2016 Collabora Ltd.
+ *
+ *  Based on the implementation from the Android Open Source Project,
+ *
+ *  Copyright 2012 Google, Inc
+ *
+ *  Permission is hereby granted, free of charge, to any person obtaining a
+ *  copy of this software and associated documentation files (the "Software"),
+ *  to deal in the Software without restriction, including without limitation
+ *  the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ *  and/or sell copies of the Software, and to permit persons to whom the
+ *  Software is furnished to do so, subject to the following conditions:
+ *
+ *  The above copyright notice and this permission notice shall be included in
+ *  all copies or substantial portions of the Software.
+ *
+ *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ *  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ *  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ *  THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ *  OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ *  ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ *  OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <pthread.h>
+
+#include "sync.h"
+#include "sw_sync.h"
+#include "synctest.h"
+
+/* IMPORTANT NOTE: if you see this test failing on your system, it may be
+ * due to a shortage of file descriptors. Please ensure your system has
+ * a sensible limit for this test to finish correctly.
+ */
+
+/* Returns 1 on error, 0 on success */
+static int busy_wait_on_fence(int fence)
+{
+	int error, active;
+
+	do {
+		error = sync_fence_count_with_status(fence, FENCE_STATUS_ERROR);
+		ASSERT(error == 0, "Error occurred on fence\n");
+		active = sync_fence_count_with_status(fence,
+						      FENCE_STATUS_ACTIVE);
+	} while (active);
+
+	return 0;
+}
+
+static struct {
+	int iterations;
+	int threads;
+	int counter;
+	int consumer_timeline;
+	int *producer_timelines;
+	pthread_mutex_t lock;
+} test_data_mpsc;
+
+static int mpsc_producer_thread(void *d)
+{
+	int id = (long)d;
+	int fence, valid, i;
+	int *producer_timelines = test_data_mpsc.producer_timelines;
+	int consumer_timeline = test_data_mpsc.consumer_timeline;
+	int iterations = test_data_mpsc.iterations;
+
+	for (i = 0; i < iterations; i++) {
+		fence = sw_sync_fence_create(consumer_timeline, "fence", i);
+		valid = sw_sync_fence_is_valid(fence);
+		ASSERT(valid, "Failure creating fence\n");
+
+		/*
+		 * Wait for the consumer to finish. Use alternate
+		 * means of waiting on the fence
+		 */
+
+		if ((iterations + id) % 8 != 0) {
+			ASSERT(sync_wait(fence, -1) > 0,
+			       "Failure waiting on fence\n");
+		} else {
+			ASSERT(busy_wait_on_fence(fence) == 0,
+			       "Failure waiting on fence\n");
+		}
+
+		/*
+		 * Every producer increments the counter, the consumer
+		 * checks and erases it
+		 */
+		pthread_mutex_lock(&test_data_mpsc.lock);
+		test_data_mpsc.counter++;
+		pthread_mutex_unlock(&test_data_mpsc.lock);
+
+		ASSERT(sw_sync_timeline_inc(producer_timelines[id], 1) == 0,
+		       "Error advancing producer timeline\n");
+
+		sw_sync_fence_destroy(fence);
+	}
+
+	return 0;
+}
+
+static int mpcs_consumer_thread(void)
+{
+	int fence, merged, tmp, valid, it, i;
+	int *producer_timelines = test_data_mpsc.producer_timelines;
+	int consumer_timeline = test_data_mpsc.consumer_timeline;
+	int iterations = test_data_mpsc.iterations;
+	int n = test_data_mpsc.threads;
+
+	for (it = 1; it <= iterations; it++) {
+		fence = sw_sync_fence_create(producer_timelines[0], "name", it);
+		for (i = 1; i < n; i++) {
+			tmp = sw_sync_fence_create(producer_timelines[i],
+						   "name", it);
+			merged = sync_merge("name", tmp, fence);
+			sw_sync_fence_destroy(tmp);
+			sw_sync_fence_destroy(fence);
+			fence = merged;
+		}
+
+		valid = sw_sync_fence_is_valid(fence);
+		ASSERT(valid, "Failure merging fences\n");
+
+		/*
+		 * Make sure we see an increment from every producer thread.
+		 * Vary the means by which we wait.
+		 */
+		if (iterations % 8 != 0) {
+			ASSERT(sync_wait(fence, -1) > 0,
+			       "Producers did not increment as expected\n");
+		} else {
+			ASSERT(busy_wait_on_fence(fence) == 0,
+			       "Producers did not increment as expected\n");
+		}
+
+		ASSERT(test_data_mpsc.counter == n * it,
+		       "Counter value mismatch!\n");
+
+		/* Release the producer threads */
+		ASSERT(sw_sync_timeline_inc(consumer_timeline, 1) == 0,
+		       "Failure releasing producer threads\n");
+
+		sw_sync_fence_destroy(fence);
+	}
+
+	return 0;
+}
+
+int test_consumer_stress_multi_producer_single_consumer(void)
+{
+	int iterations = 1 << 12;
+	int n = 5;
+	long i, ret;
+	int producer_timelines[n];
+	int consumer_timeline;
+	pthread_t threads[n];
+
+	consumer_timeline = sw_sync_timeline_create();
+	for (i = 0; i < n; i++)
+		producer_timelines[i] = sw_sync_timeline_create();
+
+	test_data_mpsc.producer_timelines = producer_timelines;
+	test_data_mpsc.consumer_timeline = consumer_timeline;
+	test_data_mpsc.iterations = iterations;
+	test_data_mpsc.threads = n;
+	test_data_mpsc.counter = 0;
+	pthread_mutex_init(&test_data_mpsc.lock, NULL);
+
+	for (i = 0; i < n; i++) {
+		pthread_create(&threads[i], NULL, (void * (*)(void *))
+			       mpsc_producer_thread, (void *)i);
+	}
+
+	/* Consumer thread runs here */
+	ret = mpcs_consumer_thread();
+
+	for (i = 0; i < n; i++)
+		pthread_join(threads[i], NULL);
+
+	return ret;
+}
diff --git a/tools/testing/selftests/sync/sync_stress_merge.c b/tools/testing/selftests/sync/sync_stress_merge.c
new file mode 100644
index 000000000..99e83ef45
--- /dev/null
+++ b/tools/testing/selftests/sync/sync_stress_merge.c
@@ -0,0 +1,115 @@
+/*
+ *  sync stress test: merging
+ *  Copyright 2015-2016 Collabora Ltd.
+ *
+ *  Based on the implementation from the Android Open Source Project,
+ *
+ *  Copyright 2012 Google, Inc
+ *
+ *  Permission is hereby granted, free of charge, to any person obtaining a
+ *  copy of this software and associated documentation files (the "Software"),
+ *  to deal in the Software without restriction, including without limitation
+ *  the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ *  and/or sell copies of the Software, and to permit persons to whom the
+ *  Software is furnished to do so, subject to the following conditions:
+ *
+ *  The above copyright notice and this permission notice shall be included in
+ *  all copies or substantial portions of the Software.
+ *
+ *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ *  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ *  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ *  THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ *  OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ *  ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ *  OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+
+#include "sync.h"
+#include "sw_sync.h"
+#include "synctest.h"
+
+int test_merge_stress_random_merge(void)
+{
+	int i, size, ret;
+	int timeline_count = 32;
+	int merge_count = 1024 * 32;
+	int timelines[timeline_count];
+	int fence_map[timeline_count];
+	int fence, tmpfence, merged, valid;
+	int timeline, timeline_offset, sync_point;
+
+	srand(time(NULL));
+
+	for (i = 0; i < timeline_count; i++)
+		timelines[i] = sw_sync_timeline_create();
+
+	fence = sw_sync_fence_create(timelines[0], "fence", 0);
+	valid = sw_sync_fence_is_valid(fence);
+	ASSERT(valid, "Failure creating fence\n");
+
+	memset(fence_map, -1, sizeof(fence_map));
+	fence_map[0] = 0;
+
+	/*
+	 * Randomly create sync_points out of a fixed set of timelines,
+	 * and merge them together
+	 */
+	for (i = 0; i < merge_count; i++) {
+		/* Generate sync_point. */
+		timeline_offset = rand() % timeline_count;
+		timeline = timelines[timeline_offset];
+		sync_point = rand();
+
+		/* Keep track of the latest sync_point in each timeline. */
+		if (fence_map[timeline_offset] == -1)
+			fence_map[timeline_offset] = sync_point;
+		else if (fence_map[timeline_offset] < sync_point)
+			fence_map[timeline_offset] = sync_point;
+
+		/* Merge */
+		tmpfence = sw_sync_fence_create(timeline, "fence", sync_point);
+		merged = sync_merge("merge", tmpfence, fence);
+		sw_sync_fence_destroy(tmpfence);
+		sw_sync_fence_destroy(fence);
+		fence = merged;
+
+		valid = sw_sync_fence_is_valid(merged);
+		ASSERT(valid, "Failure creating fence i\n");
+	}
+
+	size = 0;
+	for (i = 0; i < timeline_count; i++)
+		if (fence_map[i] != -1)
+			size++;
+
+	/* Confirm our map matches the fence. */
+	ASSERT(sync_fence_size(fence) == size,
+	       "Quantity of elements not matching\n");
+
+	/* Trigger the merged fence */
+	for (i = 0; i < timeline_count; i++) {
+		if (fence_map[i] != -1) {
+			ret = sync_wait(fence, 0);
+			ASSERT(ret == 0,
+			       "Failure waiting on fence until timeout\n");
+			/* Increment the timeline to the last sync_point */
+			sw_sync_timeline_inc(timelines[i], fence_map[i]);
+		}
+	}
+
+	/* Check that the fence is triggered. */
+	ret = sync_wait(fence, 0);
+	ASSERT(ret > 0, "Failure triggering fence\n");
+
+	sw_sync_fence_destroy(fence);
+
+	for (i = 0; i < timeline_count; i++)
+		sw_sync_timeline_destroy(timelines[i]);
+
+	return 0;
+}
diff --git a/tools/testing/selftests/sync/sync_stress_parallelism.c b/tools/testing/selftests/sync/sync_stress_parallelism.c
new file mode 100644
index 000000000..e6c9be671
--- /dev/null
+++ b/tools/testing/selftests/sync/sync_stress_parallelism.c
@@ -0,0 +1,111 @@
+/*
+ *  sync stress test: parallelism
+ *  Copyright 2015-2016 Collabora Ltd.
+ *
+ *  Based on the implementation from the Android Open Source Project,
+ *
+ *  Copyright 2012 Google, Inc
+ *
+ *  Permission is hereby granted, free of charge, to any person obtaining a
+ *  copy of this software and associated documentation files (the "Software"),
+ *  to deal in the Software without restriction, including without limitation
+ *  the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ *  and/or sell copies of the Software, and to permit persons to whom the
+ *  Software is furnished to do so, subject to the following conditions:
+ *
+ *  The above copyright notice and this permission notice shall be included in
+ *  all copies or substantial portions of the Software.
+ *
+ *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ *  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ *  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ *  THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ *  OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ *  ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ *  OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <pthread.h>
+
+#include "sync.h"
+#include "sw_sync.h"
+#include "synctest.h"
+
+static struct {
+	int iterations;
+	int timeline;
+	int counter;
+} test_data_two_threads;
+
+static int test_stress_two_threads_shared_timeline_thread(void *d)
+{
+	int thread_id = (long)d;
+	int timeline = test_data_two_threads.timeline;
+	int iterations = test_data_two_threads.iterations;
+	int fence, valid, ret, i;
+
+	for (i = 0; i < iterations; i++) {
+		fence = sw_sync_fence_create(timeline, "fence",
+					     i * 2 + thread_id);
+		valid = sw_sync_fence_is_valid(fence);
+		ASSERT(valid, "Failure allocating fence\n");
+
+		/* Wait on the prior thread to complete */
+		ret = sync_wait(fence, -1);
+		ASSERT(ret > 0, "Problem occurred on prior thread\n");
+
+		/*
+		 * Confirm the previous thread's writes are visible
+		 * and then increment
+		 */
+		ASSERT(test_data_two_threads.counter == i * 2 + thread_id,
+		       "Counter got damaged!\n");
+		test_data_two_threads.counter++;
+
+		/* Kick off the other thread */
+		ret = sw_sync_timeline_inc(timeline, 1);
+		ASSERT(ret == 0, "Advancing timeline failed\n");
+
+		sw_sync_fence_destroy(fence);
+	}
+
+	return 0;
+}
+
+int test_stress_two_threads_shared_timeline(void)
+{
+	pthread_t a, b;
+	int valid;
+	int timeline = sw_sync_timeline_create();
+
+	valid = sw_sync_timeline_is_valid(timeline);
+	ASSERT(valid, "Failure allocating timeline\n");
+
+	test_data_two_threads.iterations = 1 << 16;
+	test_data_two_threads.counter = 0;
+	test_data_two_threads.timeline = timeline;
+
+	/*
+	 * Use a single timeline to synchronize two threads
+	 * hammmering on the same counter.
+	 */
+
+	pthread_create(&a, NULL, (void *(*)(void *))
+		       test_stress_two_threads_shared_timeline_thread,
+		       (void *)0);
+	pthread_create(&b, NULL, (void *(*)(void *))
+		       test_stress_two_threads_shared_timeline_thread,
+		       (void *)1);
+
+	pthread_join(a, NULL);
+	pthread_join(b, NULL);
+
+	/* make sure the threads did not trample on one another */
+	ASSERT(test_data_two_threads.counter ==
+	       test_data_two_threads.iterations * 2,
+	       "Counter has unexpected value\n");
+
+	sw_sync_timeline_destroy(timeline);
+
+	return 0;
+}
diff --git a/tools/testing/selftests/sync/sync_test.c b/tools/testing/selftests/sync/sync_test.c
new file mode 100644
index 000000000..7f7938263
--- /dev/null
+++ b/tools/testing/selftests/sync/sync_test.c
@@ -0,0 +1,113 @@
+/*
+ *  sync test runner
+ *  Copyright 2015-2016 Collabora Ltd.
+ *
+ *  Based on the implementation from the Android Open Source Project,
+ *
+ *  Copyright 2012 Google, Inc
+ *
+ *  Permission is hereby granted, free of charge, to any person obtaining a
+ *  copy of this software and associated documentation files (the "Software"),
+ *  to deal in the Software without restriction, including without limitation
+ *  the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ *  and/or sell copies of the Software, and to permit persons to whom the
+ *  Software is furnished to do so, subject to the following conditions:
+ *
+ *  The above copyright notice and this permission notice shall be included in
+ *  all copies or substantial portions of the Software.
+ *
+ *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ *  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ *  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ *  THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ *  OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ *  ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ *  OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/wait.h>
+#include <errno.h>
+#include <string.h>
+
+#include "../kselftest.h"
+#include "synctest.h"
+
+static int run_test(int (*test)(void), char *name)
+{
+	int result;
+	pid_t childpid;
+	int ret;
+
+	fflush(stdout);
+	childpid = fork();
+
+	if (childpid) {
+		waitpid(childpid, &result, 0);
+		if (WIFEXITED(result)) {
+			ret = WEXITSTATUS(result);
+			if (!ret)
+				ksft_test_result_pass("[RUN]\t%s\n", name);
+			else
+				ksft_test_result_fail("[RUN]\t%s\n", name);
+			return ret;
+		}
+		return 1;
+	}
+
+	exit(test());
+}
+
+static void sync_api_supported(void)
+{
+	struct stat sbuf;
+	int ret;
+
+	ret = stat("/sys/kernel/debug/sync/sw_sync", &sbuf);
+	if (!ret)
+		return;
+
+	if (errno == ENOENT)
+		ksft_exit_skip("Sync framework not supported by kernel\n");
+
+	if (errno == EACCES)
+		ksft_exit_skip("Run Sync test as root.\n");
+
+	ksft_exit_fail_msg("stat failed on /sys/kernel/debug/sync/sw_sync: %s",
+				strerror(errno));
+}
+
+int main(void)
+{
+	int err;
+
+	ksft_print_header();
+
+	sync_api_supported();
+
+	ksft_print_msg("[RUN]\tTesting sync framework\n");
+
+	RUN_TEST(test_alloc_timeline);
+	RUN_TEST(test_alloc_fence);
+	RUN_TEST(test_alloc_fence_negative);
+
+	RUN_TEST(test_fence_one_timeline_wait);
+	RUN_TEST(test_fence_one_timeline_merge);
+	RUN_TEST(test_fence_merge_same_fence);
+	RUN_TEST(test_fence_multi_timeline_wait);
+	RUN_TEST(test_stress_two_threads_shared_timeline);
+	RUN_TEST(test_consumer_stress_multi_producer_single_consumer);
+	RUN_TEST(test_merge_stress_random_merge);
+
+	err = ksft_get_fail_cnt();
+	if (err)
+		ksft_exit_fail_msg("%d out of %d sync tests failed\n",
+					err, ksft_test_num());
+
+	/* need this return to keep gcc happy */
+	return ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/sync/sync_wait.c b/tools/testing/selftests/sync/sync_wait.c
new file mode 100644
index 000000000..d69b752f6
--- /dev/null
+++ b/tools/testing/selftests/sync/sync_wait.c
@@ -0,0 +1,91 @@
+/*
+ *  sync fence wait tests
+ *  Copyright 2015-2016 Collabora Ltd.
+ *
+ *  Based on the implementation from the Android Open Source Project,
+ *
+ *  Copyright 2012 Google, Inc
+ *
+ *  Permission is hereby granted, free of charge, to any person obtaining a
+ *  copy of this software and associated documentation files (the "Software"),
+ *  to deal in the Software without restriction, including without limitation
+ *  the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ *  and/or sell copies of the Software, and to permit persons to whom the
+ *  Software is furnished to do so, subject to the following conditions:
+ *
+ *  The above copyright notice and this permission notice shall be included in
+ *  all copies or substantial portions of the Software.
+ *
+ *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ *  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ *  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ *  THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ *  OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ *  ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ *  OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "sync.h"
+#include "sw_sync.h"
+#include "synctest.h"
+
+int test_fence_multi_timeline_wait(void)
+{
+	int timelineA, timelineB, timelineC;
+	int fenceA, fenceB, fenceC, merged;
+	int valid, active, signaled, ret;
+
+	timelineA = sw_sync_timeline_create();
+	timelineB = sw_sync_timeline_create();
+	timelineC = sw_sync_timeline_create();
+
+	fenceA = sw_sync_fence_create(timelineA, "fenceA", 5);
+	fenceB = sw_sync_fence_create(timelineB, "fenceB", 5);
+	fenceC = sw_sync_fence_create(timelineC, "fenceC", 5);
+
+	merged = sync_merge("mergeFence", fenceB, fenceA);
+	merged = sync_merge("mergeFence", fenceC, merged);
+
+	valid = sw_sync_fence_is_valid(merged);
+	ASSERT(valid, "Failure merging fence from various timelines\n");
+
+	/* Confirm fence isn't signaled */
+	active = sync_fence_count_with_status(merged, FENCE_STATUS_ACTIVE);
+	ASSERT(active == 3, "Fence signaled too early!\n");
+
+	ret = sync_wait(merged, 0);
+	ASSERT(ret == 0,
+	       "Failure waiting on fence until timeout\n");
+
+	ret = sw_sync_timeline_inc(timelineA, 5);
+	active = sync_fence_count_with_status(merged, FENCE_STATUS_ACTIVE);
+	signaled = sync_fence_count_with_status(merged, FENCE_STATUS_SIGNALED);
+	ASSERT(active == 2 && signaled == 1,
+	       "Fence did not signal properly!\n");
+
+	ret = sw_sync_timeline_inc(timelineB, 5);
+	active = sync_fence_count_with_status(merged, FENCE_STATUS_ACTIVE);
+	signaled = sync_fence_count_with_status(merged, FENCE_STATUS_SIGNALED);
+	ASSERT(active == 1 && signaled == 2,
+	       "Fence did not signal properly!\n");
+
+	ret = sw_sync_timeline_inc(timelineC, 5);
+	active = sync_fence_count_with_status(merged, FENCE_STATUS_ACTIVE);
+	signaled = sync_fence_count_with_status(merged, FENCE_STATUS_SIGNALED);
+	ASSERT(active == 0 && signaled == 3,
+	       "Fence did not signal properly!\n");
+
+	/* confirm you can successfully wait */
+	ret = sync_wait(merged, 100);
+	ASSERT(ret > 0, "Failure waiting on signaled fence\n");
+
+	sw_sync_fence_destroy(merged);
+	sw_sync_fence_destroy(fenceC);
+	sw_sync_fence_destroy(fenceB);
+	sw_sync_fence_destroy(fenceA);
+	sw_sync_timeline_destroy(timelineC);
+	sw_sync_timeline_destroy(timelineB);
+	sw_sync_timeline_destroy(timelineA);
+
+	return 0;
+}
diff --git a/tools/testing/selftests/sync/synctest.h b/tools/testing/selftests/sync/synctest.h
new file mode 100644
index 000000000..90a8e5369
--- /dev/null
+++ b/tools/testing/selftests/sync/synctest.h
@@ -0,0 +1,67 @@
+/*
+ *  sync tests
+ *  Copyright 2015-2016 Collabora Ltd.
+ *
+ *  Based on the implementation from the Android Open Source Project,
+ *
+ *  Copyright 2012 Google, Inc
+ *
+ *  Permission is hereby granted, free of charge, to any person obtaining a
+ *  copy of this software and associated documentation files (the "Software"),
+ *  to deal in the Software without restriction, including without limitation
+ *  the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ *  and/or sell copies of the Software, and to permit persons to whom the
+ *  Software is furnished to do so, subject to the following conditions:
+ *
+ *  The above copyright notice and this permission notice shall be included in
+ *  all copies or substantial portions of the Software.
+ *
+ *  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ *  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ *  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ *  THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ *  OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ *  ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ *  OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef SELFTESTS_SYNCTEST_H
+#define SELFTESTS_SYNCTEST_H
+
+#include <stdio.h>
+#include "../kselftest.h"
+
+#define ASSERT(cond, msg) do { \
+	if (!(cond)) { \
+		ksft_print_msg("[ERROR]\t%s", (msg)); \
+		return 1; \
+	} \
+} while (0)
+
+#define RUN_TEST(x) run_test((x), #x)
+
+/* Allocation tests */
+int test_alloc_timeline(void);
+int test_alloc_fence(void);
+int test_alloc_fence_negative(void);
+
+/* Fence tests with one timeline */
+int test_fence_one_timeline_wait(void);
+int test_fence_one_timeline_merge(void);
+
+/* Fence merge tests */
+int test_fence_merge_same_fence(void);
+
+/* Fence wait tests */
+int test_fence_multi_timeline_wait(void);
+
+/* Stress test - parallelism */
+int test_stress_two_threads_shared_timeline(void);
+
+/* Stress test - consumer */
+int test_consumer_stress_multi_producer_single_consumer(void);
+
+/* Stress test - merging */
+int test_merge_stress_random_merge(void);
+
+#endif
diff --git a/tools/testing/selftests/sysctl/Makefile b/tools/testing/selftests/sysctl/Makefile
new file mode 100644
index 000000000..95c320b35
--- /dev/null
+++ b/tools/testing/selftests/sysctl/Makefile
@@ -0,0 +1,12 @@
+# Makefile for sysctl selftests.
+# Expects kernel.sysctl_writes_strict=1.
+
+# No binaries, but make sure arg-less "make" doesn't trigger "run_tests".
+all:
+
+TEST_PROGS := sysctl.sh
+
+include ../lib.mk
+
+# Nothing to clean up.
+clean:
diff --git a/tools/testing/selftests/sysctl/config b/tools/testing/selftests/sysctl/config
new file mode 100644
index 000000000..6ca14800d
--- /dev/null
+++ b/tools/testing/selftests/sysctl/config
@@ -0,0 +1 @@
+CONFIG_TEST_SYSCTL=y
diff --git a/tools/testing/selftests/sysctl/sysctl.sh b/tools/testing/selftests/sysctl/sysctl.sh
new file mode 100755
index 000000000..584eb8ea7
--- /dev/null
+++ b/tools/testing/selftests/sysctl/sysctl.sh
@@ -0,0 +1,780 @@
+#!/bin/bash
+# Copyright (C) 2017 Luis R. Rodriguez <mcgrof@kernel.org>
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the Free
+# Software Foundation; either version 2 of the License, or at your option any
+# later version; or, when distributed separately from the Linux kernel or
+# when incorporated into other software packages, subject to the following
+# license:
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of copyleft-next (version 0.3.1 or later) as published
+# at http://copyleft-next.org/.
+
+# This performs a series tests against the proc sysctl interface.
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+TEST_NAME="sysctl"
+TEST_DRIVER="test_${TEST_NAME}"
+TEST_DIR=$(dirname $0)
+TEST_FILE=$(mktemp)
+
+# This represents
+#
+# TEST_ID:TEST_COUNT:ENABLED
+#
+# TEST_ID: is the test id number
+# TEST_COUNT: number of times we should run the test
+# ENABLED: 1 if enabled, 0 otherwise
+#
+# Once these are enabled please leave them as-is. Write your own test,
+# we have tons of space.
+ALL_TESTS="0001:1:1"
+ALL_TESTS="$ALL_TESTS 0002:1:1"
+ALL_TESTS="$ALL_TESTS 0003:1:1"
+ALL_TESTS="$ALL_TESTS 0004:1:1"
+ALL_TESTS="$ALL_TESTS 0005:3:1"
+
+test_modprobe()
+{
+       if [ ! -d $DIR ]; then
+               echo "$0: $DIR not present" >&2
+               echo "You must have the following enabled in your kernel:" >&2
+               cat $TEST_DIR/config >&2
+               exit $ksft_skip
+       fi
+}
+
+function allow_user_defaults()
+{
+	if [ -z $DIR ]; then
+		DIR="/sys/module/test_sysctl/"
+	fi
+	if [ -z $DEFAULT_NUM_TESTS ]; then
+		DEFAULT_NUM_TESTS=50
+	fi
+	if [ -z $SYSCTL ]; then
+		SYSCTL="/proc/sys/debug/test_sysctl"
+	fi
+	if [ -z $PROD_SYSCTL ]; then
+		PROD_SYSCTL="/proc/sys"
+	fi
+	if [ -z $WRITES_STRICT ]; then
+		WRITES_STRICT="${PROD_SYSCTL}/kernel/sysctl_writes_strict"
+	fi
+}
+
+function check_production_sysctl_writes_strict()
+{
+	echo -n "Checking production write strict setting ... "
+	if [ ! -e ${WRITES_STRICT} ]; then
+		echo "FAIL, but skip in case of old kernel" >&2
+	else
+		old_strict=$(cat ${WRITES_STRICT})
+		if [ "$old_strict" = "1" ]; then
+			echo "ok"
+		else
+			echo "FAIL, strict value is 0 but force to 1 to continue" >&2
+			echo "1" > ${WRITES_STRICT}
+		fi
+	fi
+
+	if [ -z $PAGE_SIZE ]; then
+		PAGE_SIZE=$(getconf PAGESIZE)
+	fi
+	if [ -z $MAX_DIGITS ]; then
+		MAX_DIGITS=$(($PAGE_SIZE/8))
+	fi
+	if [ -z $INT_MAX ]; then
+		INT_MAX=$(getconf INT_MAX)
+	fi
+	if [ -z $UINT_MAX ]; then
+		UINT_MAX=$(getconf UINT_MAX)
+	fi
+}
+
+test_reqs()
+{
+	uid=$(id -u)
+	if [ $uid -ne 0 ]; then
+		echo $msg must be run as root >&2
+		exit $ksft_skip
+	fi
+
+	if ! which perl 2> /dev/null > /dev/null; then
+		echo "$0: You need perl installed"
+		exit $ksft_skip
+	fi
+	if ! which getconf 2> /dev/null > /dev/null; then
+		echo "$0: You need getconf installed"
+		exit $ksft_skip
+	fi
+	if ! which diff 2> /dev/null > /dev/null; then
+		echo "$0: You need diff installed"
+		exit $ksft_skip
+	fi
+}
+
+function load_req_mod()
+{
+	if [ ! -d $DIR ]; then
+		if ! modprobe -q -n $TEST_DRIVER; then
+			echo "$0: module $TEST_DRIVER not found [SKIP]"
+			exit $ksft_skip
+		fi
+		modprobe $TEST_DRIVER
+		if [ $? -ne 0 ]; then
+			exit
+		fi
+	fi
+}
+
+reset_vals()
+{
+	VAL=""
+	TRIGGER=$(basename ${TARGET})
+	case "$TRIGGER" in
+		int_0001)
+			VAL="60"
+			;;
+		int_0002)
+			VAL="1"
+			;;
+		uint_0001)
+			VAL="314"
+			;;
+		string_0001)
+			VAL="(none)"
+			;;
+		*)
+			;;
+	esac
+	echo -n $VAL > $TARGET
+}
+
+set_orig()
+{
+	if [ ! -z $TARGET ]; then
+		echo "${ORIG}" > "${TARGET}"
+	fi
+}
+
+set_test()
+{
+	echo "${TEST_STR}" > "${TARGET}"
+}
+
+verify()
+{
+	local seen
+	seen=$(cat "$1")
+	if [ "${seen}" != "${TEST_STR}" ]; then
+		return 1
+	fi
+	return 0
+}
+
+verify_diff_w()
+{
+	echo "$TEST_STR" | diff -q -w -u - $1
+	return $?
+}
+
+test_rc()
+{
+	if [[ $rc != 0 ]]; then
+		echo "Failed test, return value: $rc" >&2
+		exit $rc
+	fi
+}
+
+test_finish()
+{
+	set_orig
+	rm -f "${TEST_FILE}"
+
+	if [ ! -z ${old_strict} ]; then
+		echo ${old_strict} > ${WRITES_STRICT}
+	fi
+	exit $rc
+}
+
+run_numerictests()
+{
+	echo "== Testing sysctl behavior against ${TARGET} =="
+
+	rc=0
+
+	echo -n "Writing test file ... "
+	echo "${TEST_STR}" > "${TEST_FILE}"
+	if ! verify "${TEST_FILE}"; then
+		echo "FAIL" >&2
+		exit 1
+	else
+		echo "ok"
+	fi
+
+	echo -n "Checking sysctl is not set to test value ... "
+	if verify "${TARGET}"; then
+		echo "FAIL" >&2
+		exit 1
+	else
+		echo "ok"
+	fi
+
+	echo -n "Writing sysctl from shell ... "
+	set_test
+	if ! verify "${TARGET}"; then
+		echo "FAIL" >&2
+		exit 1
+	else
+		echo "ok"
+	fi
+
+	echo -n "Resetting sysctl to original value ... "
+	set_orig
+	if verify "${TARGET}"; then
+		echo "FAIL" >&2
+		exit 1
+	else
+		echo "ok"
+	fi
+
+	# Now that we've validated the sanity of "set_test" and "set_orig",
+	# we can use those functions to set starting states before running
+	# specific behavioral tests.
+
+	echo -n "Writing entire sysctl in single write ... "
+	set_orig
+	dd if="${TEST_FILE}" of="${TARGET}" bs=4096 2>/dev/null
+	if ! verify "${TARGET}"; then
+		echo "FAIL" >&2
+		rc=1
+	else
+		echo "ok"
+	fi
+
+	echo -n "Writing middle of sysctl after synchronized seek ... "
+	set_test
+	dd if="${TEST_FILE}" of="${TARGET}" bs=1 seek=1 skip=1 2>/dev/null
+	if ! verify "${TARGET}"; then
+		echo "FAIL" >&2
+		rc=1
+	else
+		echo "ok"
+	fi
+
+	echo -n "Writing beyond end of sysctl ... "
+	set_orig
+	dd if="${TEST_FILE}" of="${TARGET}" bs=20 seek=2 2>/dev/null
+	if verify "${TARGET}"; then
+		echo "FAIL" >&2
+		rc=1
+	else
+		echo "ok"
+	fi
+
+	echo -n "Writing sysctl with multiple long writes ... "
+	set_orig
+	(perl -e 'print "A" x 50;'; echo "${TEST_STR}") | \
+		dd of="${TARGET}" bs=50 2>/dev/null
+	if verify "${TARGET}"; then
+		echo "FAIL" >&2
+		rc=1
+	else
+		echo "ok"
+	fi
+	test_rc
+}
+
+# Your test must accept digits 3 and 4 to use this
+run_limit_digit()
+{
+	echo -n "Checking ignoring spaces up to PAGE_SIZE works on write ..."
+	reset_vals
+
+	LIMIT=$((MAX_DIGITS -1))
+	TEST_STR="3"
+	(perl -e 'print " " x '$LIMIT';'; echo "${TEST_STR}") | \
+		dd of="${TARGET}" 2>/dev/null
+
+	if ! verify "${TARGET}"; then
+		echo "FAIL" >&2
+		rc=1
+	else
+		echo "ok"
+	fi
+	test_rc
+
+	echo -n "Checking passing PAGE_SIZE of spaces fails on write ..."
+	reset_vals
+
+	LIMIT=$((MAX_DIGITS))
+	TEST_STR="4"
+	(perl -e 'print " " x '$LIMIT';'; echo "${TEST_STR}") | \
+		dd of="${TARGET}" 2>/dev/null
+
+	if verify "${TARGET}"; then
+		echo "FAIL" >&2
+		rc=1
+	else
+		echo "ok"
+	fi
+	test_rc
+}
+
+# You are using an int
+run_limit_digit_int()
+{
+	echo -n "Testing INT_MAX works ..."
+	reset_vals
+	TEST_STR="$INT_MAX"
+	echo -n $TEST_STR > $TARGET
+
+	if ! verify "${TARGET}"; then
+		echo "FAIL" >&2
+		rc=1
+	else
+		echo "ok"
+	fi
+	test_rc
+
+	echo -n "Testing INT_MAX + 1 will fail as expected..."
+	reset_vals
+	let TEST_STR=$INT_MAX+1
+	echo -n $TEST_STR > $TARGET 2> /dev/null
+
+	if verify "${TARGET}"; then
+		echo "FAIL" >&2
+		rc=1
+	else
+		echo "ok"
+	fi
+	test_rc
+
+	echo -n "Testing negative values will work as expected..."
+	reset_vals
+	TEST_STR="-3"
+	echo -n $TEST_STR > $TARGET 2> /dev/null
+	if ! verify "${TARGET}"; then
+		echo "FAIL" >&2
+		rc=1
+	else
+		echo "ok"
+	fi
+	test_rc
+}
+
+# You used an int array
+run_limit_digit_int_array()
+{
+	echo -n "Testing array works as expected ... "
+	TEST_STR="4 3 2 1"
+	echo -n $TEST_STR > $TARGET
+
+	if ! verify_diff_w "${TARGET}"; then
+		echo "FAIL" >&2
+		rc=1
+	else
+		echo "ok"
+	fi
+	test_rc
+
+	echo -n "Testing skipping trailing array elements works ... "
+	# Do not reset_vals, carry on the values from the last test.
+	# If we only echo in two digits the last two are left intact
+	TEST_STR="100 101"
+	echo -n $TEST_STR > $TARGET
+	# After we echo in, to help diff we need to set on TEST_STR what
+	# we expect the result to be.
+	TEST_STR="100 101 2 1"
+
+	if ! verify_diff_w "${TARGET}"; then
+		echo "FAIL" >&2
+		rc=1
+	else
+		echo "ok"
+	fi
+	test_rc
+
+	echo -n "Testing PAGE_SIZE limit on array works ... "
+	# Do not reset_vals, carry on the values from the last test.
+	# Even if you use an int array, you are still restricted to
+	# MAX_DIGITS, this is a known limitation. Test limit works.
+	LIMIT=$((MAX_DIGITS -1))
+	TEST_STR="9"
+	(perl -e 'print " " x '$LIMIT';'; echo "${TEST_STR}") | \
+		dd of="${TARGET}" 2>/dev/null
+
+	TEST_STR="9 101 2 1"
+	if ! verify_diff_w "${TARGET}"; then
+		echo "FAIL" >&2
+		rc=1
+	else
+		echo "ok"
+	fi
+	test_rc
+
+	echo -n "Testing exceeding PAGE_SIZE limit fails as expected ... "
+	# Do not reset_vals, carry on the values from the last test.
+	# Now go over limit.
+	LIMIT=$((MAX_DIGITS))
+	TEST_STR="7"
+	(perl -e 'print " " x '$LIMIT';'; echo "${TEST_STR}") | \
+		dd of="${TARGET}" 2>/dev/null
+
+	TEST_STR="7 101 2 1"
+	if verify_diff_w "${TARGET}"; then
+		echo "FAIL" >&2
+		rc=1
+	else
+		echo "ok"
+	fi
+	test_rc
+}
+
+# You are using an unsigned int
+run_limit_digit_uint()
+{
+	echo -n "Testing UINT_MAX works ..."
+	reset_vals
+	TEST_STR="$UINT_MAX"
+	echo -n $TEST_STR > $TARGET
+
+	if ! verify "${TARGET}"; then
+		echo "FAIL" >&2
+		rc=1
+	else
+		echo "ok"
+	fi
+	test_rc
+
+	echo -n "Testing UINT_MAX + 1 will fail as expected..."
+	reset_vals
+	TEST_STR=$(($UINT_MAX+1))
+	echo -n $TEST_STR > $TARGET 2> /dev/null
+
+	if verify "${TARGET}"; then
+		echo "FAIL" >&2
+		rc=1
+	else
+		echo "ok"
+	fi
+	test_rc
+
+	echo -n "Testing negative values will not work as expected ..."
+	reset_vals
+	TEST_STR="-3"
+	echo -n $TEST_STR > $TARGET 2> /dev/null
+
+	if verify "${TARGET}"; then
+		echo "FAIL" >&2
+		rc=1
+	else
+		echo "ok"
+	fi
+	test_rc
+}
+
+run_stringtests()
+{
+	echo -n "Writing entire sysctl in short writes ... "
+	set_orig
+	dd if="${TEST_FILE}" of="${TARGET}" bs=1 2>/dev/null
+	if ! verify "${TARGET}"; then
+		echo "FAIL" >&2
+		rc=1
+	else
+		echo "ok"
+	fi
+
+	echo -n "Writing middle of sysctl after unsynchronized seek ... "
+	set_test
+	dd if="${TEST_FILE}" of="${TARGET}" bs=1 seek=1 2>/dev/null
+	if verify "${TARGET}"; then
+		echo "FAIL" >&2
+		rc=1
+	else
+		echo "ok"
+	fi
+
+	echo -n "Checking sysctl maxlen is at least $MAXLEN ... "
+	set_orig
+	perl -e 'print "A" x ('"${MAXLEN}"'-2), "B";' | \
+		dd of="${TARGET}" bs="${MAXLEN}" 2>/dev/null
+	if ! grep -q B "${TARGET}"; then
+		echo "FAIL" >&2
+		rc=1
+	else
+		echo "ok"
+	fi
+
+	echo -n "Checking sysctl keeps original string on overflow append ... "
+	set_orig
+	perl -e 'print "A" x ('"${MAXLEN}"'-1), "B";' | \
+		dd of="${TARGET}" bs=$(( MAXLEN - 1 )) 2>/dev/null
+	if grep -q B "${TARGET}"; then
+		echo "FAIL" >&2
+		rc=1
+	else
+		echo "ok"
+	fi
+
+	echo -n "Checking sysctl stays NULL terminated on write ... "
+	set_orig
+	perl -e 'print "A" x ('"${MAXLEN}"'-1), "B";' | \
+		dd of="${TARGET}" bs="${MAXLEN}" 2>/dev/null
+	if grep -q B "${TARGET}"; then
+		echo "FAIL" >&2
+		rc=1
+	else
+		echo "ok"
+	fi
+
+	echo -n "Checking sysctl stays NULL terminated on overwrite ... "
+	set_orig
+	perl -e 'print "A" x ('"${MAXLEN}"'-1), "BB";' | \
+		dd of="${TARGET}" bs=$(( $MAXLEN + 1 )) 2>/dev/null
+	if grep -q B "${TARGET}"; then
+		echo "FAIL" >&2
+		rc=1
+	else
+		echo "ok"
+	fi
+
+	test_rc
+}
+
+sysctl_test_0001()
+{
+	TARGET="${SYSCTL}/int_0001"
+	reset_vals
+	ORIG=$(cat "${TARGET}")
+	TEST_STR=$(( $ORIG + 1 ))
+
+	run_numerictests
+	run_limit_digit
+}
+
+sysctl_test_0002()
+{
+	TARGET="${SYSCTL}/string_0001"
+	reset_vals
+	ORIG=$(cat "${TARGET}")
+	TEST_STR="Testing sysctl"
+	# Only string sysctls support seeking/appending.
+	MAXLEN=65
+
+	run_numerictests
+	run_stringtests
+}
+
+sysctl_test_0003()
+{
+	TARGET="${SYSCTL}/int_0002"
+	reset_vals
+	ORIG=$(cat "${TARGET}")
+	TEST_STR=$(( $ORIG + 1 ))
+
+	run_numerictests
+	run_limit_digit
+	run_limit_digit_int
+}
+
+sysctl_test_0004()
+{
+	TARGET="${SYSCTL}/uint_0001"
+	reset_vals
+	ORIG=$(cat "${TARGET}")
+	TEST_STR=$(( $ORIG + 1 ))
+
+	run_numerictests
+	run_limit_digit
+	run_limit_digit_uint
+}
+
+sysctl_test_0005()
+{
+	TARGET="${SYSCTL}/int_0003"
+	reset_vals
+	ORIG=$(cat "${TARGET}")
+
+	run_limit_digit_int_array
+}
+
+list_tests()
+{
+	echo "Test ID list:"
+	echo
+	echo "TEST_ID x NUM_TEST"
+	echo "TEST_ID:   Test ID"
+	echo "NUM_TESTS: Number of recommended times to run the test"
+	echo
+	echo "0001 x $(get_test_count 0001) - tests proc_dointvec_minmax()"
+	echo "0002 x $(get_test_count 0002) - tests proc_dostring()"
+	echo "0003 x $(get_test_count 0003) - tests proc_dointvec()"
+	echo "0004 x $(get_test_count 0004) - tests proc_douintvec()"
+	echo "0005 x $(get_test_count 0005) - tests proc_douintvec() array"
+}
+
+test_reqs
+
+usage()
+{
+	NUM_TESTS=$(grep -o ' ' <<<"$ALL_TESTS" | grep -c .)
+	let NUM_TESTS=$NUM_TESTS+1
+	MAX_TEST=$(printf "%04d\n" $NUM_TESTS)
+	echo "Usage: $0 [ -t <4-number-digit> ] | [ -w <4-number-digit> ] |"
+	echo "		 [ -s <4-number-digit> ] | [ -c <4-number-digit> <test- count>"
+	echo "           [ all ] [ -h | --help ] [ -l ]"
+	echo ""
+	echo "Valid tests: 0001-$MAX_TEST"
+	echo ""
+	echo "    all     Runs all tests (default)"
+	echo "    -t      Run test ID the number amount of times is recommended"
+	echo "    -w      Watch test ID run until it runs into an error"
+	echo "    -c      Run test ID once"
+	echo "    -s      Run test ID x test-count number of times"
+	echo "    -l      List all test ID list"
+	echo " -h|--help  Help"
+	echo
+	echo "If an error every occurs execution will immediately terminate."
+	echo "If you are adding a new test try using -w <test-ID> first to"
+	echo "make sure the test passes a series of tests."
+	echo
+	echo Example uses:
+	echo
+	echo "$TEST_NAME.sh            -- executes all tests"
+	echo "$TEST_NAME.sh -t 0002    -- Executes test ID 0002 number of times is recomended"
+	echo "$TEST_NAME.sh -w 0002    -- Watch test ID 0002 run until an error occurs"
+	echo "$TEST_NAME.sh -s 0002    -- Run test ID 0002 once"
+	echo "$TEST_NAME.sh -c 0002 3  -- Run test ID 0002 three times"
+	echo
+	list_tests
+	exit 1
+}
+
+function test_num()
+{
+	re='^[0-9]+$'
+	if ! [[ $1 =~ $re ]]; then
+		usage
+	fi
+}
+
+function get_test_count()
+{
+	test_num $1
+	TEST_DATA=$(echo $ALL_TESTS | awk '{print $'$1'}')
+	LAST_TWO=${TEST_DATA#*:*}
+	echo ${LAST_TWO%:*}
+}
+
+function get_test_enabled()
+{
+	test_num $1
+	TEST_DATA=$(echo $ALL_TESTS | awk '{print $'$1'}')
+	echo ${TEST_DATA#*:*:}
+}
+
+function run_all_tests()
+{
+	for i in $ALL_TESTS ; do
+		TEST_ID=${i%:*:*}
+		ENABLED=$(get_test_enabled $TEST_ID)
+		TEST_COUNT=$(get_test_count $TEST_ID)
+		if [[ $ENABLED -eq "1" ]]; then
+			test_case $TEST_ID $TEST_COUNT
+		fi
+	done
+}
+
+function watch_log()
+{
+	if [ $# -ne 3 ]; then
+		clear
+	fi
+	date
+	echo "Running test: $2 - run #$1"
+}
+
+function watch_case()
+{
+	i=0
+	while [ 1 ]; do
+
+		if [ $# -eq 1 ]; then
+			test_num $1
+			watch_log $i ${TEST_NAME}_test_$1
+			${TEST_NAME}_test_$1
+		else
+			watch_log $i all
+			run_all_tests
+		fi
+		let i=$i+1
+	done
+}
+
+function test_case()
+{
+	NUM_TESTS=$DEFAULT_NUM_TESTS
+	if [ $# -eq 2 ]; then
+		NUM_TESTS=$2
+	fi
+
+	i=0
+	while [ $i -lt $NUM_TESTS ]; do
+		test_num $1
+		watch_log $i ${TEST_NAME}_test_$1 noclear
+		RUN_TEST=${TEST_NAME}_test_$1
+		$RUN_TEST
+		let i=$i+1
+	done
+}
+
+function parse_args()
+{
+	if [ $# -eq 0 ]; then
+		run_all_tests
+	else
+		if [[ "$1" = "all" ]]; then
+			run_all_tests
+		elif [[ "$1" = "-w" ]]; then
+			shift
+			watch_case $@
+		elif [[ "$1" = "-t" ]]; then
+			shift
+			test_num $1
+			test_case $1 $(get_test_count $1)
+		elif [[ "$1" = "-c" ]]; then
+			shift
+			test_num $1
+			test_num $2
+			test_case $1 $2
+		elif [[ "$1" = "-s" ]]; then
+			shift
+			test_case $1 1
+		elif [[ "$1" = "-l" ]]; then
+			list_tests
+		elif [[ "$1" = "-h" || "$1" = "--help" ]]; then
+			usage
+		else
+			usage
+		fi
+	fi
+}
+
+test_reqs
+allow_user_defaults
+check_production_sysctl_writes_strict
+test_modprobe
+load_req_mod
+
+trap "test_finish" EXIT
+
+parse_args $@
+
+exit 0
diff --git a/tools/testing/selftests/tc-testing/.gitignore b/tools/testing/selftests/tc-testing/.gitignore
new file mode 100644
index 000000000..7a60b85e1
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/.gitignore
@@ -0,0 +1,2 @@
+__pycache__/
+*.pyc
diff --git a/tools/testing/selftests/tc-testing/README b/tools/testing/selftests/tc-testing/README
new file mode 100644
index 000000000..49a6f8c3f
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/README
@@ -0,0 +1,247 @@
+tdc - Linux Traffic Control (tc) unit testing suite
+
+Author: Lucas Bates - lucasb@mojatatu.com
+
+tdc is a Python script to load tc unit tests from a separate JSON file and
+execute them inside a network namespace dedicated to the task.
+
+
+REQUIREMENTS
+------------
+
+*  Minimum Python version of 3.4. Earlier 3.X versions may work but are not
+   guaranteed.
+
+*  The kernel must have network namespace support
+
+*  The kernel must have veth support available, as a veth pair is created
+   prior to running the tests.
+
+*  The kernel must have the appropriate infrastructure enabled to run all tdc
+   unit tests. See the config file in this directory for minimum required
+   features. As new tests will be added, config options list will be updated.
+
+*  All tc-related features being tested must be built in or available as
+   modules.  To check what is required in current setup run:
+   ./tdc.py -c
+
+   Note:
+   In the current release, tdc run will abort due to a failure in setup or
+   teardown commands - which includes not being able to run a test simply
+   because the kernel did not support a specific feature. (This will be
+   handled in a future version - the current workaround is to run the tests
+   on specific test categories that your kernel supports)
+
+
+BEFORE YOU RUN
+--------------
+
+The path to the tc executable that will be most commonly tested can be defined
+in the tdc_config.py file. Find the 'TC' entry in the NAMES dictionary and
+define the path.
+
+If you need to test a different tc executable on the fly, you can do so by
+using the -p option when running tdc:
+	./tdc.py -p /path/to/tc
+
+
+RUNNING TDC
+-----------
+
+To use tdc, root privileges are required.  This is because the
+commands being tested must be run as root.  The code that enforces
+execution by root uid has been moved into a plugin (see PLUGIN
+ARCHITECTURE, below).
+
+If nsPlugin is linked, all tests are executed inside a network
+namespace to prevent conflicts within the host.
+
+Running tdc without any arguments will run all tests. Refer to the section
+on command line arguments for more information, or run:
+	./tdc.py -h
+
+tdc will list the test names as they are being run, and print a summary in
+TAP (Test Anything Protocol) format when they are done. If tests fail,
+output captured from the failing test will be printed immediately following
+the failed test in the TAP output.
+
+
+OVERVIEW OF TDC EXECUTION
+-------------------------
+
+One run of tests is considered a "test suite" (this will be refined in the
+future).  A test suite has one or more test cases in it.
+
+A test case has four stages:
+
+  - setup
+  - execute
+  - verify
+  - teardown
+
+The setup and teardown stages can run zero or more commands.  The setup
+stage does some setup if the test needs it.  The teardown stage undoes
+the setup and returns the system to a "neutral" state so any other test
+can be run next.  These two stages require any commands run to return
+success, but do not otherwise verify the results.
+
+The execute and verify stages each run one command.  The execute stage
+tests the return code against one or more acceptable values.  The
+verify stage checks the return code for success, and also compares
+the stdout with a regular expression.
+
+Each of the commands in any stage will run in a shell instance.
+
+
+USER-DEFINED CONSTANTS
+----------------------
+
+The tdc_config.py file contains multiple values that can be altered to suit
+your needs. Any value in the NAMES dictionary can be altered without affecting
+the tests to be run. These values are used in the tc commands that will be
+executed as part of the test. More will be added as test cases require.
+
+Example:
+	$TC qdisc add dev $DEV1 ingress
+
+The NAMES values are used to substitute into the commands in the test cases.
+
+
+COMMAND LINE ARGUMENTS
+----------------------
+
+Run tdc.py -h to see the full list of available arguments.
+
+usage: tdc.py [-h] [-p PATH] [-D DIR [DIR ...]] [-f FILE [FILE ...]]
+              [-c [CATG [CATG ...]]] [-e ID [ID ...]] [-l] [-s] [-i] [-v] [-N]
+              [-d DEVICE] [-P] [-n] [-V]
+
+Linux TC unit tests
+
+optional arguments:
+  -h, --help            show this help message and exit
+  -p PATH, --path PATH  The full path to the tc executable to use
+  -v, --verbose         Show the commands that are being run
+  -N, --notap           Suppress tap results for command under test
+  -d DEVICE, --device DEVICE
+                        Execute the test case in flower category
+  -P, --pause           Pause execution just before post-suite stage
+
+selection:
+  select which test cases: files plus directories; filtered by categories
+  plus testids
+
+  -D DIR [DIR ...], --directory DIR [DIR ...]
+                        Collect tests from the specified directory(ies)
+                        (default [tc-tests])
+  -f FILE [FILE ...], --file FILE [FILE ...]
+                        Run tests from the specified file(s)
+  -c [CATG [CATG ...]], --category [CATG [CATG ...]]
+                        Run tests only from the specified category/ies, or if
+                        no category/ies is/are specified, list known
+                        categories.
+  -e ID [ID ...], --execute ID [ID ...]
+                        Execute the specified test cases with specified IDs
+
+action:
+  select action to perform on selected test cases
+
+  -l, --list            List all test cases, or those only within the
+                        specified category
+  -s, --show            Display the selected test cases
+  -i, --id              Generate ID numbers for new test cases
+
+netns:
+  options for nsPlugin (run commands in net namespace)
+
+  -n, --namespace
+                        Run commands in namespace as specified in tdc_config.py
+
+valgrind:
+  options for valgrindPlugin (run command under test under Valgrind)
+
+  -V, --valgrind        Run commands under valgrind
+
+
+PLUGIN ARCHITECTURE
+-------------------
+
+There is now a plugin architecture, and some of the functionality that
+was in the tdc.py script has been moved into the plugins.
+
+The plugins are in the directory plugin-lib.  The are executed from
+directory plugins.  Put symbolic links from plugins to plugin-lib,
+and name them according to the order you want them to run.
+
+Example:
+
+bjb@bee:~/work/tc-testing$ ls -l plugins
+total 4
+lrwxrwxrwx  1 bjb  bjb    27 Oct  4 16:12 10-rootPlugin.py -> ../plugin-lib/rootPlugin.py
+lrwxrwxrwx  1 bjb  bjb    25 Oct 12 17:55 20-nsPlugin.py -> ../plugin-lib/nsPlugin.py
+-rwxr-xr-x  1 bjb  bjb     0 Sep 29 15:56 __init__.py
+
+The plugins are a subclass of TdcPlugin, defined in TdcPlugin.py and
+must be called "SubPlugin" so tdc can find them.  They are
+distinguished from each other in the python program by their module
+name.
+
+This base class supplies "hooks" to run extra functions.  These hooks are as follows:
+
+pre- and post-suite
+pre- and post-case
+pre- and post-execute stage
+adjust-command (runs in all stages and receives the stage name)
+
+The pre-suite hook receives the number of tests and an array of test ids.
+This allows you to dump out the list of skipped tests in the event of a
+failure during setup or teardown stage.
+
+The pre-case hook receives the ordinal number and test id of the current test.
+
+The adjust-command hook receives the stage id (see list below) and the
+full command to be executed.  This allows for last-minute adjustment
+of the command.
+
+The stages are identified by the following strings:
+
+  - pre  (pre-suite)
+  - setup
+  - command
+  - verify
+  - teardown
+  - post (post-suite)
+
+
+To write a plugin, you need to inherit from TdcPlugin in
+TdcPlugin.py.  To use the plugin, you have to put the
+implementation file in plugin-lib, and add a symbolic link to it from
+plugins.  It will be detected at run time and invoked at the
+appropriate times.  There are a few examples in the plugin-lib
+directory:
+
+  - rootPlugin.py:
+      implements the enforcement of running as root
+  - nsPlugin.py:
+      sets up a network namespace and runs all commands in that namespace
+  - valgrindPlugin.py
+      runs each command in the execute stage under valgrind,
+      and checks for leaks.
+      This plugin will output an extra test for each test in the test file,
+      one is the existing output as to whether the test passed or failed,
+      and the other is a test whether the command leaked memory or not.
+      (This one is a preliminary version, it may not work quite right yet,
+      but the overall template is there and it should only need tweaks.)
+
+
+ACKNOWLEDGEMENTS
+----------------
+
+Thanks to:
+
+Jamal Hadi Salim, for providing valuable test cases
+Keara Leibovitz, who wrote the CLI test driver that I used as a base for the
+   first version of the tc testing suite. This work was presented at
+   Netdev 1.2 Tokyo in October 2016.
+Samir Hussain, for providing help while I dove into Python for the first time
+    and being a second eye for this code.
diff --git a/tools/testing/selftests/tc-testing/TODO.txt b/tools/testing/selftests/tc-testing/TODO.txt
new file mode 100644
index 000000000..c40698557
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/TODO.txt
@@ -0,0 +1,31 @@
+tc Testing Suite To-Do list:
+
+- Determine what tc features are supported in the kernel. If features are not
+  present, prevent the related categories from running.
+
+- Add support for multiple versions of tc to run successively
+
+- Improve error messages when tdc aborts its run.  Partially done - still
+  need to better handle problems in pre- and post-suite.
+
+- Use python logger module for debug/verbose output
+
+- Allow tdc to write its results to file.
+  Maybe use python logger module for this too.
+
+- A better implementation of the "hooks".  Currently, every plugin
+  will attempt to run a function at every hook point.  Could be
+  changed so that plugin __init__ methods will register functions to
+  be run in the various predefined times.  Then if a plugin does not
+  require action at a specific point, no penalty will be paid for
+  trying to run a function that will do nothing.
+
+- Proper exception handling - make an exception class and use it
+
+- a TestCase class, for easier testcase handling, searching, comparison
+
+- a TestSuite class
+  and a way to configure a test suite,
+  to automate running multiple "test suites" with different requirements
+
+- super simple test case example using ls, touch, etc
diff --git a/tools/testing/selftests/tc-testing/TdcPlugin.py b/tools/testing/selftests/tc-testing/TdcPlugin.py
new file mode 100644
index 000000000..3ee9a6dac
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/TdcPlugin.py
@@ -0,0 +1,74 @@
+#!/usr/bin/env python3
+
+class TdcPlugin:
+    def __init__(self):
+        super().__init__()
+        print(' -- {}.__init__'.format(self.sub_class))
+
+    def pre_suite(self, testcount, testidlist):
+        '''run commands before test_runner goes into a test loop'''
+        self.testcount = testcount
+        self.testidlist = testidlist
+        if self.args.verbose > 1:
+            print(' -- {}.pre_suite'.format(self.sub_class))
+
+    def post_suite(self, index):
+        '''run commands after test_runner completes the test loop
+        index is the last ordinal number of test that was attempted'''
+        if self.args.verbose > 1:
+            print(' -- {}.post_suite'.format(self.sub_class))
+
+    def pre_case(self, test_ordinal, testid):
+        '''run commands before test_runner does one test'''
+        if self.args.verbose > 1:
+            print(' -- {}.pre_case'.format(self.sub_class))
+        self.args.testid = testid
+        self.args.test_ordinal = test_ordinal
+
+    def post_case(self):
+        '''run commands after test_runner does one test'''
+        if self.args.verbose > 1:
+            print(' -- {}.post_case'.format(self.sub_class))
+
+    def pre_execute(self):
+        '''run command before test-runner does the execute step'''
+        if self.args.verbose > 1:
+            print(' -- {}.pre_execute'.format(self.sub_class))
+
+    def post_execute(self):
+        '''run command after test-runner does the execute step'''
+        if self.args.verbose > 1:
+            print(' -- {}.post_execute'.format(self.sub_class))
+
+    def adjust_command(self, stage, command):
+        '''adjust the command'''
+        if self.args.verbose > 1:
+            print(' -- {}.adjust_command {}'.format(self.sub_class, stage))
+
+        # if stage == 'pre':
+        #     pass
+        # elif stage == 'setup':
+        #     pass
+        # elif stage == 'execute':
+        #     pass
+        # elif stage == 'verify':
+        #     pass
+        # elif stage == 'teardown':
+        #     pass
+        # elif stage == 'post':
+        #     pass
+        # else:
+        #     pass
+
+        return command
+
+    def add_args(self, parser):
+        '''Get the plugin args from the command line'''
+        self.argparser = parser
+        return self.argparser
+
+    def check_args(self, args, remaining):
+        '''Check that the args are set correctly'''
+        self.args = args
+        if self.args.verbose > 1:
+            print(' -- {}.check_args'.format(self.sub_class))
diff --git a/tools/testing/selftests/tc-testing/bpf/Makefile b/tools/testing/selftests/tc-testing/bpf/Makefile
new file mode 100644
index 000000000..dc92eb271
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/bpf/Makefile
@@ -0,0 +1,29 @@
+# SPDX-License-Identifier: GPL-2.0
+
+APIDIR := ../../../../include/uapi
+TEST_GEN_FILES = action.o
+
+top_srcdir = ../../../../..
+include ../../lib.mk
+
+CLANG ?= clang
+LLC   ?= llc
+PROBE := $(shell $(LLC) -march=bpf -mcpu=probe -filetype=null /dev/null 2>&1)
+
+ifeq ($(PROBE),)
+  CPU ?= probe
+else
+  CPU ?= generic
+endif
+
+CLANG_SYS_INCLUDES := $(shell $(CLANG) -v -E - </dev/null 2>&1 \
+	| sed -n '/<...> search starts here:/,/End of search list./{ s| \(/.*\)|-idirafter \1|p }')
+
+CLANG_FLAGS = -I. -I$(APIDIR) \
+	      $(CLANG_SYS_INCLUDES) \
+	      -Wno-compare-distinct-pointer-types
+
+$(OUTPUT)/%.o: %.c
+	$(CLANG) $(CLANG_FLAGS) \
+		 -O2 -target bpf -emit-llvm -c $< -o - |      \
+	$(LLC) -march=bpf -mcpu=$(CPU) $(LLC_FLAGS) -filetype=obj -o $@
diff --git a/tools/testing/selftests/tc-testing/bpf/action.c b/tools/testing/selftests/tc-testing/bpf/action.c
new file mode 100644
index 000000000..c32b99b80
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/bpf/action.c
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0
+ * Copyright (c) 2018 Davide Caratti, Red Hat inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+
+#include <linux/bpf.h>
+#include <linux/pkt_cls.h>
+
+__attribute__((section("action-ok"),used)) int action_ok(struct __sk_buff *s)
+{
+	return TC_ACT_OK;
+}
+
+__attribute__((section("action-ko"),used)) int action_ko(struct __sk_buff *s)
+{
+	s->data = 0x0;
+	return TC_ACT_OK;
+}
+
+char _license[] __attribute__((section("license"),used)) = "GPL";
diff --git a/tools/testing/selftests/tc-testing/config b/tools/testing/selftests/tc-testing/config
new file mode 100644
index 000000000..203302065
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/config
@@ -0,0 +1,48 @@
+CONFIG_NET_SCHED=y
+
+#
+# Queueing/Scheduling
+#
+CONFIG_NET_SCH_PRIO=m
+CONFIG_NET_SCH_INGRESS=m
+
+#
+# Classification
+#
+CONFIG_NET_CLS=y
+CONFIG_NET_CLS_FW=m
+CONFIG_NET_CLS_U32=m
+CONFIG_CLS_U32_PERF=y
+CONFIG_CLS_U32_MARK=y
+CONFIG_NET_EMATCH=y
+CONFIG_NET_EMATCH_STACK=32
+CONFIG_NET_EMATCH_CMP=m
+CONFIG_NET_EMATCH_NBYTE=m
+CONFIG_NET_EMATCH_U32=m
+CONFIG_NET_EMATCH_META=m
+CONFIG_NET_EMATCH_TEXT=m
+CONFIG_NET_EMATCH_IPSET=m
+CONFIG_NET_EMATCH_IPT=m
+CONFIG_NET_CLS_ACT=y
+CONFIG_NET_ACT_POLICE=m
+CONFIG_NET_ACT_GACT=m
+CONFIG_GACT_PROB=y
+CONFIG_NET_ACT_MIRRED=m
+CONFIG_NET_ACT_SAMPLE=m
+CONFIG_NET_ACT_IPT=m
+CONFIG_NET_ACT_NAT=m
+CONFIG_NET_ACT_PEDIT=m
+CONFIG_NET_ACT_SIMP=m
+CONFIG_NET_ACT_SKBEDIT=m
+CONFIG_NET_ACT_CSUM=m
+CONFIG_NET_ACT_VLAN=m
+CONFIG_NET_ACT_BPF=m
+CONFIG_NET_ACT_CONNMARK=m
+CONFIG_NET_ACT_SKBMOD=m
+CONFIG_NET_ACT_IFE=m
+CONFIG_NET_ACT_TUNNEL_KEY=m
+CONFIG_NET_IFE_SKBMARK=m
+CONFIG_NET_IFE_SKBPRIO=m
+CONFIG_NET_IFE_SKBTCINDEX=m
+CONFIG_NET_CLS_IND=y
+CONFIG_NET_SCH_FIFO=y
diff --git a/tools/testing/selftests/tc-testing/creating-plugins/AddingPlugins.txt b/tools/testing/selftests/tc-testing/creating-plugins/AddingPlugins.txt
new file mode 100644
index 000000000..c18f88d09
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/creating-plugins/AddingPlugins.txt
@@ -0,0 +1,104 @@
+tdc - Adding plugins for tdc
+
+Author: Brenda J. Butler - bjb@mojatatu.com
+
+ADDING PLUGINS
+--------------
+
+A new plugin should be written in python as a class that inherits from TdcPlugin.
+There are some examples in plugin-lib.
+
+The plugin can be used to add functionality to the test framework,
+such as:
+
+- adding commands to be run before and/or after the test suite
+- adding commands to be run before and/or after the test cases
+- adding commands to be run before and/or after the execute phase of the test cases
+- ability to alter the command to be run in any phase:
+    pre        (the pre-suite stage)
+    prepare
+    execute
+    verify
+    teardown
+    post       (the post-suite stage)
+- ability to add to the command line args, and use them at run time
+
+
+The functions in the class should follow the following interfaces:
+
+    def __init__(self)
+    def pre_suite(self, testcount, testidlist)     # see "PRE_SUITE" below
+    def post_suite(self, ordinal)                  # see "SKIPPING" below
+    def pre_case(self, test_ordinal, testid)       # see "PRE_CASE" below
+    def post_case(self)
+    def pre_execute(self)
+    def post_execute(self)
+    def adjust_command(self, stage, command)       # see "ADJUST" below
+    def add_args(self, parser)                     # see "ADD_ARGS" below
+    def check_args(self, args, remaining)          # see "CHECK_ARGS" below
+
+
+PRE_SUITE
+
+This method takes a testcount (number of tests to be run) and
+testidlist (array of test ids for tests that will be run).  This is
+useful for various things, including when an exception occurs and the
+rest of the tests must be skipped.  The info is stored in the object,
+and the post_suite method can refer to it when dumping the "skipped"
+TAP output.  The tdc.py script will do that for the test suite as
+defined in the test case, but if the plugin is being used to run extra
+tests on each test (eg, check for memory leaks on associated
+co-processes) then that other tap output can be generated in the
+post-suite method using this info passed in to the pre_suite method.
+
+
+SKIPPING
+
+The post_suite method will receive the ordinal number of the last
+test to be attempted.  It can use this info when outputting
+the TAP output for the extra test cases.
+
+
+PRE_CASE
+
+The pre_case method will receive the ordinal number of the test
+and the test id.  Useful for outputing the extra test results.
+
+
+ADJUST
+
+The adjust_command method receives a string representing
+the execution stage and a string which is the actual command to be
+executed.  The plugin can adjust the command, based on the stage of
+execution.
+
+The stages are represented by the following strings:
+
+    'pre'
+    'setup'
+    'command'
+    'verify'
+    'teardown'
+    'post'
+
+The adjust_command method must return the adjusted command so tdc
+can use it.
+
+
+ADD_ARGS
+
+The add_args method receives the argparser object and can add
+arguments to it.  Care should be taken that the new arguments do not
+conflict with any from tdc.py or from other plugins that will be used
+concurrently.
+
+The add_args method should return the argparser object.
+
+
+CHECK_ARGS
+
+The check_args method is so that the plugin can do validation on
+the args, if needed.  If there is a problem, and Exception should
+be raised, with a string that explains the problem.
+
+eg:  raise Exception('plugin xxx, arg -y is wrong, fix it')
diff --git a/tools/testing/selftests/tc-testing/creating-testcases/AddingTestCases.txt b/tools/testing/selftests/tc-testing/creating-testcases/AddingTestCases.txt
new file mode 100644
index 000000000..17b267ded
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/creating-testcases/AddingTestCases.txt
@@ -0,0 +1,100 @@
+tdc - Adding test cases for tdc
+
+Author: Lucas Bates - lucasb@mojatatu.com
+
+ADDING TEST CASES
+-----------------
+
+User-defined tests should be added by defining a separate JSON file.  This
+will help prevent conflicts when updating the repository. Refer to
+template.json for the required JSON format for test cases.
+
+Include the 'id' field, but do not assign a value. Running tdc with the -i
+option will generate a unique ID for that test case.
+
+tdc will recursively search the 'tc-tests' subdirectory (or the
+directories named with the -D option) for .json files.  Any test case
+files you create in these directories will automatically be included.
+If you wish to store your custom test cases elsewhere, be sure to run
+tdc with the -f argument and the path to your file, or the -D argument
+and the path to your directory(ies).
+
+Be aware of required escape characters in the JSON data - particularly
+when defining the match pattern. Refer to the supplied json test files
+for examples when in doubt.  The match pattern is written in json, and
+will be used by python.  So the match pattern will be a python regular
+expression, but should be written using json syntax.
+
+
+TEST CASE STRUCTURE
+-------------------
+
+Each test case has required data:
+
+id:           A unique alphanumeric value to identify a particular test case
+name:         Descriptive name that explains the command under test
+category:     A list of single-word descriptions covering what the command
+              under test is testing. Example: filter, actions, u32, gact, etc.
+setup:        The list of commands required to ensure the command under test
+              succeeds. For example: if testing a filter, the command to create
+              the qdisc would appear here.
+	      This list can be empty.
+	      Each command can be a string to be executed, or a list consisting
+	      of a string which is a command to be executed, followed by 1 or
+	      more acceptable exit codes for this command.
+	      If only a string is given for the command, then an exit code of 0
+	      will be expected.
+cmdUnderTest: The tc command being tested itself.
+expExitCode:  The code returned by the command under test upon its termination.
+              tdc will compare this value against the actual returned value.
+verifyCmd:    The tc command to be run to verify successful execution.
+              For example: if the command under test creates a gact action,
+              verifyCmd should be "$TC actions show action gact"
+matchPattern: A regular expression to be applied against the output of the
+              verifyCmd to prove the command under test succeeded. This pattern
+              should be as specific as possible so that a false positive is not
+              matched.
+matchCount:   How many times the regex in matchPattern should match. A value
+              of 0 is acceptable.
+teardown:     The list of commands to clean up after the test is completed.
+              The environment should be returned to the same state as when
+              this test was started: qdiscs deleted, actions flushed, etc.
+	      This list can be empty.
+	      Each command can be a string to be executed, or a list consisting
+	      of a string which is a command to be executed, followed by 1 or
+	      more acceptable exit codes for this command.
+	      If only a string is given for the command, then an exit code of 0
+	      will be expected.
+
+
+SETUP/TEARDOWN ERRORS
+---------------------
+
+If an error is detected during the setup/teardown process, execution of the
+tests will immediately stop with an error message and the namespace in which
+the tests are run will be destroyed. This is to prevent inaccurate results
+in the test cases.  tdc will output a series of TAP results for the skipped
+tests.
+
+Repeated failures of the setup/teardown may indicate a problem with the test
+case, or possibly even a bug in one of the commands that are not being tested.
+
+It's possible to include acceptable exit codes with the setup/teardown command
+so that it doesn't halt the script for an error that doesn't matter. Turn the
+individual command into a list, with the command being first, followed by all
+acceptable exit codes for the command.
+
+Example:
+
+A pair of setup commands.  The first can have exit code 0, 1 or 255, the
+second must have exit code 0.
+
+        "setup": [
+            [
+                "$TC actions flush action gact",
+                0,
+                1,
+                255
+            ],
+            "$TC actions add action reclassify index 65536"
+        ],
diff --git a/tools/testing/selftests/tc-testing/creating-testcases/example.json b/tools/testing/selftests/tc-testing/creating-testcases/example.json
new file mode 100644
index 000000000..5ec501200
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/creating-testcases/example.json
@@ -0,0 +1,55 @@
+[
+    {
+        "id": "1f",
+        "name": "simple test to test framework",
+        "category": [
+            "example"
+        ],
+        "setup": [
+            "mkdir mytest"
+        ],
+        "cmdUnderTest": "touch mytest/blorfl",
+        "expExitCode": "0",
+        "verifyCmd": "ls mytest/* | grep '[b]lorfl'",
+        "matchPattern": "orfl",
+        "matchCount": "1",
+        "teardown": [
+            "rm -rf mytest"
+        ]
+    },
+    {
+        "id": "2f",
+        "name": "simple test, no need for verify",
+        "category": [
+            "example"
+        ],
+        "setup": [
+            "mkdir mytest",
+            "touch mytest/blorfl"
+        ],
+        "cmdUnderTest": "ls mytest/blorfl",
+        "expExitCode": "0",
+        "verifyCmd": "/bin/true",
+        "matchPattern": " ",
+        "matchCount": "0",
+        "teardown": [
+            "rm -rf mytest"
+        ]
+    },
+    {
+        "id": "3f",
+        "name": "simple test, no need for setup or teardown (or verify)",
+        "category": [
+            "example"
+        ],
+        "setup": [
+        ],
+        "cmdUnderTest": "ip l l lo",
+        "expExitCode": "0",
+        "verifyCmd": "/bin/true",
+        "matchPattern": " ",
+        "matchCount": "0",
+        "teardown": [
+        ]
+    }
+]
diff --git a/tools/testing/selftests/tc-testing/creating-testcases/template.json b/tools/testing/selftests/tc-testing/creating-testcases/template.json
new file mode 100644
index 000000000..8b99b86d6
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/creating-testcases/template.json
@@ -0,0 +1,51 @@
+[
+    {
+        "id": "",
+        "name": "",
+        "category": [
+            "",
+            ""
+        ],
+        "setup": [
+            ""
+        ],
+        "cmdUnderTest": "",
+        "expExitCode": "",
+        "verifyCmd": "",
+        "matchPattern": "",
+        "matchCount": "",
+        "teardown": [
+            ""
+        ]
+    },
+    {
+        "id": "",
+        "name": "",
+        "category": [
+            "",
+            ""
+        ],
+        "setup": [
+            "",
+	    [
+		"",
+		0,
+		1,
+		255
+	    ]
+        ],
+        "cmdUnderTest": "",
+        "expExitCode": "",
+        "verifyCmd": "",
+        "matchPattern": "",
+        "matchCount": "",
+        "teardown": [
+            "",
+	    [
+		"",
+		0,
+		255
+            ]
+        ]
+    }
+]
diff --git a/tools/testing/selftests/tc-testing/plugin-lib/README-PLUGINS b/tools/testing/selftests/tc-testing/plugin-lib/README-PLUGINS
new file mode 100644
index 000000000..aa8a26697
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/plugin-lib/README-PLUGINS
@@ -0,0 +1,27 @@
+tdc.py will look for plugins in a directory plugins off the cwd.
+Make a set of numbered symbolic links from there to the actual plugins.
+Eg:
+
+tdc.py
+plugin-lib/
+plugins/
+    __init__.py
+    10-rootPlugin.py -> ../plugin-lib/rootPlugin.py
+    20-valgrindPlugin.py -> ../plugin-lib/valgrindPlugin.py
+    30-nsPlugin.py -> ../plugin-lib/nsPlugin.py
+
+
+tdc.py will find them and use them.
+
+
+rootPlugin
+    Check if the uid is root.  If not, bail out.
+
+valgrindPlugin
+    Run the command under test with valgrind, and produce an extra set of TAP results for the memory tests.
+    This plugin will write files to the cwd, called vgnd-xxx.log.  These will contain
+    the valgrind output for test xxx.  Any file matching the glob 'vgnd-*.log' will be
+    deleted at the end of the run.
+
+nsPlugin
+    Run all the commands in a network namespace.
diff --git a/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py b/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py
new file mode 100644
index 000000000..a194b1af2
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py
@@ -0,0 +1,141 @@
+import os
+import signal
+from string import Template
+import subprocess
+import time
+from TdcPlugin import TdcPlugin
+
+from tdc_config import *
+
+class SubPlugin(TdcPlugin):
+    def __init__(self):
+        self.sub_class = 'ns/SubPlugin'
+        super().__init__()
+
+    def pre_suite(self, testcount, testidlist):
+        '''run commands before test_runner goes into a test loop'''
+        super().pre_suite(testcount, testidlist)
+
+        if self.args.namespace:
+            self._ns_create()
+
+    def post_suite(self, index):
+        '''run commands after test_runner goes into a test loop'''
+        super().post_suite(index)
+        if self.args.verbose:
+            print('{}.post_suite'.format(self.sub_class))
+
+        if self.args.namespace:
+            self._ns_destroy()
+
+    def add_args(self, parser):
+        super().add_args(parser)
+        self.argparser_group = self.argparser.add_argument_group(
+            'netns',
+            'options for nsPlugin(run commands in net namespace)')
+        self.argparser_group.add_argument(
+            '-n', '--namespace', action='store_true',
+            help='Run commands in namespace')
+        return self.argparser
+
+    def adjust_command(self, stage, command):
+        super().adjust_command(stage, command)
+        cmdform = 'list'
+        cmdlist = list()
+
+        if not self.args.namespace:
+            return command
+
+        if self.args.verbose:
+            print('{}.adjust_command'.format(self.sub_class))
+
+        if not isinstance(command, list):
+            cmdform = 'str'
+            cmdlist = command.split()
+        else:
+            cmdlist = command
+        if stage == 'setup' or stage == 'execute' or stage == 'verify' or stage == 'teardown':
+            if self.args.verbose:
+                print('adjust_command:  stage is {}; inserting netns stuff in command [{}] list [{}]'.format(stage, command, cmdlist))
+            cmdlist.insert(0, self.args.NAMES['NS'])
+            cmdlist.insert(0, 'exec')
+            cmdlist.insert(0, 'netns')
+            cmdlist.insert(0, 'ip')
+        else:
+            pass
+
+        if cmdform == 'str':
+            command = ' '.join(cmdlist)
+        else:
+            command = cmdlist
+
+        if self.args.verbose:
+            print('adjust_command:  return command [{}]'.format(command))
+        return command
+
+    def _ns_create(self):
+        '''
+        Create the network namespace in which the tests will be run and set up
+        the required network devices for it.
+        '''
+        if self.args.namespace:
+            cmd = 'ip netns add {}'.format(self.args.NAMES['NS'])
+            self._exec_cmd('pre', cmd)
+            cmd = 'ip link add $DEV0 type veth peer name $DEV1'
+            self._exec_cmd('pre', cmd)
+            cmd = 'ip link set $DEV1 netns {}'.format(self.args.NAMES['NS'])
+            self._exec_cmd('pre', cmd)
+            cmd = 'ip link set $DEV0 up'
+            self._exec_cmd('pre', cmd)
+            cmd = 'ip -n {} link set $DEV1 up'.format(self.args.NAMES['NS'])
+            self._exec_cmd('pre', cmd)
+            if self.args.device:
+                cmd = 'ip link set $DEV2 netns {}'.format(self.args.NAMES['NS'])
+                self._exec_cmd('pre', cmd)
+                cmd = 'ip -n {} link set $DEV2 up'.format(self.args.NAMES['NS'])
+                self._exec_cmd('pre', cmd)
+
+    def _ns_destroy(self):
+        '''
+        Destroy the network namespace for testing (and any associated network
+        devices as well)
+        '''
+        if self.args.namespace:
+            cmd = 'ip netns delete {}'.format(self.args.NAMES['NS'])
+            self._exec_cmd('post', cmd)
+
+    def _exec_cmd(self, stage, command):
+        '''
+        Perform any required modifications on an executable command, then run
+        it in a subprocess and return the results.
+        '''
+        if '$' in command:
+            command = self._replace_keywords(command)
+
+        self.adjust_command(stage, command)
+        if self.args.verbose:
+            print('_exec_cmd:  command "{}"'.format(command))
+        proc = subprocess.Popen(command,
+            shell=True,
+            stdout=subprocess.PIPE,
+            stderr=subprocess.PIPE,
+            env=ENVIR)
+        (rawout, serr) = proc.communicate()
+
+        if proc.returncode != 0 and len(serr) > 0:
+            foutput = serr.decode("utf-8")
+        else:
+            foutput = rawout.decode("utf-8")
+
+        proc.stdout.close()
+        proc.stderr.close()
+        return proc, foutput
+
+    def _replace_keywords(self, cmd):
+        """
+        For a given executable command, substitute any known
+        variables contained within NAMES with the correct values
+        """
+        tcmd = Template(cmd)
+        subcmd = tcmd.safe_substitute(self.args.NAMES)
+        return subcmd
diff --git a/tools/testing/selftests/tc-testing/plugin-lib/rootPlugin.py b/tools/testing/selftests/tc-testing/plugin-lib/rootPlugin.py
new file mode 100644
index 000000000..e36775bd4
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/plugin-lib/rootPlugin.py
@@ -0,0 +1,19 @@
+import os
+import sys
+from TdcPlugin import TdcPlugin
+
+from tdc_config import *
+
+
+class SubPlugin(TdcPlugin):
+    def __init__(self):
+        self.sub_class = 'root/SubPlugin'
+        super().__init__()
+
+    def pre_suite(self, testcount, testidlist):
+        # run commands before test_runner goes into a test loop
+        super().pre_suite(testcount, testidlist)
+
+        if os.geteuid():
+            print('This script must be run with root privileges', file=sys.stderr)
+            exit(1)
diff --git a/tools/testing/selftests/tc-testing/plugin-lib/valgrindPlugin.py b/tools/testing/selftests/tc-testing/plugin-lib/valgrindPlugin.py
new file mode 100644
index 000000000..477a7bd7d
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/plugin-lib/valgrindPlugin.py
@@ -0,0 +1,142 @@
+'''
+run the command under test, under valgrind and collect memory leak info
+as a separate test.
+'''
+
+
+import os
+import re
+import signal
+from string import Template
+import subprocess
+import time
+from TdcPlugin import TdcPlugin
+
+from tdc_config import *
+
+def vp_extract_num_from_string(num_as_string_maybe_with_commas):
+    return int(num_as_string_maybe_with_commas.replace(',',''))
+
+class SubPlugin(TdcPlugin):
+    def __init__(self):
+        self.sub_class = 'valgrind/SubPlugin'
+        self.tap = ''
+        super().__init__()
+
+    def pre_suite(self, testcount, testidlist):
+        '''run commands before test_runner goes into a test loop'''
+        super().pre_suite(testcount, testidlist)
+        if self.args.verbose > 1:
+            print('{}.pre_suite'.format(self.sub_class))
+        if self.args.valgrind:
+            self._add_to_tap('1..{}\n'.format(self.testcount))
+
+    def post_suite(self, index):
+        '''run commands after test_runner goes into a test loop'''
+        super().post_suite(index)
+        self._add_to_tap('\n|---\n')
+        if self.args.verbose > 1:
+            print('{}.post_suite'.format(self.sub_class))
+        print('{}'.format(self.tap))
+        if self.args.verbose < 4:
+            subprocess.check_output('rm -f vgnd-*.log', shell=True)
+
+    def add_args(self, parser):
+        super().add_args(parser)
+        self.argparser_group = self.argparser.add_argument_group(
+            'valgrind',
+            'options for valgrindPlugin (run command under test under Valgrind)')
+
+        self.argparser_group.add_argument(
+            '-V', '--valgrind', action='store_true',
+            help='Run commands under valgrind')
+
+        return self.argparser
+
+    def adjust_command(self, stage, command):
+        super().adjust_command(stage, command)
+        cmdform = 'list'
+        cmdlist = list()
+
+        if not self.args.valgrind:
+            return command
+
+        if self.args.verbose > 1:
+            print('{}.adjust_command'.format(self.sub_class))
+
+        if not isinstance(command, list):
+            cmdform = 'str'
+            cmdlist = command.split()
+        else:
+            cmdlist = command
+
+        if stage == 'execute':
+            if self.args.verbose > 1:
+                print('adjust_command:  stage is {}; inserting valgrind stuff in command [{}] list [{}]'.
+                      format(stage, command, cmdlist))
+            cmdlist.insert(0, '--track-origins=yes')
+            cmdlist.insert(0, '--show-leak-kinds=definite,indirect')
+            cmdlist.insert(0, '--leak-check=full')
+            cmdlist.insert(0, '--log-file=vgnd-{}.log'.format(self.args.testid))
+            cmdlist.insert(0, '-v')  # ask for summary of non-leak errors
+            cmdlist.insert(0, ENVIR['VALGRIND_BIN'])
+        else:
+            pass
+
+        if cmdform == 'str':
+            command = ' '.join(cmdlist)
+        else:
+            command = cmdlist
+
+        if self.args.verbose > 1:
+            print('adjust_command:  return command [{}]'.format(command))
+        return command
+
+    def post_execute(self):
+        if not self.args.valgrind:
+            return
+
+        self.definitely_lost_re = re.compile(
+            r'definitely lost:\s+([,0-9]+)\s+bytes in\s+([,0-9]+)\sblocks', re.MULTILINE | re.DOTALL)
+        self.indirectly_lost_re = re.compile(
+            r'indirectly lost:\s+([,0-9]+)\s+bytes in\s+([,0-9]+)\s+blocks', re.MULTILINE | re.DOTALL)
+        self.possibly_lost_re = re.compile(
+            r'possibly lost:\s+([,0-9]+)bytes in\s+([,0-9]+)\s+blocks', re.MULTILINE | re.DOTALL)
+        self.non_leak_error_re = re.compile(
+            r'ERROR SUMMARY:\s+([,0-9]+) errors from\s+([,0-9]+)\s+contexts', re.MULTILINE | re.DOTALL)
+
+        def_num = 0
+        ind_num = 0
+        pos_num = 0
+        nle_num = 0
+
+        # what about concurrent test runs?  Maybe force them to be in different directories?
+        with open('vgnd-{}.log'.format(self.args.testid)) as vfd:
+            content = vfd.read()
+            def_mo = self.definitely_lost_re.search(content)
+            ind_mo = self.indirectly_lost_re.search(content)
+            pos_mo = self.possibly_lost_re.search(content)
+            nle_mo = self.non_leak_error_re.search(content)
+
+            if def_mo:
+                def_num = int(def_mo.group(2))
+            if ind_mo:
+                ind_num = int(ind_mo.group(2))
+            if pos_mo:
+                pos_num = int(pos_mo.group(2))
+            if nle_mo:
+                nle_num = int(nle_mo.group(1))
+
+        mem_results = ''
+        if (def_num > 0) or (ind_num > 0) or (pos_num > 0) or (nle_num > 0):
+            mem_results += 'not '
+
+        mem_results += 'ok {} - {}-mem # {}\n'.format(
+            self.args.test_ordinal, self.args.testid, 'memory leak check')
+        self._add_to_tap(mem_results)
+        if mem_results.startswith('not '):
+            print('{}'.format(content))
+            self._add_to_tap(content)
+
+    def _add_to_tap(self, more_tap_output):
+        self.tap += more_tap_output
diff --git a/tools/testing/selftests/tc-testing/plugins/__init__.py b/tools/testing/selftests/tc-testing/plugins/__init__.py
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/plugins/__init__.py
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json b/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json
new file mode 100644
index 000000000..1a9b282dd
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json
@@ -0,0 +1,294 @@
+[
+    {
+        "id": "d959",
+        "name": "Add cBPF action with valid bytecode",
+        "category": [
+            "actions",
+            "bpf"
+        ],
+        "setup": [
+            [
+                "$TC action flush action bpf",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC action add action bpf bytecode '4,40 0 0 12,21 0 1 2048,6 0 0 262144,6 0 0 0' index 100",
+        "expExitCode": "0",
+        "verifyCmd": "$TC action get action bpf index 100",
+        "matchPattern": "action order [0-9]*: bpf bytecode '4,40 0 0 12,21 0 1 2048,6 0 0 262144,6 0 0 0' default-action pipe.*index 100 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC action flush action bpf"
+        ]
+    },
+    {
+        "id": "f84a",
+        "name": "Add cBPF action with invalid bytecode",
+        "category": [
+            "actions",
+            "bpf"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action bpf",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC action add action bpf bytecode '4,40 0 0 12,31 0 1 2048,6 0 0 262144,6 0 0 0' index 100",
+        "expExitCode": "255",
+        "verifyCmd": "$TC action get action bpf index 100",
+        "matchPattern": "action order [0-9]*: bpf bytecode '4,40 0 0 12,31 0 1 2048,6 0 0 262144,6 0 0 0' default-action pipe.*index 100 ref",
+        "matchCount": "0",
+        "teardown": [
+            "$TC actions flush action bpf"
+        ]
+    },
+    {
+        "id": "e939",
+        "name": "Add eBPF action with valid object-file",
+        "category": [
+            "actions",
+            "bpf"
+        ],
+        "setup": [
+            "make -C bpf",
+            [
+                "$TC action flush action bpf",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC action add action bpf object-file $EBPFDIR/action.o section action-ok index 667",
+        "expExitCode": "0",
+        "verifyCmd": "$TC action get action bpf index 667",
+        "matchPattern": "action order [0-9]*: bpf action.o:\\[action-ok\\] id [0-9]* tag [0-9a-f]{16}( jited)? default-action pipe.*index 667 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC action flush action bpf",
+            "make -C bpf clean"
+        ]
+    },
+    {
+        "id": "282d",
+        "name": "Add eBPF action with invalid object-file",
+        "category": [
+            "actions",
+            "bpf"
+        ],
+        "setup": [
+            "make -C bpf",
+            [
+                "$TC action flush action bpf",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC action add action bpf object-file $EBPFDIR/action.o section action-ko index 667",
+        "expExitCode": "255",
+        "verifyCmd": "$TC action get action bpf index 667",
+        "matchPattern": "action order [0-9]*: bpf action.o:\\[action-ko\\] id [0-9].*index 667 ref",
+        "matchCount": "0",
+        "teardown": [
+            [
+                "$TC action flush action bpf",
+                0,
+                1,
+                255
+            ],
+            "make -C bpf clean"
+        ]
+    },
+    {
+        "id": "d819",
+        "name": "Replace cBPF bytecode and action control",
+        "category": [
+            "actions",
+            "bpf"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action bpf",
+                0,
+                1,
+                255
+            ],
+            [
+                "$TC action add action bpf bytecode '4,40 0 0 12,21 0 1 2048,6 0 0 262144,6 0 0 0' index 555",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC action replace action bpf bytecode '4,40 0 0 12,21 0 1 2054,6 0 0 262144,6 0 0 0' drop index 555",
+        "expExitCode": "0",
+        "verifyCmd": "$TC action get action bpf index 555",
+        "matchPattern": "action order [0-9]*: bpf bytecode '4,40 0 0 12,21 0 1 2054,6 0 0 262144,6 0 0 0' default-action drop.*index 555 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC action flush action bpf"
+        ]
+    },
+    {
+        "id": "6ae3",
+        "name": "Delete cBPF action ",
+        "category": [
+            "actions",
+            "bpf"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action bpf",
+                0,
+                1,
+                255
+            ],
+            [
+                "$TC action add action bpf bytecode '4,40 0 0 12,21 0 1 2048,6 0 0 262144,6 0 0 0' index 444",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC action delete action bpf index 444",
+        "expExitCode": "0",
+        "verifyCmd": "$TC action get action bpf index 444",
+        "matchPattern": "action order [0-9]*: bpf bytecode '4,40 0 0 12,21 0 1 2048,6 0 0 262144,6 0 0 0' default-action pipe.*index 444 ref",
+        "matchCount": "0",
+        "teardown": [
+            "$TC action flush action bpf"
+        ]
+    },
+    {
+        "id": "3e0d",
+        "name": "List cBPF actions",
+        "category": [
+            "actions",
+            "bpf"
+        ],
+        "setup": [
+            [
+                "$TC action flush action bpf",
+                0,
+                1,
+                255
+            ],
+            "$TC action add action bpf bytecode '4,40 0 0 12,21 0 1 2048,6 0 0 262144,6 0 0 0' ok index 101",
+            "$TC action add action bpf bytecode '4,40 0 0 12,21 0 1 2054,6 0 0 262144,6 0 0 0' drop index 102",
+            "$TC action add action bpf bytecode '4,40 0 0 12,21 0 1 33024,6 0 0 262144,6 0 0 0' continue index 103"
+        ],
+        "cmdUnderTest": "$TC action list action bpf",
+        "expExitCode": "0",
+        "verifyCmd": "$TC action list action bpf",
+        "matchPattern": "action order [0-9]*: bpf bytecode",
+        "matchCount": "3",
+        "teardown": [
+            "$TC actions flush action bpf"
+        ]
+    },
+    {
+        "id": "55ce",
+        "name": "Flush BPF actions",
+        "category": [
+            "actions",
+            "bpf"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action bpf",
+                0,
+                1,
+                255
+            ],
+            "$TC action add action bpf bytecode '4,40 0 0 12,21 0 1 2048,6 0 0 262144,6 0 0 0' ok index 101",
+            "$TC action add action bpf bytecode '4,40 0 0 12,21 0 1 2054,6 0 0 262144,6 0 0 0' drop index 102",
+            "$TC action add action bpf bytecode '4,40 0 0 12,21 0 1 33024,6 0 0 262144,6 0 0 0' continue index 103"
+        ],
+        "cmdUnderTest": "$TC action flush action bpf",
+        "expExitCode": "0",
+        "verifyCmd": "$TC action list action bpf",
+        "matchPattern": "action order [0-9]*: bpf bytecode",
+        "matchCount": "0",
+        "teardown": [
+            "$TC actions flush action bpf"
+        ]
+    },
+    {
+        "id": "ccc3",
+        "name": "Add cBPF action with duplicate index",
+        "category": [
+            "actions",
+            "bpf"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action bpf",
+                0,
+                1,
+                255
+            ],
+            "$TC action add action bpf bytecode '4,40 0 0 12,21 0 1 2048,6 0 0 262144,6 0 0 0' index 4294967295"
+        ],
+        "cmdUnderTest": "$TC action add action bpf bytecode '4,40 0 0 12,21 0 1 2054,6 0 0 262144,6 0 0 0' index 4294967295",
+        "expExitCode": "255",
+        "verifyCmd": "$TC action get action bpf index 4294967295",
+        "matchPattern": "action order [0-9]*: bpf bytecode '4,40 0 0 12,21 0 1 2048,6 0 0 262144,6 0 0 0' default-action pipe.*index 4294967295",
+        "matchCount": "1",
+        "teardown": [
+            "$TC action flush action bpf"
+        ]
+    },
+    {
+        "id": "89c7",
+        "name": "Add cBPF action with invalid index",
+        "category": [
+            "actions",
+            "bpf"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action bpf",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC action add action bpf bytecode '4,40 0 0 12,21 0 1 2054,6 0 0 262144,6 0 0 0' index 4294967296 cookie 12345",
+        "expExitCode": "255",
+        "verifyCmd": "$TC action ls action bpf",
+        "matchPattern": "action order [0-9]*: bpf bytecode '4,40 0 0 12,21 0 1 2048,6 0 0 262144,6 0 0 0' default-action pipe.*cookie 12345",
+        "matchCount": "0",
+        "teardown": [
+            "$TC action flush action bpf"
+        ]
+    },
+    {
+        "id": "7ab9",
+        "name": "Add cBPF action with cookie",
+        "category": [
+            "actions",
+            "bpf"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action bpf",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC action add action bpf bytecode '4,40 0 0 12,21 0 1 2054,6 0 0 262144,6 0 0 0' cookie d0d0d0d0d0d0d0d0",
+        "expExitCode": "0",
+        "verifyCmd": "$TC action list action bpf",
+        "matchPattern": "action order [0-9]*: bpf.*cookie d0d0d0d0d0d0d0",
+        "matchCount": "1",
+        "teardown": [
+            "$TC action flush action bpf"
+        ]
+    }
+]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/connmark.json b/tools/testing/selftests/tc-testing/tc-tests/actions/connmark.json
new file mode 100644
index 000000000..13147a1f5
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/connmark.json
@@ -0,0 +1,291 @@
+[
+    {
+        "id": "2002",
+        "name": "Add valid connmark action with defaults",
+        "category": [
+            "actions",
+            "connmark"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action connmark",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action connmark",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action connmark",
+        "matchPattern": "action order [0-9]+: connmark zone 0 pipe",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action connmark"
+        ]
+    },
+    {
+        "id": "56a5",
+        "name": "Add valid connmark action with control pass",
+        "category": [
+            "actions",
+            "connmark"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action connmark",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action connmark pass index 1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action connmark index 1",
+        "matchPattern": "action order [0-9]+: connmark zone 0 pass.*index 1 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action connmark"
+        ]
+    },
+    {
+        "id": "7c66",
+        "name": "Add valid connmark action with control drop",
+        "category": [
+            "actions",
+            "connmark"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action connmark",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action connmark drop index 100",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action connmark index 100",
+        "matchPattern": "action order [0-9]+: connmark zone 0 drop.*index 100 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action connmark"
+        ]
+    },
+    {
+        "id": "a913",
+        "name": "Add valid connmark action with control pipe",
+        "category": [
+            "actions",
+            "connmark"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action connmark",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action connmark pipe index 455",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action connmark index 455",
+        "matchPattern": "action order [0-9]+: connmark zone 0 pipe.*index 455 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action connmark"
+        ]
+    },
+    {
+        "id": "bdd8",
+        "name": "Add valid connmark action with control reclassify",
+        "category": [
+            "actions",
+            "connmark"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action connmark",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action connmark reclassify index 7",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action connmark",
+        "matchPattern": "action order [0-9]+: connmark zone 0 reclassify.*index 7 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action connmark"
+        ]
+    },
+    {
+        "id": "b8be",
+        "name": "Add valid connmark action with control continue",
+        "category": [
+            "actions",
+            "connmark"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action connmark",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action connmark continue index 17",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action connmark",
+        "matchPattern": "action order [0-9]+: connmark zone 0 continue.*index 17 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action connmark"
+        ]
+    },
+    {
+        "id": "d8a6",
+        "name": "Add valid connmark action with control jump",
+        "category": [
+            "actions",
+            "connmark"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action connmark",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action connmark jump 10 index 17",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action connmark",
+        "matchPattern": "action order [0-9]+: connmark zone 0 jump 10.*index 17 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action connmark"
+        ]
+    },
+    {
+        "id": "aae8",
+        "name": "Add valid connmark action with zone argument",
+        "category": [
+            "actions",
+            "connmark"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action connmark",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action connmark zone 100 pipe index 1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action connmark index 1",
+        "matchPattern": "action order [0-9]+: connmark zone 100 pipe.*index 1 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action connmark"
+        ]
+    },
+    {
+        "id": "2f0b",
+        "name": "Add valid connmark action with invalid zone argument",
+        "category": [
+            "actions",
+            "connmark"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action connmark",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action connmark zone 65536 reclassify index 21",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions get action connmark index 1",
+        "matchPattern": "action order [0-9]+: connmark zone 65536 reclassify.*index 21 ref",
+        "matchCount": "0",
+        "teardown": [
+            "$TC actions flush action connmark"
+        ]
+    },
+    {
+        "id": "9305",
+        "name": "Add connmark action with unsupported argument",
+        "category": [
+            "actions",
+            "connmark"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action connmark",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action connmark zone 655 unsupp_arg pass index 2",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions get action connmark index 2",
+        "matchPattern": "action order [0-9]+: connmark zone 655 unsupp_arg pass.*index 2 ref",
+        "matchCount": "0",
+        "teardown": [
+            "$TC actions flush action connmark"
+        ]
+    },
+    {
+        "id": "71ca",
+        "name": "Add valid connmark action and replace it",
+        "category": [
+            "actions",
+            "connmark"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action connmark",
+                0,
+                1,
+                255
+            ],
+            "$TC actions add action connmark zone 777 pass index 555"
+        ],
+        "cmdUnderTest": "$TC actions replace action connmark zone 555 reclassify index 555",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action connmark index 555",
+        "matchPattern": "action order [0-9]+: connmark zone 555 reclassify.*index 555 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action connmark"
+        ]
+    },
+    {
+        "id": "5f8f",
+        "name": "Add valid connmark action with cookie",
+        "category": [
+            "actions",
+            "connmark"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action connmark",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action connmark zone 555 pipe index 5 cookie aabbccddeeff112233445566778800a1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action connmark index 5",
+        "matchPattern": "action order [0-9]+: connmark zone 555 pipe.*index 5 ref.*cookie aabbccddeeff112233445566778800a1",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action connmark"
+        ]
+    }
+]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/csum.json b/tools/testing/selftests/tc-testing/tc-tests/actions/csum.json
new file mode 100644
index 000000000..a022792d3
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/csum.json
@@ -0,0 +1,504 @@
+[
+    {
+        "id": "6d84",
+        "name": "Add csum iph action",
+        "category": [
+            "actions",
+            "csum"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action csum",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action csum iph index 800",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action csum index 800",
+        "matchPattern": "action order [0-9]*: csum \\(iph\\) action pass.*index 800 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action csum"
+        ]
+    },
+    {
+        "id": "1862",
+        "name": "Add csum ip4h action",
+        "category": [
+            "actions",
+            "csum"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action csum",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action csum ip4h index 7",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action csum index 7",
+        "matchPattern": "action order [0-9]*: csum \\(iph\\) action pass.*index 7 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action csum"
+        ]
+    },
+    {
+        "id": "15c6",
+        "name": "Add csum ipv4h action",
+        "category": [
+            "actions",
+            "csum"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action csum",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action csum ipv4h index 1122",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action csum index 1122",
+        "matchPattern": "action order [0-9]*: csum \\(iph\\) action pass.*index 1122 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action csum"
+        ]
+    },
+    {
+        "id": "bf47",
+        "name": "Add csum icmp action",
+        "category": [
+            "actions",
+            "csum"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action csum",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action csum icmp index 1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action csum index 1",
+        "matchPattern": "action order [0-9]*: csum \\(icmp\\) action pass.*index 1 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action csum"
+        ]
+    },
+    {
+        "id": "cc1d",
+        "name": "Add csum igmp action",
+        "category": [
+            "actions",
+            "csum"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action csum",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action csum igmp index 999",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action csum index 999",
+        "matchPattern": "action order [0-9]*: csum \\(igmp\\) action pass.*index 999 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action csum"
+        ]
+    },
+    {
+        "id": "bccc",
+        "name": "Add csum foobar action",
+        "category": [
+            "actions",
+            "csum"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action csum",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action csum foobar index 1",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions ls action csum",
+        "matchPattern": "action order [0-9]*: csum \\(foobar\\) action pass.*index 1 ref",
+        "matchCount": "0",
+        "teardown": [
+            "$TC actions flush action csum"
+        ]
+    },
+    {
+        "id": "3bb4",
+        "name": "Add csum tcp action",
+        "category": [
+            "actions",
+            "csum"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action csum",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action csum tcp index 9999",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action csum index 9999",
+        "matchPattern": "action order [0-9]*: csum \\(tcp\\) action pass.*index 9999 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action csum"
+        ]
+    },
+    {
+        "id": "759c",
+        "name": "Add csum udp action",
+        "category": [
+            "actions",
+            "csum"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action csum",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action csum udp index 334455",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action csum index 334455",
+        "matchPattern": "action order [0-9]*: csum \\(udp\\) action pass.*index 334455 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action csum"
+        ]
+    },
+    {
+        "id": "bdb6",
+        "name": "Add csum udp xor iph action",
+        "category": [
+            "actions",
+            "csum"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action csum",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action csum udp xor iph index 3",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions ls action csum",
+        "matchPattern": "action order [0-9]*: csum \\(udp xor iph\\) action pass.*index 3 ref",
+        "matchCount": "0",
+        "teardown": [
+            "$TC actions flush action csum"
+        ]
+    },
+    {
+        "id": "c220",
+        "name": "Add csum udplite action",
+        "category": [
+            "actions",
+            "csum"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action csum",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action csum udplite continue index 3",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action csum index 3",
+        "matchPattern": "action order [0-9]*: csum \\(udplite\\) action continue.*index 3 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action csum"
+        ]
+    },
+    {
+        "id": "8993",
+        "name": "Add csum sctp action",
+        "category": [
+            "actions",
+            "csum"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action csum",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action csum sctp index 777",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action csum index 777",
+        "matchPattern": "action order [0-9]*: csum \\(sctp\\) action pass.*index 777 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action csum"
+        ]
+    },
+    {
+        "id": "b138",
+        "name": "Add csum ip & icmp action",
+        "category": [
+            "actions",
+            "csum"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action csum",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action csum ip and icmp pipe index 123",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action csum index 123",
+        "matchPattern": "action order [0-9]*: csum \\(iph, icmp\\) action pipe.*index 123 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action csum"
+        ]
+    },
+    {
+        "id": "eeda",
+        "name": "Add csum ip & sctp action",
+        "category": [
+            "actions",
+            "csum"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action csum",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action csum ipv4h sctp continue index 2",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action csum index 2",
+        "matchPattern": "action order [0-9]*: csum \\(iph, sctp\\) action continue.*index 2 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action csum"
+        ]
+    },
+    {
+        "id": "0017",
+        "name": "Add csum udp or tcp action",
+        "category": [
+            "actions",
+            "csum"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action csum",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action csum udp or tcp continue index 27",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action csum index 27",
+        "matchPattern": "action order [0-9]*: csum \\(tcp, udp\\) action continue.*index 27 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action csum"
+        ]
+    },
+    {
+        "id": "b10b",
+        "name": "Add all 7 csum actions",
+        "category": [
+            "actions",
+            "csum"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action csum",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action csum icmp ip4h sctp igmp udplite udp tcp index 7",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action csum index 7",
+        "matchPattern": "action order [0-9]*: csum \\(iph, icmp, igmp, tcp, udp, udplite, sctp\\).*index 7 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action csum"
+        ]
+    },
+    {
+        "id": "ce92",
+        "name": "Add csum udp action with cookie",
+        "category": [
+            "actions",
+            "csum"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action csum",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action csum udp pipe index 7 cookie 12345678",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action csum index 7",
+        "matchPattern": "action order [0-9]*: csum \\(udp\\) action pipe.*index 7.*cookie 12345678",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action csum"
+        ]
+    },
+    {
+        "id": "912f",
+        "name": "Add csum icmp action with large cookie",
+        "category": [
+            "actions",
+            "csum"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action csum",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action csum icmp pipe index 17 cookie aabbccddeeff1122",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action csum index 17",
+        "matchPattern": "action order [0-9]*: csum \\(icmp\\) action pipe.*index 17.*cookie aabbccddeeff1122",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action csum"
+        ]
+    },
+    {
+        "id": "879b",
+        "name": "Add batch of 32 csum tcp actions",
+        "category": [
+            "actions",
+            "csum"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action csum",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "bash -c \"for i in \\`seq 1 32\\`; do cmd=\\\"action csum tcp continue index \\$i \\\"; args=\"\\$args\\$cmd\"; done && $TC actions add \\$args\"",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions ls action csum",
+        "matchPattern": "^[ \t]+index [0-9]* ref",
+        "matchCount": "32",
+        "teardown": [
+            "$TC actions flush action csum"
+        ]
+    },
+    {
+        "id": "b4e9",
+        "name": "Delete batch of 32 csum actions",
+        "category": [
+            "actions",
+            "csum"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action csum",
+                0,
+                1,
+                255
+            ],
+            "bash -c \"for i in \\`seq 1 32\\`; do cmd=\\\"action csum tcp continue index \\$i \\\"; args=\"\\$args\\$cmd\"; done && $TC actions add \\$args\""
+        ],
+        "cmdUnderTest": "bash -c \"for i in \\`seq 1 32\\`; do cmd=\\\"action csum index \\$i \\\"; args=\"\\$args\\$cmd\"; done && $TC actions del \\$args\"",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action csum",
+        "matchPattern": "^[ \t]+index [0-9]+ ref",
+        "matchCount": "0",
+        "teardown": []
+    },
+    {
+        "id": "0015",
+        "name": "Add batch of 32 csum tcp actions with large cookies",
+        "category": [
+            "actions",
+            "csum"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action csum",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "bash -c \"for i in \\`seq 1 32\\`; do cmd=\\\"action csum tcp continue index \\$i cookie aaabbbcccdddeee \\\"; args=\"\\$args\\$cmd\"; done && $TC actions add \\$args\"",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions ls action csum",
+        "matchPattern": "^[ \t]+index [0-9]* ref",
+        "matchCount": "32",
+        "teardown": [
+            "$TC actions flush action csum"
+        ]
+    },
+    {
+        "id": "989e",
+        "name": "Delete batch of 32 csum actions with large cookies",
+        "category": [
+            "actions",
+            "csum"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action csum",
+                0,
+                1,
+                255
+            ],
+            "bash -c \"for i in \\`seq 1 32\\`; do cmd=\\\"action csum tcp continue index \\$i cookie aaabbbcccdddeee \\\"; args=\"\\$args\\$cmd\"; done && $TC actions add \\$args\""
+        ],
+        "cmdUnderTest": "bash -c \"for i in \\`seq 1 32\\`; do cmd=\\\"action csum index \\$i \\\"; args=\"\\$args\\$cmd\"; done && $TC actions del \\$args\"",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action csum",
+        "matchPattern": "^[ \t]+index [0-9]+ ref",
+        "matchCount": "0",
+        "teardown": []
+    }
+]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/gact.json b/tools/testing/selftests/tc-testing/tc-tests/actions/gact.json
new file mode 100644
index 000000000..68c91023c
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/gact.json
@@ -0,0 +1,540 @@
+[
+    {
+        "id": "e89a",
+        "name": "Add valid pass action",
+        "category": [
+            "actions",
+            "gact"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action gact",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action pass index 8",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action gact",
+        "matchPattern": "action order [0-9]*: gact action pass.*index 8 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action gact"
+        ]
+    },
+    {
+        "id": "a02c",
+        "name": "Add valid pipe action",
+        "category": [
+            "actions",
+            "gact"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action gact",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action pipe index 6",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action gact",
+        "matchPattern": "action order [0-9]*: gact action pipe.*index 6 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action gact"
+        ]
+    },
+    {
+        "id": "feef",
+        "name": "Add valid reclassify action",
+        "category": [
+            "actions",
+            "gact"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action gact",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action reclassify index 5",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action gact",
+        "matchPattern": "action order [0-9]*: gact action reclassify.*index 5 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action gact"
+        ]
+    },
+    {
+        "id": "8a7a",
+        "name": "Add valid drop action",
+        "category": [
+            "actions",
+            "gact"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action gact",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action drop index 30",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action gact",
+        "matchPattern": "action order [0-9]*: gact action drop.*index 30 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action gact"
+        ]
+    },
+    {
+        "id": "9a52",
+        "name": "Add valid continue action",
+        "category": [
+            "actions",
+            "gact"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action gact",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action continue index 432",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action gact",
+        "matchPattern": "action order [0-9]*: gact action continue.*index 432 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action gact"
+        ]
+    },
+    {
+        "id": "d700",
+        "name": "Add invalid action",
+        "category": [
+            "actions",
+            "gact"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action gact",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action pump index 386",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions list action gact",
+        "matchPattern": "action order [0-9]*: gact action.*index 386 ref",
+        "matchCount": "0",
+        "teardown": [
+            "$TC actions flush action gact"
+        ]
+    },
+    {
+        "id": "9215",
+        "name": "Add action with duplicate index",
+        "category": [
+            "actions",
+            "gact"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action gact",
+                0,
+                1,
+                255
+            ],
+            "$TC actions add action pipe index 15"
+        ],
+        "cmdUnderTest": "$TC actions add action drop index 15",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions list action gact",
+        "matchPattern": "action order [0-9]*: gact action drop.*index 15 ref",
+        "matchCount": "0",
+        "teardown": [
+            "$TC actions flush action gact"
+        ]
+    },
+    {
+        "id": "798e",
+        "name": "Add action with index exceeding 32-bit maximum",
+        "category": [
+            "actions",
+            "gact"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action gact",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action drop index 4294967296",
+        "expExitCode": "255",
+        "verifyCmd": "actions list action gact",
+        "matchPattern": "action order [0-9]*: gact action drop.*index 4294967296 ref",
+        "matchCount": "0",
+        "teardown": [
+            "$TC actions flush action gact"
+        ]
+    },
+    {
+        "id": "22be",
+        "name": "Add action with index at 32-bit maximum",
+        "category": [
+            "actions",
+            "gact"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action gact",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action drop index 4294967295",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action gact",
+        "matchPattern": "action order [0-9]*: gact action drop.*index 4294967295 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action gact"
+        ]
+    },
+    {
+        "id": "ac2a",
+        "name": "List actions",
+        "category": [
+            "actions",
+            "gact"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action gact",
+                0,
+                1,
+                255
+            ],
+            "$TC actions add action reclassify index 101",
+            "$TC actions add action reclassify index 102",
+            "$TC actions add action reclassify index 103",
+            "$TC actions add action reclassify index 104",
+            "$TC actions add action reclassify index 105"
+        ],
+        "cmdUnderTest": "$TC actions list action gact",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action gact",
+        "matchPattern": "action order [0-9]*: gact action reclassify",
+        "matchCount": "5",
+        "teardown": [
+            "$TC actions flush action gact"
+        ]
+    },
+    {
+        "id": "3edf",
+        "name": "Flush gact actions",
+        "category": [
+            "actions",
+            "gact"
+        ],
+        "setup": [
+            "$TC actions add action reclassify index 101",
+            "$TC actions add action reclassify index 102",
+            "$TC actions add action reclassify index 103",
+            "$TC actions add action reclassify index 104",
+            "$TC actions add action reclassify index 105"
+        ],
+        "cmdUnderTest": "$TC actions flush action gact",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action gact",
+        "matchPattern": "action order [0-9]*: gact action reclassify",
+        "matchCount": "0",
+        "teardown": []
+    },
+    {
+        "id": "63ec",
+        "name": "Delete pass action",
+        "category": [
+            "actions",
+            "gact"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action gact",
+                0,
+                1,
+                255
+            ],
+            "$TC actions add action pass index 1"
+        ],
+        "cmdUnderTest": "$TC actions del action gact index 1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action gact",
+        "matchPattern": "action order [0-9]*: gact action pass.*index 1 ref",
+        "matchCount": "0",
+        "teardown": [
+            "$TC actions flush action gact"
+        ]
+    },
+    {
+        "id": "46be",
+        "name": "Delete pipe action",
+        "category": [
+            "actions",
+            "gact"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action gact",
+                0,
+                1,
+                255
+            ],
+            "$TC actions add action pipe index 9"
+        ],
+        "cmdUnderTest": "$TC actions del action gact index 9",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action gact",
+        "matchPattern": "action order [0-9]*: gact action pipe.*index 9 ref",
+        "matchCount": "0",
+        "teardown": [
+            "$TC actions flush action gact"
+        ]
+    },
+    {
+        "id": "2e08",
+        "name": "Delete reclassify action",
+        "category": [
+            "actions",
+            "gact"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action gact",
+                0,
+                1,
+                255
+            ],
+            "$TC actions add action reclassify index 65536"
+        ],
+        "cmdUnderTest": "$TC actions del action gact index 65536",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action gact",
+        "matchPattern": "action order [0-9]*: gact action reclassify.*index 65536 ref",
+        "matchCount": "0",
+        "teardown": [
+            "$TC actions flush action gact"
+        ]
+    },
+    {
+        "id": "99c4",
+        "name": "Delete drop action",
+        "category": [
+            "actions",
+            "gact"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action gact",
+                0,
+                1,
+                255
+            ],
+            "$TC actions add action drop index 16"
+        ],
+        "cmdUnderTest": "$TC actions del action gact index 16",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action gact",
+        "matchPattern": "action order [0-9]*: gact action drop.*index 16 ref",
+        "matchCount": "0",
+        "teardown": [
+            "$TC actions flush action gact"
+        ]
+    },
+    {
+        "id": "fb6b",
+        "name": "Delete continue action",
+        "category": [
+            "actions",
+            "gact"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action gact",
+                0,
+                1,
+                255
+            ],
+            "$TC actions add action continue index 32"
+        ],
+        "cmdUnderTest": "$TC actions del action gact index 32",
+        "expExitCode": "0",
+        "verifyCmd": "actions list action gact",
+        "matchPattern": "action order [0-9]*: gact action continue.*index 32 ref",
+        "matchCount": "0",
+        "teardown": [
+            "$TC actions flush action gact"
+        ]
+    },
+    {
+        "id": "0eb3",
+        "name": "Delete non-existent action",
+        "category": [
+            "actions",
+            "gact"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action gact",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions del action gact index 2",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions list action gact",
+        "matchPattern": "action order [0-9]*: gact action",
+        "matchCount": "0",
+        "teardown": [
+            "$TC actions flush action gact"
+        ]
+    },
+    {
+        "id": "f02c",
+        "name": "Replace gact action",
+        "category": [
+            "actions",
+            "gact"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action gact",
+                0,
+                1,
+                255
+            ],
+            "$TC actions add action drop index 10",
+            "$TC actions add action drop index 12"
+        ],
+        "cmdUnderTest": "$TC actions replace action ok index 12",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions ls action gact",
+        "matchPattern": "action order [0-9]*: gact action pass",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action gact"
+        ]
+    },
+    {
+        "id": "525f",
+        "name": "Get gact action by index",
+        "category": [
+            "actions",
+            "gact"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action gact",
+                0,
+                1,
+                255
+            ],
+            "$TC actions add action drop index 3900800700"
+        ],
+        "cmdUnderTest": "$TC actions get action gact index 3900800700",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action gact index 3900800700",
+        "matchPattern": "index 3900800700",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action gact"
+        ]
+    },
+    {
+        "id": "1021",
+        "name": "Add batch of 32 gact pass actions",
+        "category": [
+            "actions",
+            "gact"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action gact",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "bash -c \"for i in \\`seq 1 32\\`; do cmd=\\\"action pass index \\$i \\\"; args=\"\\$args\\$cmd\"; done && $TC actions add \\$args\"",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action gact",
+        "matchPattern": "^[ \t]+index [0-9]+ ref",
+        "matchCount": "32",
+        "teardown": [
+            "$TC actions flush action gact"
+        ]
+    },
+    {
+        "id": "da7a",
+        "name": "Add batch of 32 gact continue actions with cookie",
+        "category": [
+            "actions",
+            "gact"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action gact",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "bash -c \"for i in \\`seq 1 32\\`; do cmd=\\\"action continue index \\$i cookie aabbccddeeff112233445566778800a1 \\\"; args=\"\\$args\\$cmd\"; done && $TC actions add \\$args\"",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action gact",
+        "matchPattern": "^[ \t]+index [0-9]+ ref",
+        "matchCount": "32",
+        "teardown": [
+            "$TC actions flush action gact"
+        ]
+    },
+    {
+        "id": "8aa3",
+        "name": "Delete batch of 32 gact continue actions",
+        "category": [
+            "actions",
+            "gact"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action gact",
+                0,
+                1,
+                255
+            ],
+            "bash -c \"for i in \\`seq 1 32\\`; do cmd=\\\"action continue index \\$i \\\"; args=\\\"\\$args\\$cmd\\\"; done && $TC actions add \\$args\""
+        ],
+        "cmdUnderTest": "bash -c \"for i in \\`seq 1 32\\`; do cmd=\\\"action gact index \\$i \\\"; args=\"\\$args\\$cmd\"; done && $TC actions del \\$args\"",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action gact",
+        "matchPattern": "^[ \t]+index [0-9]+ ref",
+        "matchCount": "0",
+        "teardown": []
+    }
+]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/ife.json b/tools/testing/selftests/tc-testing/tc-tests/actions/ife.json
new file mode 100644
index 000000000..0da3545ca
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/ife.json
@@ -0,0 +1,1064 @@
+[
+    {
+        "id": "7682",
+        "name": "Create valid ife encode action with mark and pass control",
+        "category": [
+            "actions",
+            "ife"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action ife",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action ife encode allow mark pass index 2",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action ife index 2",
+        "matchPattern": "action order [0-9]*: ife encode action pass.*type 0[xX]ED3E.*allow mark.*index 2",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action ife"
+        ]
+    },
+    {
+        "id": "ef47",
+        "name": "Create valid ife encode action with mark and pipe control",
+        "category": [
+            "actions",
+            "ife"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action ife",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action ife encode use mark 10 pipe index 2",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action ife index 2",
+        "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0[xX]ED3E.*use mark.*index 2",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action ife"
+        ]
+    },
+    {
+        "id": "df43",
+        "name": "Create valid ife encode action with mark and continue control",
+        "category": [
+            "actions",
+            "ife"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action ife",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action ife encode allow mark continue index 2",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action ife index 2",
+        "matchPattern": "action order [0-9]*: ife encode action continue.*type 0[xX]ED3E.*allow mark.*index 2",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action ife"
+        ]
+    },
+    {
+        "id": "e4cf",
+        "name": "Create valid ife encode action with mark and drop control",
+        "category": [
+            "actions",
+            "ife"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action ife",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action ife encode use mark 789 drop index 2",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action ife index 2",
+        "matchPattern": "action order [0-9]*: ife encode action drop.*type 0[xX]ED3E.*use mark 789.*index 2",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action ife"
+        ]
+    },
+    {
+        "id": "ccba",
+        "name": "Create valid ife encode action with mark and reclassify control",
+        "category": [
+            "actions",
+            "ife"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action ife",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action ife encode use mark 656768 reclassify index 2",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action ife index 2",
+        "matchPattern": "action order [0-9]*: ife encode action reclassify.*type 0[xX]ED3E.*use mark 656768.*index 2",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action ife"
+        ]
+    },
+    {
+        "id": "a1cf",
+        "name": "Create valid ife encode action with mark and jump control",
+        "category": [
+            "actions",
+            "ife"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action ife",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action ife encode use mark 65 jump 1 index 2",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action ife index 2",
+        "matchPattern": "action order [0-9]*: ife encode action jump 1.*type 0[xX]ED3E.*use mark 65.*index 2",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action ife"
+        ]
+    },
+    {
+        "id": "cb3d",
+        "name": "Create valid ife encode action with mark value at 32-bit maximum",
+        "category": [
+            "actions",
+            "ife"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action ife",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action ife encode use mark 4294967295 reclassify index 90",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action ife index 90",
+        "matchPattern": "action order [0-9]*: ife encode action reclassify.*type 0[xX]ED3E.*use mark 4294967295.*index 90",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action ife"
+        ]
+    },
+    {
+        "id": "1efb",
+        "name": "Create ife encode action with mark value exceeding 32-bit maximum",
+        "category": [
+            "actions",
+            "ife"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action ife",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action ife encode use mark 4294967295999 pipe index 90",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions get action ife index 90",
+        "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0[xX]ED3E.*use mark 4294967295999.*index 90",
+        "matchCount": "0",
+        "teardown": []
+    },
+    {
+        "id": "95ed",
+        "name": "Create valid ife encode action with prio and pass control",
+        "category": [
+            "actions",
+            "ife"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action ife",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action ife encode allow prio pass index 9",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action ife index 9",
+        "matchPattern": "action order [0-9]*: ife encode action pass.*type 0[xX]ED3E.*allow prio.*index 9",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action ife"
+        ]
+    },
+    {
+        "id": "aa17",
+        "name": "Create valid ife encode action with prio and pipe control",
+        "category": [
+            "actions",
+            "ife"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action ife",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action ife encode use prio 7 pipe index 9",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action ife index 9",
+        "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0[xX]ED3E.*use prio 7.*index 9",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action ife"
+        ]
+    },
+    {
+        "id": "74c7",
+        "name": "Create valid ife encode action with prio and continue control",
+        "category": [
+            "actions",
+            "ife"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action ife",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action ife encode use prio 3 continue index 9",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action ife index 9",
+        "matchPattern": "action order [0-9]*: ife encode action continue.*type 0[xX]ED3E.*use prio 3.*index 9",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action ife"
+        ]
+    },
+    {
+        "id": "7a97",
+        "name": "Create valid ife encode action with prio and drop control",
+        "category": [
+            "actions",
+            "ife"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action ife",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action ife encode allow prio drop index 9",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action ife index 9",
+        "matchPattern": "action order [0-9]*: ife encode action drop.*type 0[xX]ED3E.*allow prio.*index 9",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action ife"
+        ]
+    },
+    {
+        "id": "f66b",
+        "name": "Create valid ife encode action with prio and reclassify control",
+        "category": [
+            "actions",
+            "ife"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action ife",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action ife encode use prio 998877 reclassify index 9",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action ife index 9",
+        "matchPattern": "action order [0-9]*: ife encode action reclassify.*type 0[xX]ED3E.*use prio 998877.*index 9",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action ife"
+        ]
+    },
+    {
+        "id": "3056",
+        "name": "Create valid ife encode action with prio and jump control",
+        "category": [
+            "actions",
+            "ife"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action ife",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action ife encode use prio 998877 jump 10 index 9",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action ife index 9",
+        "matchPattern": "action order [0-9]*: ife encode action jump 10.*type 0[xX]ED3E.*use prio 998877.*index 9",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action ife"
+        ]
+    },
+    {
+        "id": "7dd3",
+        "name": "Create valid ife encode action with prio value at 32-bit maximum",
+        "category": [
+            "actions",
+            "ife"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action ife",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action ife encode use prio 4294967295 reclassify index 99",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action ife index 99",
+        "matchPattern": "action order [0-9]*: ife encode action reclassify.*type 0[xX]ED3E.*use prio 4294967295.*index 99",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action ife"
+        ]
+    },
+    {
+        "id": "2ca1",
+        "name": "Create ife encode action with prio value exceeding 32-bit maximum",
+        "category": [
+            "actions",
+            "ife"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action ife",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action ife encode use prio 4294967298 pipe index 99",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions get action ife index 99",
+        "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0[xX]ED3E.*use prio 4294967298.*index 99",
+        "matchCount": "0",
+        "teardown": []
+    },
+    {
+        "id": "05bb",
+        "name": "Create valid ife encode action with tcindex and pass control",
+        "category": [
+            "actions",
+            "ife"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action ife",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action ife encode allow tcindex pass index 1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action ife index 1",
+        "matchPattern": "action order [0-9]*: ife encode action pass.*type 0[xX]ED3E.*allow tcindex.*index 1",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action ife"
+        ]
+    },
+    {
+        "id": "ce65",
+        "name": "Create valid ife encode action with tcindex and pipe control",
+        "category": [
+            "actions",
+            "ife"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action ife",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action ife encode use tcindex 111 pipe index 1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action ife index 1",
+        "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0[xX]ED3E.*use tcindex 111.*index 1",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action ife"
+        ]
+    },
+    {
+        "id": "09cd",
+        "name": "Create valid ife encode action with tcindex and continue control",
+        "category": [
+            "actions",
+            "ife"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action ife",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action ife encode use tcindex 1 continue index 1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action ife index 1",
+        "matchPattern": "action order [0-9]*: ife encode action continue.*type 0[xX]ED3E.*use tcindex 1.*index 1",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action ife"
+        ]
+    },
+    {
+        "id": "8eb5",
+        "name": "Create valid ife encode action with tcindex and continue control",
+        "category": [
+            "actions",
+            "ife"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action ife",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action ife encode use tcindex 1 continue index 1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action ife index 1",
+        "matchPattern": "action order [0-9]*: ife encode action continue.*type 0[xX]ED3E.*use tcindex 1.*index 1",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action ife"
+        ]
+    },
+    {
+        "id": "451a",
+        "name": "Create valid ife encode action with tcindex and drop control",
+        "category": [
+            "actions",
+            "ife"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action ife",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action ife encode allow tcindex drop index 77",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action ife index 77",
+        "matchPattern": "action order [0-9]*: ife encode action drop.*type 0[xX]ED3E.*allow tcindex.*index 77",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action ife"
+        ]
+    },
+    {
+        "id": "d76c",
+        "name": "Create valid ife encode action with tcindex and reclassify control",
+        "category": [
+            "actions",
+            "ife"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action ife",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action ife encode allow tcindex reclassify index 77",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action ife index 77",
+        "matchPattern": "action order [0-9]*: ife encode action reclassify.*type 0[xX]ED3E.*allow tcindex.*index 77",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action ife"
+        ]
+    },
+    {
+        "id": "e731",
+        "name": "Create valid ife encode action with tcindex and jump control",
+        "category": [
+            "actions",
+            "ife"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action ife",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action ife encode allow tcindex jump 999 index 77",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action ife index 77",
+        "matchPattern": "action order [0-9]*: ife encode action jump 999.*type 0[xX]ED3E.*allow tcindex.*index 77",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action ife"
+        ]
+    },
+    {
+        "id": "b7b8",
+        "name": "Create valid ife encode action with tcindex value at 16-bit maximum",
+        "category": [
+            "actions",
+            "ife"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action ife",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action ife encode use tcindex 65535 pass index 1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action ife index 1",
+        "matchPattern": "action order [0-9]*: ife encode action pass.*type 0[xX]ED3E.*use tcindex 65535.*index 1",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action ife"
+        ]
+    },
+    {
+        "id": "d0d8",
+        "name": "Create ife encode action with tcindex value exceeding 16-bit maximum",
+        "category": [
+            "actions",
+            "ife"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action ife",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action ife encode use tcindex 65539 pipe index 1",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions get action ife index 1",
+        "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0[xX]ED3E.*use tcindex 65539.*index 1",
+        "matchCount": "0",
+        "teardown": []
+    },
+    {
+        "id": "2a9c",
+        "name": "Create valid ife encode action with mac src parameter",
+        "category": [
+            "actions",
+            "ife"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action ife",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action ife encode allow mark src 00:11:22:33:44:55 pipe index 1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action ife index 1",
+        "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0[xX]ED3E.*allow mark src 00:11:22:33:44:55.*index 1",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action ife"
+        ]
+    },
+    {
+        "id": "cf5c",
+        "name": "Create valid ife encode action with mac dst parameter",
+        "category": [
+            "actions",
+            "ife"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action ife",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action ife encode use prio 9876 dst 00:11:22:33:44:55 reclassify index 1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action ife index 1",
+        "matchPattern": "action order [0-9]*: ife encode action reclassify.*type 0[xX]ED3E.*use prio 9876 dst 00:11:22:33:44:55.*index 1",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action ife"
+        ]
+    },
+    {
+        "id": "2353",
+        "name": "Create valid ife encode action with mac src and mac dst parameters",
+        "category": [
+            "actions",
+            "ife"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action ife",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action ife encode allow tcindex src 00:aa:bb:cc:dd:ee dst 00:11:22:33:44:55 pass index 11",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action ife index 11",
+        "matchPattern": "action order [0-9]*: ife encode action pass.*type 0[xX]ED3E.*allow tcindex dst 00:11:22:33:44:55 src 00:aa:bb:cc:dd:ee .*index 11",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action ife"
+        ]
+    },
+    {
+        "id": "552c",
+        "name": "Create valid ife encode action with mark and type parameters",
+        "category": [
+            "actions",
+            "ife"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action ife",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action ife encode use mark 7 type 0xfefe pass index 1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action ife index 1",
+        "matchPattern": "action order [0-9]*: ife encode action pass.*type 0[xX]FEFE.*use mark 7.*index 1",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action ife"
+        ]
+    },
+    {
+        "id": "0421",
+        "name": "Create valid ife encode action with prio and type parameters",
+        "category": [
+            "actions",
+            "ife"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action ife",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action ife encode use prio 444 type 0xabba pipe index 21",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action ife index 21",
+        "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0[xX]ABBA.*use prio 444.*index 21",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action ife"
+        ]
+    },
+    {
+        "id": "4017",
+        "name": "Create valid ife encode action with tcindex and type parameters",
+        "category": [
+            "actions",
+            "ife"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action ife",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action ife encode use tcindex 5000 type 0xabcd reclassify index 21",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action ife index 21",
+        "matchPattern": "action order [0-9]*: ife encode action reclassify.*type 0[xX]ABCD.*use tcindex 5000.*index 21",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action ife"
+        ]
+    },
+    {
+        "id": "fac3",
+        "name": "Create valid ife encode action with index at 32-bit maximum",
+        "category": [
+            "actions",
+            "ife"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action ife",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action ife encode allow mark pass index 4294967295",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action ife index 4294967295",
+        "matchPattern": "action order [0-9]*: ife encode action pass.*type 0[xX]ED3E.*allow mark.*index 4294967295",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action ife"
+        ]
+    },
+    {
+        "id": "7c25",
+        "name": "Create valid ife decode action with pass control",
+        "category": [
+            "actions",
+            "ife"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action ife",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action ife decode pass index 1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action ife index 1",
+        "matchPattern": "action order [0-9]*: ife decode action pass.*type 0(x0)?.*allow mark allow tcindex allow prio.*index 1",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action ife"
+        ]
+    },
+    {
+        "id": "dccb",
+        "name": "Create valid ife decode action with pipe control",
+        "category": [
+            "actions",
+            "ife"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action ife",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action ife decode pipe index 1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action ife index 1",
+        "matchPattern": "action order [0-9]*: ife decode action pipe.*type 0(x0)?.*allow mark allow tcindex allow prio.*index 1",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action ife"
+        ]
+    },
+    {
+        "id": "7bb9",
+        "name": "Create valid ife decode action with continue control",
+        "category": [
+            "actions",
+            "ife"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action ife",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action ife decode continue index 1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action ife index 1",
+        "matchPattern": "action order [0-9]*: ife decode action continue.*type 0(x0)?.*allow mark allow tcindex allow prio.*index 1",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action ife"
+        ]
+    },
+    {
+        "id": "d9ad",
+        "name": "Create valid ife decode action with drop control",
+        "category": [
+            "actions",
+            "ife"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action ife",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action ife decode drop index 1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action ife index 1",
+        "matchPattern": "action order [0-9]*: ife decode action drop.*type 0(x0)?.*allow mark allow tcindex allow prio.*index 1",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action ife"
+        ]
+    },
+    {
+        "id": "219f",
+        "name": "Create valid ife decode action with reclassify control",
+        "category": [
+            "actions",
+            "ife"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action ife",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action ife decode reclassify index 1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action ife index 1",
+        "matchPattern": "action order [0-9]*: ife decode action reclassify.*type 0(x0)?.*allow mark allow tcindex allow prio.*index 1",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action ife"
+        ]
+    },
+    {
+        "id": "8f44",
+        "name": "Create valid ife decode action with jump control",
+        "category": [
+            "actions",
+            "ife"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action ife",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action ife decode jump 10 index 1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action ife index 1",
+        "matchPattern": "action order [0-9]*: ife decode action jump 10.*type 0(x0)?.*allow mark allow tcindex allow prio.*index 1",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action ife"
+        ]
+    },
+    {
+        "id": "56cf",
+        "name": "Create ife encode action with index exceeding 32-bit maximum",
+        "category": [
+            "actions",
+            "ife"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action ife",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action ife encode allow mark pass index 4294967295999",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions get action ife index 4294967295999",
+        "matchPattern": "action order [0-9]*: ife encode action pass.*type 0[xX]ED3E.*allow mark.*index 4294967295999",
+        "matchCount": "0",
+        "teardown": []
+    },
+    {
+        "id": "ee94",
+        "name": "Create ife encode action with invalid control",
+        "category": [
+            "actions",
+            "ife"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action ife",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action ife encode allow mark kuka index 4",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions get action ife index 4",
+        "matchPattern": "action order [0-9]*: ife encode action kuka.*type 0[xX]ED3E.*allow mark.*index 4",
+        "matchCount": "0",
+        "teardown": []
+    },
+    {
+        "id": "b330",
+        "name": "Create ife encode action with cookie",
+        "category": [
+            "actions",
+            "ife"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action ife",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action ife encode allow prio pipe index 4 cookie aabbccddeeff112233445566778800a1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action ife index 4",
+        "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0[xX]ED3E.*allow prio.*index 4.*cookie aabbccddeeff112233445566778800a1",
+        "matchCount": "1",
+        "teardown": [
+           "$TC actions flush action ife"
+        ]
+    },
+    {
+        "id": "bbc0",
+        "name": "Create ife encode action with invalid argument",
+        "category": [
+            "actions",
+            "ife"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action ife",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action ife encode allow foo pipe index 4",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions get action ife index 4",
+        "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0[xX]ED3E.*allow foo.*index 4",
+        "matchCount": "0",
+        "teardown": []
+    },
+    {
+        "id": "d54a",
+        "name": "Create ife encode action with invalid type argument",
+        "category": [
+            "actions",
+            "ife"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action ife",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action ife encode allow prio type 70000 pipe index 4",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions get action ife index 4",
+        "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0[xX]11170.*allow prio.*index 4",
+        "matchCount": "0",
+        "teardown": []
+    },
+    {
+        "id": "7ee0",
+        "name": "Create ife encode action with invalid mac src argument",
+        "category": [
+            "actions",
+            "ife"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action ife",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action ife encode allow prio src 00:11:22:33:44:pp pipe index 4",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions get action ife index 4",
+        "matchPattern": "action order [0-9]*: ife encode action pipe.*allow prio.*index 4",
+        "matchCount": "0",
+        "teardown": []
+    },
+    {
+        "id": "0a7d",
+        "name": "Create ife encode action with invalid mac dst argument",
+        "category": [
+            "actions",
+            "ife"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action ife",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action ife encode allow prio dst 00.111-22:33:44:aa pipe index 4",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions get action ife index 4",
+        "matchPattern": "action order [0-9]*: ife encode action pipe.*allow prio.*index 4",
+        "matchCount": "0",
+        "teardown": []
+    }
+]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json b/tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json
new file mode 100644
index 000000000..db49fd0f8
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json
@@ -0,0 +1,438 @@
+[
+    {
+        "id": "5124",
+        "name": "Add mirred mirror to egress action",
+        "category": [
+            "actions",
+            "mirred"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action mirred",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action mirred egress mirror index 1 dev lo",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action mirred",
+        "matchPattern": "action order [0-9]*: mirred \\(Egress Mirror to device lo\\).*index 1 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action mirred"
+        ]
+    },
+    {
+        "id": "6fb4",
+        "name": "Add mirred redirect to egress action",
+        "category": [
+            "actions",
+            "mirred"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action mirred",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action mirred egress redirect index 2 dev lo action pipe",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action mirred",
+        "matchPattern": "action order [0-9]*: mirred \\(Egress Redirect to device lo\\).*index 2 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action mirred",
+            "$TC actions flush action gact"
+        ]
+    },
+    {
+        "id": "ba38",
+        "name": "Get mirred actions",
+        "category": [
+            "actions",
+            "mirred"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action mirred",
+                0,
+                1,
+                255
+            ],
+            "$TC actions add action mirred egress mirror index 1 dev lo",
+            "$TC actions add action mirred egress redirect index 2 dev lo"
+        ],
+        "cmdUnderTest": "$TC actions show action mirred",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action mirred",
+        "matchPattern": "[Mirror|Redirect] to device lo",
+        "matchCount": "2",
+        "teardown": [
+            "$TC actions flush action mirred"
+        ]
+    },
+    {
+        "id": "d7c0",
+        "name": "Add invalid mirred direction",
+        "category": [
+            "actions",
+            "mirred"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action mirred",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action mirred inbound mirror index 20 dev lo",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions list action mirred",
+        "matchPattern": "action order [0-9]*: mirred \\(.*to device lo\\).*index 20 ref",
+        "matchCount": "0",
+        "teardown": [
+            "$TC actions flush action mirred"
+        ]
+    },
+    {
+        "id": "e213",
+        "name": "Add invalid mirred action",
+        "category": [
+            "actions",
+            "mirred"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action mirred",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action mirred egress remirror index 20 dev lo",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions list action mirred",
+        "matchPattern": "action order [0-9]*: mirred \\(Egress.*to device lo\\).*index 20 ref",
+        "matchCount": "0",
+        "teardown": [
+            "$TC actions flush action mirred"
+        ]
+    },
+    {
+        "id": "2d89",
+        "name": "Add mirred action with invalid device",
+        "category": [
+            "actions",
+            "mirred"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action mirred",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action mirred egress mirror index 20 dev eltoh",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions list action mirred",
+        "matchPattern": "action order [0-9]*: mirred \\(.*to device eltoh\\).*index 20 ref",
+        "matchCount": "0",
+        "teardown": [
+            "$TC actions flush action mirred"
+        ]
+    },
+    {
+        "id": "300b",
+        "name": "Add mirred action with duplicate index",
+        "category": [
+            "actions",
+            "mirred"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action mirred",
+                0,
+                1,
+                255
+            ],
+            "$TC actions add action mirred egress redirect index 15 dev lo"
+        ],
+        "cmdUnderTest": "$TC actions add action mirred egress mirror index 15 dev lo",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions list action mirred",
+        "matchPattern": "action order [0-9]*: mirred \\(.*to device lo\\).*index 15 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action mirred"
+        ]
+    },
+    {
+        "id": "8917",
+        "name": "Add mirred mirror action with control pass",
+        "category": [
+            "actions",
+            "mirred"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action mirred",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action mirred ingress mirror dev lo pass index 1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action mirred index 1",
+        "matchPattern": "action order [0-9]*: mirred \\(Ingress Mirror to device lo\\) pass.*index 1 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action mirred"
+        ]
+    },
+    {
+        "id": "1054",
+        "name": "Add mirred mirror action with control pipe",
+        "category": [
+            "actions",
+            "mirred"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action mirred",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action mirred ingress mirror dev lo pipe index 15",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action mirred index 15",
+        "matchPattern": "action order [0-9]*: mirred \\(Ingress Mirror to device lo\\) pipe.*index 15 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action mirred"
+        ]
+    },
+    {
+        "id": "9887",
+        "name": "Add mirred mirror action with control continue",
+        "category": [
+            "actions",
+            "mirred"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action mirred",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action mirred ingress mirror dev lo continue index 15",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action mirred index 15",
+        "matchPattern": "action order [0-9]*: mirred \\(Ingress Mirror to device lo\\) continue.*index 15 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action mirred"
+        ]
+    },
+    {
+        "id": "e4aa",
+        "name": "Add mirred mirror action with control reclassify",
+        "category": [
+            "actions",
+            "mirred"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action mirred",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action mirred ingress mirror dev lo reclassify index 150",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action mirred index 150",
+        "matchPattern": "action order [0-9]*: mirred \\(Ingress Mirror to device lo\\) reclassify.*index 150 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action mirred"
+        ]
+    },
+    {
+        "id": "ece9",
+        "name": "Add mirred mirror action with control drop",
+        "category": [
+            "actions",
+            "mirred"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action mirred",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action mirred ingress mirror dev lo drop index 99",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action mirred index 99",
+        "matchPattern": "action order [0-9]*: mirred \\(Ingress Mirror to device lo\\) drop.*index 99 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action mirred"
+        ]
+    },
+    {
+        "id": "0031",
+        "name": "Add mirred mirror action with control jump",
+        "category": [
+            "actions",
+            "mirred"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action mirred",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action mirred ingress mirror dev lo jump 10 index 99",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action mirred index 99",
+        "matchPattern": "action order [0-9]*: mirred \\(Ingress Mirror to device lo\\) jump 10.*index 99 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action mirred"
+        ]
+    },
+    {
+        "id": "407c",
+        "name": "Add mirred mirror action with cookie",
+        "category": [
+            "actions",
+            "mirred"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action mirred",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action mirred ingress mirror dev lo reclassify cookie aa11bb22cc33dd44ee55",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions ls action mirred",
+        "matchPattern": "action order [0-9]*: mirred \\(Ingress Mirror to device lo\\) reclassify.*cookie aa11bb22cc33dd44ee55",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action mirred"
+        ]
+    },
+    {
+        "id": "8b69",
+        "name": "Add mirred mirror action with index at 32-bit maximum",
+        "category": [
+            "actions",
+            "mirred"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action mirred",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action mirred ingress mirror dev lo pipe index 4294967295",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action mirred index 4294967295",
+        "matchPattern": "action order [0-9]*: mirred \\(Ingress Mirror to device lo\\) pipe.*index 4294967295",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action mirred"
+        ]
+    },
+    {
+        "id": "3f66",
+        "name": "Add mirred mirror action with index exceeding 32-bit maximum",
+        "category": [
+            "actions",
+            "mirred"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action mirred",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action mirred ingress mirror dev lo pipe index 429496729555",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions get action mirred index 429496729555",
+        "matchPattern": "action order [0-9]*: mirred \\(Ingress Mirror to device lo\\) pipe.*index 429496729555",
+        "matchCount": "0",
+        "teardown": []
+    },
+    {
+        "id": "a70e",
+        "name": "Delete mirred mirror action",
+        "category": [
+            "actions",
+            "mirred"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action mirred",
+                0,
+                1,
+                255
+            ],
+            "$TC actions add action mirred egress mirror index 5 dev lo"
+        ],
+        "cmdUnderTest": "$TC actions del action mirred index 5",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action mirred",
+        "matchPattern": "action order [0-9]*: mirred \\(Egress Mirror to device lo\\).*index 5 ref",
+        "matchCount": "0",
+        "teardown": [
+            "$TC actions flush action mirred"
+        ]
+    },
+    {
+        "id": "3fb3",
+        "name": "Delete mirred redirect action",
+        "category": [
+            "actions",
+            "mirred"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action mirred",
+                0,
+                1,
+                255
+            ],
+            "$TC actions add action mirred egress redirect index 5 dev lo"
+        ],
+        "cmdUnderTest": "$TC actions del action mirred index 5",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action mirred",
+        "matchPattern": "action order [0-9]*: mirred \\(Egress Redirect to device lo\\).*index 5 ref",
+        "matchCount": "0",
+        "teardown": [
+            "$TC actions flush action mirred"
+        ]
+    }
+]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/nat.json b/tools/testing/selftests/tc-testing/tc-tests/actions/nat.json
new file mode 100644
index 000000000..0080dc2fd
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/nat.json
@@ -0,0 +1,593 @@
+[
+    {
+        "id": "7565",
+        "name": "Add nat action on ingress with default control action",
+        "category": [
+            "actions",
+            "nat"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action nat",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action nat ingress 192.168.1.1 200.200.200.1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions ls action nat",
+        "matchPattern": "action order [0-9]+:  nat ingress 192.168.1.1/32 200.200.200.1 pass",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action nat"
+        ]
+    },
+    {
+        "id": "fd79",
+        "name": "Add nat action on ingress with pipe control action",
+        "category": [
+            "actions",
+            "nat"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action nat",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action nat ingress 1.1.1.1 2.2.2.1 pipe index 77",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action nat index 77",
+        "matchPattern": "action order [0-9]+:  nat ingress 1.1.1.1/32 2.2.2.1 pipe.*index 77 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action nat"
+        ]
+    },
+    {
+        "id": "eab9",
+        "name": "Add nat action on ingress with continue control action",
+        "category": [
+            "actions",
+            "nat"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action nat",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action nat ingress 192.168.10.10 192.168.20.20 continue index 1000",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action nat index 1000",
+        "matchPattern": "action order [0-9]+:  nat ingress 192.168.10.10/32 192.168.20.20 continue.*index 1000 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action nat"
+        ]
+    },
+    {
+        "id": "c53a",
+        "name": "Add nat action on ingress with reclassify control action",
+        "category": [
+            "actions",
+            "nat"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action nat",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action nat ingress 192.168.10.10 192.168.20.20 reclassify index 1000",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action nat index 1000",
+        "matchPattern": "action order [0-9]+:  nat ingress 192.168.10.10/32 192.168.20.20 reclassify.*index 1000 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action nat"
+        ]
+    },
+    {
+        "id": "76c9",
+        "name": "Add nat action on ingress with jump control action",
+        "category": [
+            "actions",
+            "nat"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action nat",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action nat ingress 12.18.10.10 12.18.20.20 jump 10 index 22",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action nat index 22",
+        "matchPattern": "action order [0-9]+:  nat ingress 12.18.10.10/32 12.18.20.20 jump 10.*index 22 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action nat"
+        ]
+    },
+    {
+        "id": "24c6",
+        "name": "Add nat action on ingress with drop control action",
+        "category": [
+            "actions",
+            "nat"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action nat",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action nat ingress 1.18.1.1 1.18.2.2 drop index 722",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action nat index 722",
+        "matchPattern": "action order [0-9]+:  nat ingress 1.18.1.1/32 1.18.2.2 drop.*index 722 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action nat"
+        ]
+    },
+    {
+        "id": "2120",
+        "name": "Add nat action on ingress with maximum index value",
+        "category": [
+            "actions",
+            "nat"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action nat",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action nat ingress 1.18.1.1 1.18.2.2 index 4294967295",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action nat index 4294967295",
+        "matchPattern": "action order [0-9]+:  nat ingress 1.18.1.1/32 1.18.2.2 pass.*index 4294967295 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action nat"
+        ]
+    },
+    {
+        "id": "3e9d",
+        "name": "Add nat action on ingress with invalid index value",
+        "category": [
+            "actions",
+            "nat"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action nat",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action nat ingress 1.18.1.1 1.18.2.2 index 4294967295555",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions get action nat index 4294967295555",
+        "matchPattern": "action order [0-9]+:  nat ingress 1.18.1.1/32 1.18.2.2 pass.*index 4294967295555 ref",
+        "matchCount": "0",
+        "teardown": [
+            [
+                "$TC actions flush action nat",
+                0,
+                1,
+                255
+            ]
+        ]
+    },
+    {
+        "id": "f6c9",
+        "name": "Add nat action on ingress with invalid IP address",
+        "category": [
+            "actions",
+            "nat"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action nat",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action nat ingress 1.1.1.1 1.1888.2.2 index 7",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions get action nat index 7",
+        "matchPattern": "action order [0-9]+:  nat ingress 1.1.1.1/32 1.1888.2.2 pass.*index 7 ref",
+        "matchCount": "0",
+        "teardown": [
+            [
+                "$TC actions flush action nat",
+                0,
+                1,
+                255
+            ]
+        ]
+    },
+    {
+        "id": "be25",
+        "name": "Add nat action on ingress with invalid argument",
+        "category": [
+            "actions",
+            "nat"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action nat",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action nat ingress 1.1.1.1 1.18.2.2 another_arg index 12",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions get action nat index 12",
+        "matchPattern": "action order [0-9]+:  nat ingress 1.1.1.1/32 1.18.2.2 pass.*another_arg.*index 12 ref",
+        "matchCount": "0",
+        "teardown": [
+            [
+                "$TC actions flush action nat",
+                0,
+                1,
+                255
+            ]
+        ]
+    },
+    {
+        "id": "a7bd",
+        "name": "Add nat action on ingress with DEFAULT IP address",
+        "category": [
+            "actions",
+            "nat"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action nat",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action nat ingress default 10.10.10.1 index 12",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action nat index 12",
+        "matchPattern": "action order [0-9]+:  nat ingress 0.0.0.0/32 10.10.10.1 pass.*index 12 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action nat"
+        ]
+    },
+    {
+        "id": "ee1e",
+        "name": "Add nat action on ingress with ANY IP address",
+        "category": [
+            "actions",
+            "nat"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action nat",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action nat ingress any 10.10.10.1 index 12",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action nat index 12",
+        "matchPattern": "action order [0-9]+:  nat ingress 0.0.0.0/32 10.10.10.1 pass.*index 12 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action nat"
+        ]
+    },
+    {
+        "id": "1de8",
+        "name": "Add nat action on ingress with ALL IP address",
+        "category": [
+            "actions",
+            "nat"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action nat",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action nat ingress all 10.10.10.1 index 12",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action nat index 12",
+        "matchPattern": "action order [0-9]+:  nat ingress 0.0.0.0/32 10.10.10.1 pass.*index 12 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action nat"
+        ]
+    },
+    {
+        "id": "8dba",
+        "name": "Add nat action on egress with default control action",
+        "category": [
+            "actions",
+            "nat"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action nat",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action nat egress 10.10.10.1 20.20.20.1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions ls action nat",
+        "matchPattern": "action order [0-9]+:  nat egress 10.10.10.1/32 20.20.20.1 pass",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action nat"
+        ]
+    },
+    {
+        "id": "19a7",
+        "name": "Add nat action on egress with pipe control action",
+        "category": [
+            "actions",
+            "nat"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action nat",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action nat egress 10.10.10.1 20.20.20.1 pipe",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions ls action nat",
+        "matchPattern": "action order [0-9]+:  nat egress 10.10.10.1/32 20.20.20.1 pipe",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action nat"
+        ]
+    },
+    {
+        "id": "f1d9",
+        "name": "Add nat action on egress with continue control action",
+        "category": [
+            "actions",
+            "nat"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action nat",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action nat egress 10.10.10.1 20.20.20.1 continue",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions ls action nat",
+        "matchPattern": "action order [0-9]+:  nat egress 10.10.10.1/32 20.20.20.1 continue",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action nat"
+        ]
+    },
+    {
+        "id": "6d4a",
+        "name": "Add nat action on egress with reclassify control action",
+        "category": [
+            "actions",
+            "nat"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action nat",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action nat egress 10.10.10.1 20.20.20.1 reclassify",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions ls action nat",
+        "matchPattern": "action order [0-9]+:  nat egress 10.10.10.1/32 20.20.20.1 reclassify",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action nat"
+        ]
+    },
+    {
+        "id": "b313",
+        "name": "Add nat action on egress with jump control action",
+        "category": [
+            "actions",
+            "nat"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action nat",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action nat egress 10.10.10.1 20.20.20.1 jump 777",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions ls action nat",
+        "matchPattern": "action order [0-9]+:  nat egress 10.10.10.1/32 20.20.20.1 jump 777",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action nat"
+        ]
+    },
+    {
+        "id": "d9fc",
+        "name": "Add nat action on egress with drop control action",
+        "category": [
+            "actions",
+            "nat"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action nat",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action nat egress 10.10.10.1 20.20.20.1 drop",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions ls action nat",
+        "matchPattern": "action order [0-9]+:  nat egress 10.10.10.1/32 20.20.20.1 drop",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action nat"
+        ]
+    },
+    {
+        "id": "a895",
+        "name": "Add nat action on egress with DEFAULT IP address",
+        "category": [
+            "actions",
+            "nat"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action nat",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action nat egress default 20.20.20.1 pipe index 10",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action nat index 10",
+        "matchPattern": "action order [0-9]+:  nat egress 0.0.0.0/32 20.20.20.1 pipe.*index 10 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action nat"
+        ]
+    },
+    {
+        "id": "2572",
+        "name": "Add nat action on egress with ANY IP address",
+        "category": [
+            "actions",
+            "nat"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action nat",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action nat egress any 20.20.20.1 pipe index 10",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action nat index 10",
+        "matchPattern": "action order [0-9]+:  nat egress 0.0.0.0/32 20.20.20.1 pipe.*index 10 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action nat"
+        ]
+    },
+    {
+        "id": "37f3",
+        "name": "Add nat action on egress with ALL IP address",
+        "category": [
+            "actions",
+            "nat"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action nat",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action nat egress all 20.20.20.1 pipe index 10",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action nat index 10",
+        "matchPattern": "action order [0-9]+:  nat egress 0.0.0.0/32 20.20.20.1 pipe.*index 10 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action nat"
+        ]
+    },
+    {
+        "id": "6054",
+        "name": "Add nat action on egress with cookie",
+        "category": [
+            "actions",
+            "nat"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action nat",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action nat egress all 20.20.20.1 pipe index 10 cookie aa1bc2d3eeff112233445566778800a1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action nat index 10",
+        "matchPattern": "action order [0-9]+:  nat egress 0.0.0.0/32 20.20.20.1 pipe.*index 10 ref.*cookie aa1bc2d3eeff112233445566778800a1",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action nat"
+        ]
+    },
+    {
+        "id": "79d6",
+        "name": "Add nat action on ingress with cookie",
+        "category": [
+            "actions",
+            "nat"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action nat",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action nat ingress 192.168.1.1 10.10.10.1 reclassify index 1 cookie 112233445566778899aabbccddeeff11",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action nat index 1",
+        "matchPattern": "action order [0-9]+:  nat ingress 192.168.1.1/32 10.10.10.1 reclassify.*index 1 ref.*cookie 112233445566778899aabbccddeeff11",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action nat"
+        ]
+    }
+]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/police.json b/tools/testing/selftests/tc-testing/tc-tests/actions/police.json
new file mode 100644
index 000000000..30f9b54bd
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/police.json
@@ -0,0 +1,719 @@
+[
+    {
+        "id": "49aa",
+        "name": "Add valid basic police action",
+        "category": [
+            "actions",
+            "police"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action police",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action police rate 1kbit burst 10k index 1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions ls action police",
+        "matchPattern": "action order [0-9]*:  police 0x1 rate 1Kbit burst 10Kb",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action police"
+        ]
+    },
+    {
+        "id": "3abe",
+        "name": "Add police action with duplicate index",
+        "category": [
+            "actions",
+            "police"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action police",
+                0,
+                1,
+                255
+            ],
+            "$TC actions add action police rate 4Mbit burst 120k index 9"
+        ],
+        "cmdUnderTest": "$TC actions add action police rate 8kbit burst 24k index 9",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions ls action police",
+        "matchPattern": "action order [0-9]*:  police 0x9",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action police"
+        ]
+    },
+    {
+        "id": "49fa",
+        "name": "Add valid police action with mtu",
+        "category": [
+            "actions",
+            "police"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action police",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action police rate 90kbit burst 10k mtu 1k index 98",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action police index 98",
+        "matchPattern": "action order [0-9]*:  police 0x62 rate 90Kbit burst 10Kb mtu 1Kb",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action police"
+        ]
+    },
+    {
+        "id": "7943",
+        "name": "Add valid police action with peakrate",
+        "category": [
+            "actions",
+            "police"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action police",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action police rate 90kbit burst 10k mtu 2kb peakrate 100kbit index 3",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions ls action police",
+        "matchPattern": "action order [0-9]*:  police 0x3 rate 90Kbit burst 10Kb mtu 2Kb peakrate 100Kbit",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action police"
+        ]
+    },
+    {
+        "id": "055e",
+        "name": "Add police action with peakrate and no mtu",
+        "category": [
+            "actions",
+            "police"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action police",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action police rate 5kbit burst 6kb peakrate 10kbit index 9",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions ls action police",
+        "matchPattern": "action order [0-9]*:  police 0x9 rate 5Kb burst 10Kb",
+        "matchCount": "0",
+        "teardown": [
+            "$TC actions flush action police"
+        ]
+    },
+    {
+        "id": "f057",
+        "name": "Add police action with valid overhead",
+        "category": [
+            "actions",
+            "police"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action police",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action police rate 1mbit burst 100k overhead 64 index 64",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action police index 64",
+        "matchPattern": "action order [0-9]*:  police 0x40 rate 1Mbit burst 100Kb mtu 2Kb action reclassify overhead 64b",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action police"
+        ]
+    },
+    {
+        "id": "7ffb",
+        "name": "Add police action with ethernet linklayer type",
+        "category": [
+            "actions",
+            "police"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action police",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action police rate 2mbit burst 200k linklayer ethernet index 8",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions show action police",
+        "matchPattern": "action order [0-9]*:  police 0x8 rate 2Mbit burst 200Kb mtu 2Kb action reclassify overhead 0b",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action police"
+        ]
+    },
+    {
+        "id": "3dda",
+        "name": "Add police action with atm linklayer type",
+        "category": [
+            "actions",
+            "police"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action police",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action police rate 2mbit burst 200k linklayer atm index 8",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions show action police",
+        "matchPattern": "action order [0-9]*:  police 0x8 rate 2Mbit burst 200Kb mtu 2Kb action reclassify overhead 0b linklayer atm",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action police"
+        ]
+    },
+    {
+        "id": "551b",
+        "name": "Add police actions with conform-exceed control continue/drop",
+        "category": [
+            "actions",
+            "police"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action police",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action police rate 3mbit burst 250k conform-exceed continue/drop index 1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action police index 1",
+        "matchPattern": "action order [0-9]*:  police 0x1 rate 3Mbit burst 250Kb mtu 2Kb action continue/drop",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action police"
+        ]
+    },
+    {
+        "id": "0c70",
+        "name": "Add police actions with conform-exceed control pass/reclassify",
+        "category": [
+            "actions",
+            "police"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action police",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action police rate 3mbit burst 250k conform-exceed pass/reclassify index 4",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions ls action police",
+        "matchPattern": "action order [0-9]*:  police 0x4 rate 3Mbit burst 250Kb mtu 2Kb action pass/reclassify",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action police"
+        ]
+    },
+    {
+        "id": "d946",
+        "name": "Add police actions with conform-exceed control pass/pipe",
+        "category": [
+            "actions",
+            "police"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action police",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action police rate 3mbit burst 250k conform-exceed pass/pipe index 5",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions ls action police",
+        "matchPattern": "action order [0-9]*:  police 0x5 rate 3Mbit burst 250Kb mtu 2Kb action pass/pipe",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action police"
+        ]
+    },
+    {
+        "id": "ddd6",
+        "name": "Add police action with invalid rate value",
+        "category": [
+            "actions",
+            "police"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action police",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action police rate 3tb burst 250k conform-exceed pass/pipe index 5",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions ls action police",
+        "matchPattern": "action order [0-9]*:  police 0x5 rate 3Tb burst 250Kb mtu 2Kb action pass/pipe",
+        "matchCount": "0",
+        "teardown": [
+            "$TC actions flush action police"
+        ]
+    },
+    {
+        "id": "f61c",
+        "name": "Add police action with invalid burst value",
+        "category": [
+            "actions",
+            "police"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action police",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action police rate 3kbit burst 250P conform-exceed pass/pipe index 5",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions ls action police",
+        "matchPattern": "action order [0-9]*:  police 0x5 rate 3Kbit burst 250Pb mtu 2Kb action pass/pipe",
+        "matchCount": "0",
+        "teardown": [
+            "$TC actions flush action police"
+        ]
+    },
+    {
+        "id": "6aaf",
+        "name": "Add police actions with conform-exceed control pass/pipe [with numeric values]",
+        "category": [
+            "actions",
+            "police"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action police",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action police rate 3mbit burst 250k conform-exceed 0/3 index 1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action police index 1",
+        "matchPattern": "action order [0-9]*:  police 0x1 rate 3Mbit burst 250Kb mtu 2Kb action pass/pipe",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action police"
+        ]
+    },
+    {
+        "id": "29b1",
+        "name": "Add police actions with conform-exceed control <invalid>/drop",
+        "category": [
+            "actions",
+            "police"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action police",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action police rate 3mbit burst 250k conform-exceed 10/drop index 1",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions ls action police",
+        "matchPattern": "action order [0-9]*:  police 0x1 rate 3Mbit burst 250Kb mtu 2Kb action ",
+        "matchCount": "0",
+        "teardown": [
+            "$TC actions flush action police"
+        ]
+    },
+    {
+        "id": "c26f",
+        "name": "Add police action with invalid peakrate value",
+        "category": [
+            "actions",
+            "police"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action police",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action police rate 90kbit burst 10k mtu 2kb peakrate 100T index 1",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions ls action police",
+        "matchPattern": "action order [0-9]*:  police 0x1 rate 90Kbit burst 10Kb mtu 2Kb peakrate 100Tbit",
+        "matchCount": "0",
+        "teardown": [
+            "$TC actions flush action police"
+        ]
+    },
+    {
+        "id": "db04",
+        "name": "Add police action with invalid mtu value",
+        "category": [
+            "actions",
+            "police"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action police",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action police rate 10kbit burst 10k mtu 2Pbit index 1",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions ls action police",
+        "matchPattern": "action order [0-9]*:  police 0x1 rate 10Kbit burst 1Kb mtu 2Pb",
+        "matchCount": "0",
+        "teardown": [
+            "$TC actions flush action police"
+        ]
+    },
+    {
+        "id": "f3c9",
+        "name": "Add police action with cookie",
+        "category": [
+            "actions",
+            "police"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action police",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action police rate 10mbit burst 10k index 1 cookie a1b1c1d1e1f12233bb",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action police index 1",
+        "matchPattern": "action order [0-9]*:  police 0x1 rate 10Mbit burst 10Kb mtu 2Kb.*cookie a1b1c1d1e1f12233bb",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action police"
+        ]
+    },
+    {
+        "id": "d190",
+        "name": "Add police action with maximum index",
+        "category": [
+            "actions",
+            "police"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action police",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action police rate 10mbit burst 10k index 4294967295",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action police index 4294967295",
+        "matchPattern": "action order [0-9]*:  police 0xffffffff rate 10Mbit burst 10Kb mtu 2Kb",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action police"
+        ]
+    },
+    {
+        "id": "336e",
+        "name": "Delete police action",
+        "category": [
+            "actions",
+            "police"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action police",
+                0,
+                1,
+                255
+            ],
+            "$TC actions add action police rate 5mbit burst 2m index 12"
+        ],
+        "cmdUnderTest": "$TC actions delete action police index 12",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions ls action police",
+        "matchPattern": "action order [0-9]*:  police 0xc rate 5Mb burst 2Mb",
+        "matchCount": "0",
+        "teardown": [
+            "$TC actions flush action police"
+        ]
+    },
+    {
+        "id": "77fa",
+        "name": "Get single police action from many actions",
+        "category": [
+            "actions",
+            "police"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action police",
+                0,
+                1,
+                255
+            ],
+            "$TC actions add action police rate 1mbit burst 100k index 1",
+            "$TC actions add action police rate 2mbit burst 200k index 2",
+            "$TC actions add action police rate 3mbit burst 300k index 3",
+            "$TC actions add action police rate 4mbit burst 400k index 4",
+            "$TC actions add action police rate 5mbit burst 500k index 5",
+            "$TC actions add action police rate 6mbit burst 600k index 6",
+            "$TC actions add action police rate 7mbit burst 700k index 7",
+            "$TC actions add action police rate 8mbit burst 800k index 8"
+        ],
+        "cmdUnderTest": "$TC actions get action police index 4",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action police index 4",
+        "matchPattern": "action order [0-9]*:  police 0x4 rate 4Mbit burst 400Kb",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action police"
+        ]
+    },
+    {
+        "id": "aa43",
+        "name": "Get single police action without specifying index",
+        "category": [
+            "actions",
+            "police"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action police",
+                0,
+                1,
+                255
+            ],
+            "$TC actions add action police rate 1mbit burst 100k index 1"
+        ],
+        "cmdUnderTest": "$TC actions get action police",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions get action police",
+        "matchPattern": "action order [0-9]*:  police",
+        "matchCount": "0",
+        "teardown": [
+            "$TC actions flush action police"
+        ]
+    },
+    {
+        "id": "858b",
+        "name": "List police actions",
+        "category": [
+            "actions",
+            "police"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action police",
+                0,
+                1,
+                255
+            ],
+            "$TC actions add action police rate 1mbit burst 100k index 1",
+            "$TC actions add action police rate 2mbit burst 200k index 2",
+            "$TC actions add action police rate 3mbit burst 300k index 3",
+            "$TC actions add action police rate 4mbit burst 400k index 4",
+            "$TC actions add action police rate 5mbit burst 500k index 5",
+            "$TC actions add action police rate 6mbit burst 600k index 6",
+            "$TC actions add action police rate 7mbit burst 700k index 7",
+            "$TC actions add action police rate 8mbit burst 800k index 8"
+        ],
+        "cmdUnderTest": "$TC actions list action police",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions ls action police",
+        "matchPattern": "action order [0-9]*:  police 0x[1-8] rate [1-8]Mbit burst [1-8]00Kb",
+        "matchCount": "8",
+        "teardown": [
+            "$TC actions flush action police"
+        ]
+    },
+    {
+        "id": "1c3a",
+        "name": "Flush police actions",
+        "category": [
+            "actions",
+            "police"
+        ],
+        "setup": [
+            "$TC actions add action police rate 1mbit burst 100k index 1",
+            "$TC actions add action police rate 2mbit burst 200k index 2",
+            "$TC actions add action police rate 3mbit burst 300k index 3",
+            "$TC actions add action police rate 4mbit burst 400k index 4",
+            "$TC actions add action police rate 5mbit burst 500k index 5",
+            "$TC actions add action police rate 6mbit burst 600k index 6",
+            "$TC actions add action police rate 7mbit burst 700k index 7",
+            "$TC actions add action police rate 8mbit burst 800k index 8"
+        ],
+        "cmdUnderTest": "$TC actions flush action police",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions ls action police",
+        "matchPattern": "action order [0-9]*:  police",
+        "matchCount": "0",
+        "teardown": [
+            ""
+        ]
+    },
+    {
+        "id": "7326",
+        "name": "Add police action with control continue",
+        "category": [
+            "actions",
+            "police"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action police",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action police rate 7mbit burst 1m continue index 1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action police index 1",
+        "matchPattern": "action order [0-9]*:  police 0x1 rate 7Mbit burst 1024Kb mtu 2Kb action continue",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action police"
+        ]
+    },
+    {
+        "id": "34fa",
+        "name": "Add police action with control drop",
+        "category": [
+            "actions",
+            "police"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action police",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action police rate 7mbit burst 1m drop index 1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions ls action police",
+        "matchPattern": "action order [0-9]*:  police 0x1 rate 7Mbit burst 1024Kb mtu 2Kb action drop",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action police"
+        ]
+    },
+    {
+        "id": "8dd5",
+        "name": "Add police action with control ok",
+        "category": [
+            "actions",
+            "police"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action police",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action police rate 7mbit burst 1m ok index 1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions ls action police",
+        "matchPattern": "action order [0-9]*:  police 0x1 rate 7Mbit burst 1024Kb mtu 2Kb action pass",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action police"
+        ]
+    },
+    {
+        "id": "b9d1",
+        "name": "Add police action with control reclassify",
+        "category": [
+            "actions",
+            "police"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action police",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action police rate 7mbit burst 1m reclassify index 1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action police index 1",
+        "matchPattern": "action order [0-9]*:  police 0x1 rate 7Mbit burst 1024Kb mtu 2Kb action reclassify",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action police"
+        ]
+    },
+    {
+        "id": "c534",
+        "name": "Add police action with control pipe",
+        "category": [
+            "actions",
+            "police"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action police",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action police rate 7mbit burst 1m pipe index 1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions ls action police",
+        "matchPattern": "action order [0-9]*:  police 0x1 rate 7Mbit burst 1024Kb mtu 2Kb action pipe",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action police"
+        ]
+    }
+]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/sample.json b/tools/testing/selftests/tc-testing/tc-tests/actions/sample.json
new file mode 100644
index 000000000..618def9bd
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/sample.json
@@ -0,0 +1,612 @@
+[
+    {
+        "id": "9784",
+        "name": "Add valid sample action with mandatory arguments",
+        "category": [
+            "actions",
+            "sample"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action sample",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action sample rate 10 group 1 index 2",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action sample index 2",
+        "matchPattern": "action order [0-9]+: sample rate 1/10 group 1.*index 2 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action sample"
+        ]
+    },
+    {
+        "id": "5c91",
+        "name": "Add valid sample action with mandatory arguments and continue control action",
+        "category": [
+            "actions",
+            "sample"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action sample",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action sample rate 700 group 2 continue index 2",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action sample index 2",
+        "matchPattern": "action order [0-9]+: sample rate 1/700 group 2 continue.*index 2 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action sample"
+        ]
+    },
+    {
+        "id": "334b",
+        "name": "Add valid sample action with mandatory arguments and drop control action",
+        "category": [
+            "actions",
+            "sample"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action sample",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action sample rate 10000 group 11 drop index 22",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action sample",
+        "matchPattern": "action order [0-9]+: sample rate 1/10000 group 11 drop.*index 22 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action sample"
+        ]
+    },
+    {
+        "id": "da69",
+        "name": "Add valid sample action with mandatory arguments and reclassify control action",
+        "category": [
+            "actions",
+            "sample"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action sample",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action sample rate 20000 group 72 reclassify index 100",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action sample",
+        "matchPattern": "action order [0-9]+: sample rate 1/20000 group 72 reclassify.*index 100 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action sample"
+        ]
+    },
+    {
+        "id": "13ce",
+        "name": "Add valid sample action with mandatory arguments and pipe control action",
+        "category": [
+            "actions",
+            "sample"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action sample",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action sample rate 20 group 2 pipe index 100",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action sample",
+        "matchPattern": "action order [0-9]+: sample rate 1/20 group 2 pipe.*index 100 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action sample"
+        ]
+    },
+    {
+        "id": "1886",
+        "name": "Add valid sample action with mandatory arguments and jump control action",
+        "category": [
+            "actions",
+            "sample"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action sample",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action sample rate 700 group 25 jump 4 index 200",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action sample index 200",
+        "matchPattern": "action order [0-9]+: sample rate 1/700 group 25 jump 4.*index 200 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action sample"
+        ]
+    },
+    {
+        "id": "7571",
+        "name": "Add sample action with invalid rate",
+        "category": [
+            "actions",
+            "sample"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action sample",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action sample rate 0 group 1 index 2",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions get action sample index 2",
+        "matchPattern": "action order [0-9]+: sample rate 1/0 group 1.*index 2 ref",
+        "matchCount": "0",
+        "teardown": [
+            "$TC actions flush action sample"
+        ]
+    },
+    {
+        "id": "b6d4",
+        "name": "Add sample action with mandatory arguments and invalid control action",
+        "category": [
+            "actions",
+            "sample"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action sample",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action sample rate 200000 group 52 foo index 1",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions list action sample",
+        "matchPattern": "action order [0-9]+: sample rate 1/200000 group 52 foo.*index 1 ref",
+        "matchCount": "0",
+        "teardown": []
+    },
+    {
+        "id": "a874",
+        "name": "Add invalid sample action without mandatory arguments",
+        "category": [
+            "actions",
+            "sample"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action sample",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action sample index 1",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions list action sample",
+        "matchPattern": "action order [0-9]+: sample.*index 1 ref",
+        "matchCount": "0",
+        "teardown": []
+    },
+    {
+        "id": "ac01",
+        "name": "Add invalid sample action without mandatory argument rate",
+        "category": [
+            "actions",
+            "sample"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action sample",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action sample group 10 index 1",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions list action sample",
+        "matchPattern": "action order [0-9]+: sample.*group 10.*index 1 ref",
+        "matchCount": "0",
+        "teardown": []
+    },
+    {
+        "id": "4203",
+        "name": "Add invalid sample action without mandatory argument group",
+        "category": [
+            "actions",
+            "sample"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action sample",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action sample rate 100 index 10",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions get action sample index 10",
+        "matchPattern": "action order [0-9]+: sample rate 1/100.*index 10 ref",
+        "matchCount": "0",
+        "teardown": []
+    },
+    {
+        "id": "14a7",
+        "name": "Add invalid sample action without mandatory argument group",
+        "category": [
+            "actions",
+            "sample"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action sample",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action sample rate 100 index 10",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions get action sample index 10",
+        "matchPattern": "action order [0-9]+: sample rate 1/100.*index 10 ref",
+        "matchCount": "0",
+        "teardown": []
+    },
+    {
+        "id": "8f2e",
+        "name": "Add valid sample action with trunc argument",
+        "category": [
+            "actions",
+            "sample"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action sample",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action sample rate 1024 group 4 trunc 1024 index 10",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action sample index 10",
+        "matchPattern": "action order [0-9]+: sample rate 1/1024 group 4 trunc_size 1024 pipe.*index 10 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action sample"
+        ]
+    },
+    {
+        "id": "45f8",
+        "name": "Add sample action with maximum rate argument",
+        "category": [
+            "actions",
+            "sample"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action sample",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action sample rate 4294967295 group 4 index 10",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action sample index 10",
+        "matchPattern": "action order [0-9]+: sample rate 1/4294967295 group 4 pipe.*index 10 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action sample"
+        ]
+    },
+    {
+        "id": "ad0c",
+        "name": "Add sample action with maximum trunc argument",
+        "category": [
+            "actions",
+            "sample"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action sample",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action sample rate 16000 group 4 trunc 4294967295 index 10",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action sample index 10",
+        "matchPattern": "action order [0-9]+: sample rate 1/16000 group 4 trunc_size 4294967295 pipe.*index 10 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action sample"
+        ]
+    },
+    {
+        "id": "83a9",
+        "name": "Add sample action with maximum group argument",
+        "category": [
+            "actions",
+            "sample"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action sample",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action sample rate 4294 group 4294967295 index 1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action sample index 1",
+        "matchPattern": "action order [0-9]+: sample rate 1/4294 group 4294967295 pipe.*index 1 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action sample"
+        ]
+    },
+    {
+        "id": "ed27",
+        "name": "Add sample action with invalid rate argument",
+        "category": [
+            "actions",
+            "sample"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action sample",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action sample rate 4294967296 group 4 index 10",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions get action sample index 10",
+        "matchPattern": "action order [0-9]+: sample rate 1/4294967296 group 4 pipe.*index 10 ref",
+        "matchCount": "0",
+        "teardown": []
+    },
+    {
+        "id": "2eae",
+        "name": "Add sample action with invalid group argument",
+        "category": [
+            "actions",
+            "sample"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action sample",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action sample rate 4098 group 5294967299 continue index 1",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions get action sample index 1",
+        "matchPattern": "action order [0-9]+: sample rate 1/4098 group 5294967299 continue.*index 1 ref",
+        "matchCount": "0",
+        "teardown": []
+    },
+    {
+        "id": "6ff3",
+        "name": "Add sample action with invalid trunc size",
+        "category": [
+            "actions",
+            "sample"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action sample",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action sample rate 1024 group 4 trunc 112233445566 index 11",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions get action sample index 11",
+        "matchPattern": "action order [0-9]+: sample rate 1/1024 group 4 trunc_size 112233445566.*index 11 ref",
+        "matchCount": "0",
+        "teardown": []
+    },
+    {
+        "id": "2b2a",
+        "name": "Add sample action with invalid index",
+        "category": [
+            "actions",
+            "sample"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action sample",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action sample rate 1024 group 4 index 5294967299",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions get action sample index 5294967299",
+        "matchPattern": "action order [0-9]+: sample rate 1/1024 group 4 pipe.*index 5294967299 ref",
+        "matchCount": "0",
+        "teardown": []
+    },
+    {
+        "id": "dee2",
+        "name": "Add sample action with maximum allowed index",
+        "category": [
+            "actions",
+            "sample"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action sample",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action sample rate 1024 group 4 index 4294967295",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action sample index 4294967295",
+        "matchPattern": "action order [0-9]+: sample rate 1/1024 group 4 pipe.*index 4294967295 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action sample"
+        ]
+    },
+    {
+        "id": "560e",
+        "name": "Add sample action with cookie",
+        "category": [
+            "actions",
+            "sample"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action sample",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action sample rate 1024 group 4 index 45 cookie aabbccdd",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action sample index 45",
+        "matchPattern": "action order [0-9]+: sample rate 1/1024 group 4 pipe.*index 45.*cookie aabbccdd",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action sample"
+        ]
+    },
+    {
+        "id": "704a",
+        "name": "Replace existing sample action with new rate argument",
+        "category": [
+            "actions",
+            "sample"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action sample",
+                0,
+                1,
+                255
+            ],
+            "$TC actions add action sample rate 1024 group 4 index 4"
+        ],
+        "cmdUnderTest": "$TC actions replace action sample rate 2048 group 4 index 4",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action sample",
+        "matchPattern": "action order [0-9]+: sample rate 1/2048 group 4 pipe.*index 4",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action sample"
+        ]
+    },
+    {
+        "id": "60eb",
+        "name": "Replace existing sample action with new group argument",
+        "category": [
+            "actions",
+            "sample"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action sample",
+                0,
+                1,
+                255
+            ],
+            "$TC actions add action sample rate 1024 group 4 index 4"
+        ],
+        "cmdUnderTest": "$TC actions replace action sample rate 1024 group 7 index 4",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action sample",
+        "matchPattern": "action order [0-9]+: sample rate 1/1024 group 7 pipe.*index 4",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action sample"
+        ]
+    },
+    {
+        "id": "2cce",
+        "name": "Replace existing sample action with new trunc argument",
+        "category": [
+            "actions",
+            "sample"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action sample",
+                0,
+                1,
+                255
+            ],
+            "$TC actions add action sample rate 1024 group 4 trunc 48 index 4"
+        ],
+        "cmdUnderTest": "$TC actions replace action sample rate 1024 group 7 trunc 64 index 4",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action sample",
+        "matchPattern": "action order [0-9]+: sample rate 1/1024 group 7 trunc_size 64 pipe.*index 4",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action sample"
+        ]
+    },
+    {
+        "id": "59d1",
+        "name": "Replace existing sample action with new control argument",
+        "category": [
+            "actions",
+            "sample"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action sample",
+                0,
+                1,
+                255
+            ],
+            "$TC actions add action sample rate 1024 group 4 reclassify index 4"
+        ],
+        "cmdUnderTest": "$TC actions replace action sample rate 1024 group 7 pipe index 4",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action sample",
+        "matchPattern": "action order [0-9]+: sample rate 1/1024 group 7 pipe.*index 4",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action sample"
+        ]
+    }
+]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/simple.json b/tools/testing/selftests/tc-testing/tc-tests/actions/simple.json
new file mode 100644
index 000000000..e89a7aa40
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/simple.json
@@ -0,0 +1,130 @@
+[
+    {
+        "id": "b078",
+        "name": "Add simple action",
+        "category": [
+            "actions",
+            "simple"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action simple",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action simple sdata \"A triumph\" index 60",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action simple",
+        "matchPattern": "action order [0-9]*: Simple <A triumph>.*index 60 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action simple"
+        ]
+    },
+    {
+        "id": "6d4c",
+        "name": "Add simple action with duplicate index",
+        "category": [
+            "actions",
+            "simple"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action simple",
+                0,
+                1,
+                255
+            ],
+            "$TC actions add action simple sdata \"Aruba\" index 4"
+        ],
+        "cmdUnderTest": "$TC actions add action simple sdata \"Jamaica\" index 4",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions list action simple",
+        "matchPattern": "action order [0-9]*: Simple <Jamaica>.*ref",
+        "matchCount": "0",
+        "teardown": [
+            "$TC actions flush action simple"
+        ]
+    },
+    {
+        "id": "2542",
+        "name": "List simple actions",
+        "category": [
+            "actions",
+            "simple"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action simple",
+                0,
+                1,
+                255
+            ],
+            "$TC actions add action simple sdata \"Rock\"",
+            "$TC actions add action simple sdata \"Paper\"",
+            "$TC actions add action simple sdata \"Scissors\" index 98"
+        ],
+        "cmdUnderTest": "$TC actions list action simple",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action simple",
+        "matchPattern": "action order [0-9]*: Simple <[A-Z][a-z]*>",
+        "matchCount": "3",
+        "teardown": [
+            "$TC actions flush action simple"
+        ]
+    },
+    {
+        "id": "ea67",
+        "name": "Delete simple action",
+        "category": [
+            "actions",
+            "simple"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action simple",
+                0,
+                1,
+                255
+            ],
+            "$TC actions add action simple sdata \"Blinkenlights\" index 1"
+        ],
+        "cmdUnderTest": "$TC actions delete action simple index 1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action simple",
+        "matchPattern": "action order [0-9]*: Simple <Blinkenlights>.*index 1 ref",
+        "matchCount": "0",
+        "teardown": [
+            "$TC actions flush action simple"
+        ]
+    },
+    {
+        "id": "8ff1",
+        "name": "Flush simple actions",
+        "category": [
+            "actions",
+            "simple"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action simple",
+                0,
+                1,
+                255
+            ],
+            "$TC actions add action simple sdata \"Kirk\"",
+            "$TC actions add action simple sdata \"Spock\" index 50",
+            "$TC actions add action simple sdata \"McCoy\" index 9"
+        ],
+        "cmdUnderTest": "$TC actions flush action simple",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action simple",
+        "matchPattern": "action order [0-9]*: Simple <[A-Z][a-z]*>",
+        "matchCount": "0",
+        "teardown": [
+            ""
+        ]
+    }
+]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/skbedit.json b/tools/testing/selftests/tc-testing/tc-tests/actions/skbedit.json
new file mode 100644
index 000000000..5aaf593b9
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/skbedit.json
@@ -0,0 +1,488 @@
+[
+    {
+        "id": "6236",
+        "name": "Add skbedit action with valid mark",
+        "category": [
+            "actions",
+            "skbedit"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action skbedit",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action skbedit mark 1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action skbedit",
+        "matchPattern": "action order [0-9]*: skbedit  mark 1",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action skbedit"
+        ]
+    },
+    {
+        "id": "407b",
+        "name": "Add skbedit action with invalid mark",
+        "category": [
+            "actions",
+            "skbedit"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action skbedit",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action skbedit mark 666777888999",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions list action skbedit",
+        "matchPattern": "action order [0-9]*:  skbedit mark",
+        "matchCount": "0",
+        "teardown": [
+            "$TC actions flush action skbedit"
+        ]
+    },
+    {
+        "id": "081d",
+        "name": "Add skbedit action with priority",
+        "category": [
+            "actions",
+            "skbedit"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action skbedit",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action skbedit prio 99",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action skbedit",
+        "matchPattern": "action order [0-9]*: skbedit  priority :99",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action skbedit"
+        ]
+    },
+    {
+        "id": "cc37",
+        "name": "Add skbedit action with invalid priority",
+        "category": [
+            "actions",
+            "skbedit"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action skbedit",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action skbedit prio foo",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions list action skbedit",
+        "matchPattern": "action order [0-9]*:  skbedit priority",
+        "matchCount": "0",
+        "teardown": [
+            "$TC actions flush action skbedit"
+        ]
+    },
+    {
+        "id": "3c95",
+        "name": "Add skbedit action with queue_mapping",
+        "category": [
+            "actions",
+            "skbedit"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action skbedit",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action skbedit queue_mapping 909",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action skbedit",
+        "matchPattern": "action order [0-9]*: skbedit queue_mapping 909",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action skbedit"
+        ]
+    },
+    {
+        "id": "985c",
+        "name": "Add skbedit action with invalid queue_mapping",
+        "category": [
+            "actions",
+            "skbedit"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action skbedit",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action skbedit queue_mapping 67000",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions list action skbedit",
+        "matchPattern": "action order [0-9]*:  skbedit queue_mapping",
+        "matchCount": "0",
+        "teardown": [
+            "$TC actions flush action skbedit"
+        ]
+    },
+    {
+        "id": "224f",
+        "name": "Add skbedit action with ptype host",
+        "category": [
+            "actions",
+            "skbedit"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action skbedit",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action skbedit ptype host",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action skbedit",
+        "matchPattern": "action order [0-9]*: skbedit  ptype host",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action skbedit"
+        ]
+    },
+    {
+        "id": "d1a3",
+        "name": "Add skbedit action with ptype otherhost",
+        "category": [
+            "actions",
+            "skbedit"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action skbedit",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action skbedit ptype otherhost",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action skbedit",
+        "matchPattern": "action order [0-9]*: skbedit  ptype otherhost",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action skbedit"
+        ]
+    },
+    {
+        "id": "b9c6",
+        "name": "Add skbedit action with invalid ptype",
+        "category": [
+            "actions",
+            "skbedit"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action skbedit",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action skbedit ptype openair",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions list action skbedit",
+        "matchPattern": "action order [0-9]*:  skbedit ptype openair",
+        "matchCount": "0",
+        "teardown": [
+            "$TC actions flush action skbedit"
+        ]
+    },
+    {
+        "id": "464a",
+        "name": "Add skbedit action with control pipe",
+        "category": [
+            "actions",
+            "skbedit"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action skbedit",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action skbedit ptype host pipe index 11",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action skbedit index 11",
+        "matchPattern": "action order [0-9]*: skbedit  ptype host pipe.*index 11 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action skbedit"
+        ]
+    },
+    {
+        "id": "212f",
+        "name": "Add skbedit action with control reclassify",
+        "category": [
+            "actions",
+            "skbedit"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action skbedit",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action skbedit mark 56789 reclassify index 90",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action skbedit index 90",
+        "matchPattern": "action order [0-9]*: skbedit  mark 56789 reclassify.*index 90 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action skbedit"
+        ]
+    },
+    {
+        "id": "0651",
+        "name": "Add skbedit action with control pass",
+        "category": [
+            "actions",
+            "skbedit"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action skbedit",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action skbedit queue_mapping 3 pass index 271",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action skbedit index 271",
+        "matchPattern": "action order [0-9]*: skbedit queue_mapping 3 pass.*index 271 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action skbedit"
+        ]
+    },
+    {
+        "id": "cc53",
+        "name": "Add skbedit action with control drop",
+        "category": [
+            "actions",
+            "skbedit"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action skbedit",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action skbedit queue_mapping 3 drop index 271",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action skbedit index 271",
+        "matchPattern": "action order [0-9]*: skbedit queue_mapping 3 drop.*index 271 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action skbedit"
+        ]
+    },
+    {
+        "id": "ec16",
+        "name": "Add skbedit action with control jump",
+        "category": [
+            "actions",
+            "skbedit"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action skbedit",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action skbedit priority 8 jump 9 index 2",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action skbedit index 2",
+        "matchPattern": "action order [0-9]*: skbedit  priority :8 jump 9.*index 2 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action skbedit"
+        ]
+    },
+    {
+        "id": "db54",
+        "name": "Add skbedit action with control continue",
+        "category": [
+            "actions",
+            "skbedit"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action skbedit",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action skbedit priority 16 continue index 32",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action skbedit index 32",
+        "matchPattern": "action order [0-9]*: skbedit  priority :16 continue.*index 32 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action skbedit"
+        ]
+    },
+    {
+        "id": "1055",
+        "name": "Add skbedit action with cookie",
+        "category": [
+            "actions",
+            "skbedit"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action skbedit",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action skbedit priority 16 continue index 32 cookie deadbeef",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action skbedit index 32",
+        "matchPattern": "action order [0-9]*: skbedit  priority :16 continue.*index 32 ref.*cookie deadbeef",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action skbedit"
+        ]
+    },
+    {
+        "id": "5172",
+        "name": "List skbedit actions",
+        "category": [
+            "actions",
+            "skbedit"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action skbedit",
+                0,
+                1,
+                255
+            ],
+            "$TC actions add action skbedit ptype otherhost",
+            "$TC actions add action skbedit ptype broadcast",
+            "$TC actions add action skbedit mark 59",
+            "$TC actions add action skbedit mark 409"
+        ],
+        "cmdUnderTest": "$TC actions list action skbedit",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action skbedit",
+        "matchPattern": "action order [0-9]*: skbedit",
+        "matchCount": "4",
+        "teardown": [
+            "$TC actions flush action skbedit"
+        ]
+    },
+    {
+        "id": "a6d6",
+        "name": "Add skbedit action with index",
+        "category": [
+            "actions",
+            "skbedit"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action skbedit",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action skbedit mark 808 index 4040404040",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action skbedit",
+        "matchPattern": "index 4040404040",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action skbedit"
+        ]
+    },
+    {
+        "id": "38f3",
+        "name": "Delete skbedit action",
+        "category": [
+            "actions",
+            "skbedit"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action skbedit",
+                0,
+                1,
+                255
+            ],
+            "$TC actions add action skbedit mark 42 index 9009"
+        ],
+        "cmdUnderTest": "$TC actions del action skbedit index 9009",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action skbedit",
+        "matchPattern": "action order [0-9]*:  skbedit mark 42",
+        "matchCount": "0",
+        "teardown": [
+            "$TC actions flush action skbedit"
+        ]
+    },
+    {
+        "id": "ce97",
+        "name": "Flush skbedit actions",
+        "category": [
+            "actions",
+            "skbedit"
+        ],
+        "setup": [
+            "$TC actions add action skbedit mark 500",
+            "$TC actions add action skbedit mark 501",
+            "$TC actions add action skbedit mark 502",
+            "$TC actions add action skbedit mark 503",
+            "$TC actions add action skbedit mark 504",
+            "$TC actions add action skbedit mark 505",
+            "$TC actions add action skbedit mark 506"
+        ],
+        "cmdUnderTest": "$TC actions flush action skbedit",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action skbedit",
+        "matchPattern": "action order [0-9]*:  skbedit",
+        "matchCount": "0",
+        "teardown": [
+            "$TC actions flush action skbedit"
+        ]
+    }
+]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/skbmod.json b/tools/testing/selftests/tc-testing/tc-tests/actions/skbmod.json
new file mode 100644
index 000000000..fe3326e93
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/skbmod.json
@@ -0,0 +1,396 @@
+[
+    {
+        "id": "7d50",
+        "name": "Add skbmod action to set destination mac",
+        "category": [
+            "actions",
+            "skbmod"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action skbmod",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action skbmod set dmac 11:22:33:44:55:66 index 5",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions ls action skbmod",
+        "matchPattern": "action order [0-9]*: skbmod pipe set dmac 11:22:33:44:55:66\\s+index 5",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action skbmod"
+        ]
+    },
+    {
+        "id": "9b29",
+        "name": "Add skbmod action to set source mac",
+        "category": [
+            "actions",
+            "skbmod"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action skbmod",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action skbmod set smac 77:88:99:AA:BB:CC index 7",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action skbmod index 7",
+        "matchPattern": "action order [0-9]*: skbmod pipe set smac 77:88:99:aa:bb:cc\\s+index 7",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action skbmod"
+        ]
+    },
+    {
+        "id": "1724",
+        "name": "Add skbmod action with invalid mac",
+        "category": [
+            "actions",
+            "skbmod"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action skbmod",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action skbmod set smac 00:44:55:44:55",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions ls action skbmod",
+        "matchPattern": "action order [0-9]*: skbmod pipe set smac 00:44:55:44:55",
+        "matchCount": "0",
+        "teardown": [
+            "$TC actions flush action skbmod"
+        ]
+    },
+    {
+        "id": "3cf1",
+        "name": "Add skbmod action with valid etype",
+        "category": [
+            "actions",
+            "skbmod"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action skbmod",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action skbmod set etype 0xfefe",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions ls action skbmod",
+        "matchPattern": "action order [0-9]*: skbmod pipe set etype 0xFEFE",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action skbmod"
+        ]
+    },
+    {
+        "id": "a749",
+        "name": "Add skbmod action with invalid etype",
+        "category": [
+            "actions",
+            "skbmod"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action skbmod",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action skbmod set etype 0xfefef",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions ls action skbmod",
+        "matchPattern": "action order [0-9]*: skbmod pipe set etype 0xFEFEF",
+        "matchCount": "0",
+        "teardown": [
+            "$TC actions flush action skbmod"
+        ]
+    },
+    {
+        "id": "bfe6",
+        "name": "Add skbmod action to swap mac",
+        "category": [
+            "actions",
+            "skbmod"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action skbmod",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action skbmod swap mac",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action skbmod index 1",
+        "matchPattern": "action order [0-9]*: skbmod pipe swap mac",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action skbmod"
+        ]
+    },
+    {
+        "id": "839b",
+        "name": "Add skbmod action with control pipe",
+        "category": [
+            "actions",
+            "skbmod"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action skbmod",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action skbmod swap mac pipe",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions ls action skbmod",
+        "matchPattern": "action order [0-9]*: skbmod pipe swap mac",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action skbmod"
+        ]
+    },
+    {
+        "id": "c167",
+        "name": "Add skbmod action with control reclassify",
+        "category": [
+            "actions",
+            "skbmod"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action skbmod",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action skbmod set etype 0xbeef reclassify",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions ls action skbmod",
+        "matchPattern": "action order [0-9]*: skbmod reclassify set etype 0xBEEF",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action skbmod"
+        ]
+    },
+    {
+        "id": "0c2f",
+        "name": "Add skbmod action with control drop",
+        "category": [
+            "actions",
+            "skbmod"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action skbmod",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action skbmod set etype 0x0001 drop",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action skbmod index 1",
+        "matchPattern": "action order [0-9]*: skbmod drop set etype 0x1",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action skbmod"
+        ]
+    },
+    {
+        "id": "d113",
+        "name": "Add skbmod action with control continue",
+        "category": [
+            "actions",
+            "skbmod"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action skbmod",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action skbmod set etype 0x1 continue",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions ls action skbmod",
+        "matchPattern": "action order [0-9]*: skbmod continue set etype 0x1",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action skbmod"
+        ]
+    },
+    {
+        "id": "7242",
+        "name": "Add skbmod action with control pass",
+        "category": [
+            "actions",
+            "skbmod"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action skbmod",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action skbmod set smac 00:00:00:00:00:01 pass",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions ls action skbmod",
+        "matchPattern": "action order [0-9]*: skbmod pass set smac 00:00:00:00:00:01",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action skbmod"
+        ]
+    },
+    {
+        "id": "6046",
+        "name": "Add skbmod action with control reclassify and cookie",
+        "category": [
+            "actions",
+            "skbmod"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action skbmod",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action skbmod set smac 00:01:02:03:04:01 reclassify index 1 cookie ddeeffaabb11cc22",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action skbmod index 1",
+        "matchPattern": "action order [0-9]*: skbmod reclassify set smac 00:01:02:03:04:01.*index 1 ref.*cookie ddeeffaabb11cc22",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action skbmod"
+        ]
+    },
+    {
+        "id": "58cb",
+        "name": "List skbmod actions",
+        "category": [
+            "actions",
+            "skbmod"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action skbmod",
+                0,
+                1,
+                255
+            ],
+            "$TC actions add action skbmod set etype 0x0001",
+            "$TC actions add action skbmod set etype 0x0011",
+            "$TC actions add action skbmod set etype 0x0021",
+            "$TC actions add action skbmod set etype 0x0031",
+            "$TC actions add action skbmod set etype 0x0041"
+        ],
+        "cmdUnderTest": "$TC actions ls action skbmod",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions ls action skbmod",
+        "matchPattern": "action order [0-9]*: skbmod",
+        "matchCount": "5",
+        "teardown": [
+            "$TC actions flush action skbmod"
+        ]
+    },
+    {
+        "id": "9aa8",
+        "name": "Get a single skbmod action from a list",
+        "category": [
+            "actions",
+            "skbmod"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action skbmod",
+                0,
+                1,
+                255
+            ],
+            "$TC actions add action skbmod set etype 0x0001",
+            "$TC actions add action skbmod set etype 0x0011",
+            "$TC actions add action skbmod set etype 0x0021",
+            "$TC actions add action skbmod set etype 0x0031",
+            "$TC actions add action skbmod set etype 0x0041"
+        ],
+        "cmdUnderTest": "$TC actions ls action skbmod",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action skbmod index 4",
+        "matchPattern": "action order [0-9]*: skbmod pipe set etype 0x31",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action skbmod"
+        ]
+    },
+    {
+        "id": "e93a",
+        "name": "Delete an skbmod action",
+        "category": [
+            "actions",
+            "skbmod"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action skbmod",
+                0,
+                1,
+                255
+            ],
+            "$TC actions add action skbmod set etype 0x1111 index 909"
+        ],
+        "cmdUnderTest": "$TC actions del action skbmod index 909",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions ls action skbmod",
+        "matchPattern": "action order [0-9]*: skbmod pipe set etype 0x1111\\s+index 909",
+        "matchCount": "0",
+        "teardown": [
+            "$TC actions flush action skbmod"
+        ]
+    },
+    {
+        "id": "40c2",
+        "name": "Flush skbmod actions",
+        "category": [
+            "actions",
+            "skbmod"
+        ],
+        "setup": [
+            "$TC actions add action skbmod set etype 0x0001",
+            "$TC actions add action skbmod set etype 0x0011",
+            "$TC actions add action skbmod set etype 0x0021",
+            "$TC actions add action skbmod set etype 0x0031",
+            "$TC actions add action skbmod set etype 0x0041"
+        ],
+        "cmdUnderTest": "$TC actions flush action skbmod",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions ls action skbmod",
+        "matchPattern": "action order [0-9]*: skbmod",
+        "matchCount": "0",
+        "teardown": [
+            "$TC actions flush action skbmod"
+        ]
+    }
+]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/tunnel_key.json b/tools/testing/selftests/tc-testing/tc-tests/actions/tunnel_key.json
new file mode 100644
index 000000000..e7e15a733
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/tunnel_key.json
@@ -0,0 +1,888 @@
+[
+    {
+        "id": "2b11",
+        "name": "Add tunnel_key set action with mandatory parameters",
+        "category": [
+            "actions",
+            "tunnel_key"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 10.10.10.1 dst_ip 20.20.20.2 id 1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action tunnel_key",
+        "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 10.10.10.1.*dst_ip 20.20.20.2.*key_id 1",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action tunnel_key"
+        ]
+    },
+    {
+        "id": "dc6b",
+        "name": "Add tunnel_key set action with missing mandatory src_ip parameter",
+        "category": [
+            "actions",
+            "tunnel_key"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action tunnel_key set dst_ip 20.20.20.2 id 100",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions list action tunnel_key",
+        "matchPattern": "action order [0-9]+: tunnel_key set.*dst_ip 20.20.20.2.*key_id 100",
+        "matchCount": "0",
+        "teardown": [
+	    [
+		"$TC actions flush action tunnel_key",
+		0,
+		1,
+		255
+	    ]
+        ]
+    },
+    {
+        "id": "7f25",
+        "name": "Add tunnel_key set action with missing mandatory dst_ip parameter",
+        "category": [
+            "actions",
+            "tunnel_key"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 10.10.10.1 id 100",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions list action tunnel_key",
+        "matchPattern": "action order [0-9]+: tunnel_key set.*src_ip 10.10.10.1.*key_id 100",
+        "matchCount": "0",
+        "teardown": [
+	    [
+		"$TC actions flush action tunnel_key",
+		0,
+		1,
+		255
+	    ]
+        ]
+    },
+    {
+        "id": "a5e0",
+        "name": "Add tunnel_key set action with invalid src_ip parameter",
+        "category": [
+            "actions",
+            "tunnel_key"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 300.168.100.1 dst_ip 192.168.200.1 id 7 index 1",
+        "expExitCode": "1",
+        "verifyCmd": "$TC actions get action tunnel_key index 1",
+        "matchPattern": "action order [0-9]+: tunnel_key set.*src_ip 300.168.100.1.*dst_ip 192.168.200.1.*key_id 7.*index 1 ref",
+        "matchCount": "0",
+        "teardown": [
+	    [
+		"$TC actions flush action tunnel_key",
+		0,
+		1,
+		255
+	    ]
+        ]
+    },
+    {
+        "id": "eaa8",
+        "name": "Add tunnel_key set action with invalid dst_ip parameter",
+        "category": [
+            "actions",
+            "tunnel_key"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 192.168.100.1 dst_ip 192.168.800.1 id 10 index 11",
+        "expExitCode": "1",
+        "verifyCmd": "$TC actions get action tunnel_key index 11",
+        "matchPattern": "action order [0-9]+: tunnel_key set.*src_ip 192.168.100.1.*dst_ip 192.168.800.1.*key_id 10.*index 11 ref",
+        "matchCount": "0",
+        "teardown": [
+	    [
+		"$TC actions flush action tunnel_key",
+		0,
+		1,
+		255
+	    ]
+        ]
+    },
+    {
+        "id": "3b09",
+        "name": "Add tunnel_key set action with invalid id parameter",
+        "category": [
+            "actions",
+            "tunnel_key"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 1.1.1.1 dst_ip 2.2.2.2 id 112233445566778899 index 1",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions get action tunnel_key index 1",
+        "matchPattern": "action order [0-9]+: tunnel_key set.*src_ip 1.1.1.1.*dst_ip 2.2.2.2.*key_id 112233445566778899.*index 1 ref",
+        "matchCount": "0",
+        "teardown": [
+	    [
+		"$TC actions flush action tunnel_key",
+		0,
+		1,
+		255
+	    ]
+        ]
+    },
+    {
+        "id": "9625",
+        "name": "Add tunnel_key set action with invalid dst_port parameter",
+        "category": [
+            "actions",
+            "tunnel_key"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 1.1.1.1 dst_ip 2.2.2.2 id 11 dst_port 998877 index 1",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions get action tunnel_key index 1",
+        "matchPattern": "action order [0-9]+: tunnel_key set.*src_ip 1.1.1.1.*dst_ip 2.2.2.2.*key_id 11.*dst_port 998877.*index 1 ref",
+        "matchCount": "0",
+        "teardown": [
+	    [
+		"$TC actions flush action tunnel_key",
+		0,
+		1,
+		255
+	    ]
+        ]
+    },
+    {
+        "id": "05af",
+        "name": "Add tunnel_key set action with optional dst_port parameter",
+        "category": [
+            "actions",
+            "tunnel_key"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 192.168.100.1 dst_ip 192.168.200.1 id 789 dst_port 4000 index 10",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action tunnel_key index 10",
+        "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 192.168.100.1.*dst_ip 192.168.200.1.*key_id 789.*dst_port 4000.*index 10 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action tunnel_key"
+        ]
+    },
+    {
+        "id": "da80",
+        "name": "Add tunnel_key set action with index at 32-bit maximum",
+        "category": [
+            "actions",
+            "tunnel_key"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 1.1.1.1 dst_ip 2.2.2.2 id 11 index 4294967295",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action tunnel_key index 4294967295",
+        "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 1.1.1.1.*dst_ip 2.2.2.2.*id 11.*index 4294967295 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action tunnel_key"
+        ]
+    },
+    {
+        "id": "d407",
+        "name": "Add tunnel_key set action with index exceeding 32-bit maximum",
+        "category": [
+            "actions",
+            "tunnel_key"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 1.1.1.1 dst_ip 2.2.2.2 id 11 index 4294967295678",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions get action tunnel_key index 4294967295678",
+        "matchPattern": "action order [0-9]+: tunnel_key set.*index 4294967295678 ref",
+        "matchCount": "0",
+        "teardown": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ]
+        ]
+    },
+    {
+        "id": "5cba",
+        "name": "Add tunnel_key set action with id value at 32-bit maximum",
+        "category": [
+            "actions",
+            "tunnel_key"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 1.1.1.1 dst_ip 2.2.2.2 id 4294967295 index 1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action tunnel_key index 1",
+        "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 1.1.1.1.*dst_ip 2.2.2.2.*key_id 4294967295.*index 1",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action tunnel_key"
+        ]
+    },
+    {
+        "id": "e84a",
+        "name": "Add tunnel_key set action with id value exceeding 32-bit maximum",
+        "category": [
+            "actions",
+            "tunnel_key"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 1.1.1.1 dst_ip 2.2.2.2 id 42949672955 index 1",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions get action tunnel_key index 4294967295",
+        "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 1.1.1.1.*dst_ip 2.2.2.2.*key_id 42949672955.*index 1",
+        "matchCount": "0",
+        "teardown": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ]
+	]
+    },
+    {
+        "id": "9c19",
+        "name": "Add tunnel_key set action with dst_port value at 16-bit maximum",
+        "category": [
+            "actions",
+            "tunnel_key"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 1.1.1.1 dst_ip 2.2.2.2 id 429 dst_port 65535 index 1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action tunnel_key index 1",
+        "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 1.1.1.1.*dst_ip 2.2.2.2.*key_id 429.*dst_port 65535.*index 1",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action tunnel_key"
+        ]
+    },
+    {
+        "id": "3bd9",
+        "name": "Add tunnel_key set action with dst_port value exceeding 16-bit maximum",
+        "category": [
+            "actions",
+            "tunnel_key"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 1.1.1.1 dst_ip 2.2.2.2 id 429 dst_port 65535789 index 1",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions get action tunnel_key index 1",
+        "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 1.1.1.1.*dst_ip 2.2.2.2.*key_id 429.*dst_port 65535789.*index 1",
+        "matchCount": "0",
+        "teardown": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ]
+	]
+    },
+    {
+        "id": "68e2",
+        "name": "Add tunnel_key unset action",
+        "category": [
+            "actions",
+            "tunnel_key"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action tunnel_key unset index 1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action tunnel_key index 1",
+        "matchPattern": "action order [0-9]+: tunnel_key.*unset.*index 1 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action tunnel_key"
+        ]
+    },
+    {
+        "id": "6192",
+        "name": "Add tunnel_key unset continue action",
+        "category": [
+            "actions",
+            "tunnel_key"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action tunnel_key unset continue index 1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action tunnel_key index 1",
+        "matchPattern": "action order [0-9]+: tunnel_key.*unset continue.*index 1 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action tunnel_key"
+        ]
+    },
+    {
+        "id": "061d",
+        "name": "Add tunnel_key set continue action with cookie",
+        "category": [
+            "actions",
+            "tunnel_key"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 192.168.10.1 dst_ip 192.168.20.2 id 123 continue index 1 cookie aa11bb22cc33dd44ee55ff66aa11b1b2",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action tunnel_key index 1",
+        "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 192.168.10.1.*dst_ip 192.168.20.2.*key_id 123.*csum continue.*index 1.*cookie aa11bb22cc33dd44ee55ff66aa11b1b2",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action tunnel_key"
+        ]
+    },
+    {
+        "id": "8acb",
+        "name": "Add tunnel_key set continue action with invalid cookie",
+        "category": [
+            "actions",
+            "tunnel_key"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 192.168.10.1 dst_ip 192.168.20.2 id 123 continue index 1 cookie aa11bb22cc33dd44ee55ff66aa11b1b2777888",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions get action tunnel_key index 1",
+        "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 192.168.10.1.*dst_ip 192.168.20.2.*key_id 123.*csum continue.*index 1.*cookie aa11bb22cc33dd44ee55ff66aa11b1b2777888",
+        "matchCount": "0",
+        "teardown": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ]
+	]
+    },
+    {
+        "id": "a07e",
+        "name": "Add tunnel_key action with no set/unset command specified",
+        "category": [
+            "actions",
+            "tunnel_key"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action tunnel_key src_ip 10.10.10.1 dst_ip 20.20.20.2 id 1",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions get action tunnel_key index 1",
+        "matchPattern": "action order [0-9]+: tunnel_key.*src_ip 10.10.10.1.*dst_ip 20.20.20.2.*key_id 1",
+        "matchCount": "0",
+        "teardown": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ]
+        ]
+    },
+    {
+        "id": "b227",
+        "name": "Add tunnel_key action with csum option",
+        "category": [
+            "actions",
+            "tunnel_key"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 10.10.10.1 dst_ip 20.20.20.2 id 1 csum index 99",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action tunnel_key index 99",
+        "matchPattern": "action order [0-9]+: tunnel_key.*src_ip 10.10.10.1.*dst_ip 20.20.20.2.*key_id 1.*csum pipe.*index 99",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action tunnel_key"
+        ]
+    },
+    {
+        "id": "58a7",
+        "name": "Add tunnel_key action with nocsum option",
+        "category": [
+            "actions",
+            "tunnel_key"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 10.10.10.1 dst_ip 10.10.10.2 id 7823 nocsum index 234",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action tunnel_key index 234",
+        "matchPattern": "action order [0-9]+: tunnel_key.*src_ip 10.10.10.1.*dst_ip 10.10.10.2.*key_id 7823.*nocsum pipe.*index 234",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action tunnel_key"
+        ]
+    },
+    {
+        "id": "2575",
+        "name": "Add tunnel_key action with not-supported parameter",
+        "category": [
+            "actions",
+            "tunnel_key"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 10.10.10.1 dst_ip 10.10.10.2 id 7 foobar 999 index 4",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions get action tunnel_key index 4",
+        "matchPattern": "action order [0-9]+: tunnel_key.*src_ip 10.10.10.1.*dst_ip 10.10.10.2.*key_id 7.*foobar 999.*index 4",
+        "matchCount": "0",
+        "teardown": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ]
+        ]
+    },
+    {
+        "id": "7a88",
+        "name": "Add tunnel_key action with cookie parameter",
+        "category": [
+            "actions",
+            "tunnel_key"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 10.10.10.1 dst_ip 10.10.10.2 id 7 index 4 cookie aa11bb22cc33dd44ee55ff66aa11b1b2",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action tunnel_key index 4",
+        "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 10.10.10.1.*dst_ip 10.10.10.2.*key_id 7.*csum pipe.*index 4 ref.*cookie aa11bb22cc33dd44ee55ff66aa11b1b2",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action tunnel_key"
+        ]
+    },
+    {
+        "id": "4f20",
+        "name": "Add tunnel_key action with a single geneve option parameter",
+        "category": [
+            "actions",
+            "tunnel_key"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 1.1.1.1 dst_ip 2.2.2.2 id 42 dst_port 6081 geneve_opts 0102:80:00880022 index 1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action tunnel_key index 1",
+        "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 1.1.1.1.*dst_ip 2.2.2.2.*key_id 42.*dst_port 6081.*geneve_opt 0102:80:00880022.*index 1",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action tunnel_key"
+        ]
+    },
+    {
+        "id": "e33d",
+        "name": "Add tunnel_key action with multiple geneve options parameter",
+        "category": [
+            "actions",
+            "tunnel_key"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 1.1.1.1 dst_ip 2.2.2.2 id 42 dst_port 6081 geneve_opts 0102:80:00880022,0408:42:0040007611223344,0111:02:1020304011223344 index 1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action tunnel_key index 1",
+        "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 1.1.1.1.*dst_ip 2.2.2.2.*key_id 42.*dst_port 6081.*geneve_opt 0102:80:00880022,0408:42:0040007611223344,0111:02:1020304011223344.*index 1",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action tunnel_key"
+        ]
+    },
+    {
+        "id": "0778",
+        "name": "Add tunnel_key action with invalid class geneve option parameter",
+        "category": [
+            "actions",
+            "tunnel_key"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 1.1.1.1 dst_ip 2.2.2.2 id 42 dst_port 6081 geneve_opts 824212:80:00880022 index 1",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions get action tunnel_key index 1",
+        "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 1.1.1.1.*dst_ip 2.2.2.2.*key_id 42.*dst_port 6081.*geneve_opt 824212:80:00880022.*index 1",
+        "matchCount": "0",
+        "teardown": [
+            "$TC actions flush action tunnel_key"
+        ]
+    },
+    {
+        "id": "4ae8",
+        "name": "Add tunnel_key action with invalid type geneve option parameter",
+        "category": [
+            "actions",
+            "tunnel_key"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 1.1.1.1 dst_ip 2.2.2.2 id 42 dst_port 6081 geneve_opts 0102:4224:00880022 index 1",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions get action tunnel_key index 1",
+        "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 1.1.1.1.*dst_ip 2.2.2.2.*key_id 42.*dst_port 6081.*geneve_opt 0102:4224:00880022.*index 1",
+        "matchCount": "0",
+        "teardown": [
+            "$TC actions flush action tunnel_key"
+        ]
+    },
+    {
+        "id": "4039",
+        "name": "Add tunnel_key action with short data length geneve option parameter",
+        "category": [
+            "actions",
+            "tunnel_key"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 1.1.1.1 dst_ip 2.2.2.2 id 42 dst_port 6081 geneve_opts 0102:80:4288 index 1",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions get action tunnel_key index 1",
+        "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 1.1.1.1.*dst_ip 2.2.2.2.*key_id 42.*dst_port 6081.*geneve_opt 0102:80:4288.*index 1",
+        "matchCount": "0",
+        "teardown": [
+            "$TC actions flush action tunnel_key"
+        ]
+    },
+    {
+        "id": "26a6",
+        "name": "Add tunnel_key action with non-multiple of 4 data length geneve option parameter",
+        "category": [
+            "actions",
+            "tunnel_key"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 1.1.1.1 dst_ip 2.2.2.2 id 42 dst_port 6081 geneve_opts 0102:80:4288428822 index 1",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions get action tunnel_key index 1",
+        "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 1.1.1.1.*dst_ip 2.2.2.2.*key_id 42.*dst_port 6081.*geneve_opt 0102:80:4288428822.*index 1",
+        "matchCount": "0",
+        "teardown": [
+            "$TC actions flush action tunnel_key"
+        ]
+    },
+    {
+        "id": "f44d",
+        "name": "Add tunnel_key action with incomplete geneve options parameter",
+        "category": [
+            "actions",
+            "tunnel_key"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 1.1.1.1 dst_ip 2.2.2.2 id 42 dst_port 6081 geneve_opts 0102:80:00880022,0408:42: index 1",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions get action tunnel_key index 1",
+        "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 1.1.1.1.*dst_ip 2.2.2.2.*key_id 42.*dst_port 6081.*geneve_opt 0102:80:00880022,0408:42:.*index 1",
+        "matchCount": "0",
+        "teardown": [
+            "$TC actions flush action tunnel_key"
+        ]
+    },
+    {
+        "id": "7afc",
+        "name": "Replace tunnel_key set action with all parameters",
+        "category": [
+            "actions",
+            "tunnel_key"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ],
+            "$TC actions add action tunnel_key set src_ip 10.10.10.1 dst_ip 20.20.20.2 dst_port 3128 csum id 1 index 1"
+        ],
+        "cmdUnderTest": "$TC actions replace action tunnel_key set src_ip 11.11.11.1 dst_ip 21.21.21.2 dst_port 3129 nocsum id 11 index 1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action tunnel_key index 1",
+        "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 11.11.11.1.*dst_ip 21.21.21.2.*key_id 11.*dst_port 3129.*nocsum pipe.*index 1",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action tunnel_key"
+        ]
+    },
+    {
+        "id": "364d",
+        "name": "Replace tunnel_key set action with all parameters and cookie",
+        "category": [
+            "actions",
+            "tunnel_key"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ],
+            "$TC actions add action tunnel_key set src_ip 10.10.10.1 dst_ip 20.20.20.2 dst_port 3128 nocsum id 1 index 1 cookie aabbccddeeff112233445566778800a"
+        ],
+        "cmdUnderTest": "$TC actions replace action tunnel_key set src_ip 11.11.11.1 dst_ip 21.21.21.2 dst_port 3129 id 11 csum reclassify index 1 cookie a1b1c1d1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action tunnel_key index 1",
+        "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 11.11.11.1.*dst_ip 21.21.21.2.*key_id 11.*dst_port 3129.*csum reclassify.*index 1.*cookie a1b1c1d1",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action tunnel_key"
+        ]
+    },
+    {
+        "id": "937c",
+        "name": "Fetch all existing tunnel_key actions",
+        "category": [
+            "actions",
+            "tunnel_key"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ],
+            "$TC actions add action tunnel_key set src_ip 10.10.10.1 dst_ip 20.20.20.2 dst_port 3128 nocsum id 1 pipe index 1",
+            "$TC actions add action tunnel_key set src_ip 11.10.10.1 dst_ip 21.20.20.2 dst_port 3129 csum id 2 jump 10 index 2",
+            "$TC actions add action tunnel_key set src_ip 12.10.10.1 dst_ip 22.20.20.2 dst_port 3130 csum id 3 pass index 3",
+            "$TC actions add action tunnel_key set src_ip 13.10.10.1 dst_ip 23.20.20.2 dst_port 3131 nocsum id 4 continue index 4"
+        ],
+        "cmdUnderTest": "$TC actions list action tunnel_key",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action tunnel_key",
+        "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 10.10.10.1.*dst_ip 20.20.20.2.*key_id 1.*dst_port 3128.*nocsum pipe.*index 1.*set.*src_ip 11.10.10.1.*dst_ip 21.20.20.2.*key_id 2.*dst_port 3129.*csum jump 10.*index 2.*set.*src_ip 12.10.10.1.*dst_ip 22.20.20.2.*key_id 3.*dst_port 3130.*csum pass.*index 3.*set.*src_ip 13.10.10.1.*dst_ip 23.20.20.2.*key_id 4.*dst_port 3131.*nocsum continue.*index 4",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action tunnel_key"
+        ]
+    },
+    {
+        "id": "6783",
+        "name": "Flush all existing tunnel_key actions",
+        "category": [
+            "actions",
+            "tunnel_key"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ],
+            "$TC actions add action tunnel_key set src_ip 10.10.10.1 dst_ip 20.20.20.2 dst_port 3128 nocsum id 1 pipe index 1",
+            "$TC actions add action tunnel_key set src_ip 11.10.10.1 dst_ip 21.20.20.2 dst_port 3129 csum id 2 reclassify index 2",
+            "$TC actions add action tunnel_key set src_ip 12.10.10.1 dst_ip 22.20.20.2 dst_port 3130 csum id 3 pass index 3",
+            "$TC actions add action tunnel_key set src_ip 13.10.10.1 dst_ip 23.20.20.2 dst_port 3131 nocsum id 4 continue index 4"
+        ],
+        "cmdUnderTest": "$TC actions flush action tunnel_key",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action tunnel_key",
+        "matchPattern": "action order [0-9]+:.*",
+        "matchCount": "0",
+        "teardown": [
+	    "$TC actions flush action tunnel_key"
+	]
+    }
+]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/vlan.json b/tools/testing/selftests/tc-testing/tc-tests/actions/vlan.json
new file mode 100644
index 000000000..69ea09eef
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/vlan.json
@@ -0,0 +1,692 @@
+[
+    {
+        "id": "6f5a",
+        "name": "Add vlan pop action with pipe opcode",
+        "category": [
+            "actions",
+            "vlan"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action vlan",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action vlan pop pipe index 8",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action vlan",
+        "matchPattern": "action order [0-9]+: vlan.*pop.*pipe.*index 8 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action vlan"
+        ]
+    },
+    {
+        "id": "df35",
+        "name": "Add vlan pop action with pass opcode",
+        "category": [
+            "actions",
+            "vlan"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action vlan",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action vlan pop pass index 8",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action vlan index 8",
+        "matchPattern": "action order [0-9]+: vlan.*pop.*pass.*index 8 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action vlan"
+        ]
+    },
+    {
+        "id": "b0d4",
+        "name": "Add vlan pop action with drop opcode",
+        "category": [
+            "actions",
+            "vlan"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action vlan",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action vlan pop drop index 8",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action vlan index 8",
+        "matchPattern": "action order [0-9]+: vlan.*pop.*drop.*index 8 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action vlan"
+        ]
+    },
+    {
+        "id": "95ee",
+        "name": "Add vlan pop action with reclassify opcode",
+        "category": [
+            "actions",
+            "vlan"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action vlan",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action vlan pop reclassify index 8",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action vlan index 8",
+        "matchPattern": "action order [0-9]+: vlan.*pop.*reclassify.*index 8 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action vlan"
+        ]
+    },
+    {
+        "id": "0283",
+        "name": "Add vlan pop action with continue opcode",
+        "category": [
+            "actions",
+            "vlan"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action vlan",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action vlan pop continue index 8",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action vlan index 8",
+        "matchPattern": "action order [0-9]+: vlan.*pop.*continue.*index 8 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action vlan"
+        ]
+    },
+    {
+        "id": "b6b9",
+        "name": "Add vlan pop action with jump opcode",
+        "category": [
+            "actions",
+            "vlan"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action vlan",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action vlan pop jump 10 index 8",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action vlan",
+        "matchPattern": "action order [0-9]+: vlan.*jump 10.*index 8 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action vlan"
+        ]
+    },
+    {
+        "id": "87c3",
+        "name": "Add vlan pop action with trap opcode",
+        "category": [
+            "actions",
+            "vlan"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action vlan",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action vlan pop trap index 8",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action vlan",
+        "matchPattern": "action order [0-9]+: vlan.*pop trap.*index 8 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action vlan"
+        ]
+    },
+    {
+        "id": "a178",
+        "name": "Add vlan pop action with invalid opcode",
+        "category": [
+            "actions",
+            "vlan"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action vlan",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action vlan pop foo index 8",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions list action vlan",
+        "matchPattern": "action order [0-9]+: vlan.*pop.*foo.*index 8 ref",
+        "matchCount": "0",
+        "teardown": []
+    },
+    {
+        "id": "ee6f",
+        "name": "Add vlan pop action with index at 32-bit maximum",
+        "category": [
+            "actions",
+            "vlan"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action vlan",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action vlan pop index 4294967295",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action vlan",
+        "matchPattern": "action order [0-9]+: vlan.*pop.*index 4294967295 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action vlan"
+        ]
+    },
+    {
+        "id": "0dfa",
+        "name": "Add vlan pop action with index exceeding 32-bit maximum",
+        "category": [
+            "actions",
+            "vlan"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action vlan",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action vlan pop reclassify index 429496729599",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions get action vlan index 429496729599",
+        "matchPattern": "action order [0-9]+: vlan.*pop.reclassify.*index 429496729599",
+        "matchCount": "0",
+        "teardown": []
+    },
+    {
+        "id": "2b91",
+        "name": "Add vlan invalid action",
+        "category": [
+            "actions",
+            "vlan"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action vlan",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action vlan bad_mode",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions list action vlan",
+        "matchPattern": "action order [0-9]+: vlan.*bad_mode",
+        "matchCount": "0",
+        "teardown": []
+    },
+    {
+        "id": "57fc",
+        "name": "Add vlan push action with invalid protocol type",
+        "category": [
+            "actions",
+            "vlan"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action vlan",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action vlan push protocol ABCD",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions list action vlan",
+        "matchPattern": "action order [0-9]+: vlan.*push",
+        "matchCount": "0",
+        "teardown": []
+    },
+    {
+        "id": "3989",
+        "name": "Add vlan push action with default protocol and priority",
+        "category": [
+            "actions",
+            "vlan"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action vlan",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action vlan push id 123 index 18",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action vlan index 18",
+        "matchPattern": "action order [0-9]+: vlan.*push id 123 protocol 802.1Q priority 0 pipe.*index 18 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action vlan"
+        ]
+    },
+    {
+        "id": "79dc",
+        "name": "Add vlan push action with protocol 802.1Q and priority 3",
+        "category": [
+            "actions",
+            "vlan"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action vlan",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action vlan push id 77 protocol 802.1Q priority 3 continue index 734",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action vlan index 734",
+        "matchPattern": "action order [0-9]+: vlan.*push id 77 protocol 802.1Q priority 3 continue.*index 734 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action vlan"
+        ]
+    },
+    {
+        "id": "4d73",
+        "name": "Add vlan push action with protocol 802.1AD",
+        "category": [
+            "actions",
+            "vlan"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action vlan",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action vlan push id 1024 protocol 802.1AD pass index 10000",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action vlan index 10000",
+        "matchPattern": "action order [0-9]+: vlan.*push id 1024 protocol 802.1ad priority 0 pass.*index 10000 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action vlan"
+        ]
+    },
+    {
+        "id": "1f4b",
+        "name": "Add vlan push action with maximum 12-bit vlan ID",
+        "category": [
+            "actions",
+            "vlan"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action vlan",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action vlan push id 4094 index 1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action vlan index 1",
+        "matchPattern": "action order [0-9]+: vlan.*push id 4094.*protocol 802.1Q.*priority 0.*index 1 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action vlan"
+        ]
+    },
+    {
+        "id": "1f7b",
+        "name": "Add vlan push action with invalid vlan ID",
+        "category": [
+            "actions",
+            "vlan"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action vlan",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action vlan push id 5678 index 1",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions list action vlan",
+        "matchPattern": "action order [0-9]+: vlan.*push id 5678.*index 1 ref",
+        "matchCount": "0",
+        "teardown": [
+            "$TC actions flush action vlan"
+        ]
+    },
+    {
+        "id": "fe40",
+        "name": "Add vlan push action with maximum 3-bit IEEE 802.1p priority",
+        "category": [
+            "actions",
+            "vlan"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action vlan",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action vlan push id 4 priority 7 reclassify index 1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action vlan index 1",
+        "matchPattern": "action order [0-9]+: vlan.*push id 4.*protocol 802.1Q.*priority 7.*reclassify.*index 1 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action vlan"
+        ]
+    },
+    {
+        "id": "5d02",
+        "name": "Add vlan push action with invalid IEEE 802.1p priority",
+        "category": [
+            "actions",
+            "vlan"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action vlan",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action vlan push id 5 priority 10 index 1",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions list action vlan",
+        "matchPattern": "action order [0-9]+: vlan.*push id 5.*index 1 ref",
+        "matchCount": "0",
+        "teardown": []
+    },
+    {
+        "id": "6812",
+        "name": "Add vlan modify action for protocol 802.1Q",
+        "category": [
+            "actions",
+            "vlan"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action vlan",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action vlan modify protocol 802.1Q id 5 index 100",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action vlan index 100",
+        "matchPattern": "action order [0-9]+: vlan.*modify id 100 protocol 802.1Q priority 0 pipe.*index 100 ref",
+        "matchCount": "0",
+        "teardown": [
+            "$TC actions flush action vlan"
+        ]
+    },
+    {
+        "id": "5a31",
+        "name": "Add vlan modify action for protocol 802.1AD",
+        "category": [
+            "actions",
+            "vlan"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action vlan",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action vlan modify protocol 802.1ad id 500 reclassify index 12",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action vlan index 12",
+        "matchPattern": "action order [0-9]+: vlan.*modify id 500 protocol 802.1ad priority 0 reclassify.*index 12 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action vlan"
+        ]
+    },
+    {
+        "id": "3deb",
+        "name": "Replace existing vlan push action with new ID",
+        "category": [
+            "actions",
+            "vlan"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action vlan",
+                0,
+                1,
+                255
+            ],
+            "$TC actions add action vlan push id 500 pipe index 12"
+        ],
+        "cmdUnderTest": "$TC actions replace action vlan push id 700 pipe index 12",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action vlan index 12",
+        "matchPattern": "action order [0-9]+: vlan.*push id 700 protocol 802.1Q priority 0 pipe.*index 12 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action vlan"
+        ]
+    },
+    {
+        "id": "9e76",
+        "name": "Replace existing vlan push action with new protocol",
+        "category": [
+            "actions",
+            "vlan"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action vlan",
+                0,
+                1,
+                255
+            ],
+            "$TC actions add action vlan push id 1 protocol 802.1Q pipe index 1"
+        ],
+        "cmdUnderTest": "$TC actions replace action vlan push id 1 protocol 802.1ad pipe index 1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action vlan index 1",
+        "matchPattern": "action order [0-9]+: vlan.*push id 1 protocol 802.1ad priority 0 pipe.*index 1 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action vlan"
+        ]
+    },
+    {
+        "id": "ede4",
+        "name": "Replace existing vlan push action with new priority",
+        "category": [
+            "actions",
+            "vlan"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action vlan",
+                0,
+                1,
+                255
+            ],
+            "$TC actions add action vlan push id 1 protocol 802.1Q priority 3 reclassify index 1"
+        ],
+        "cmdUnderTest": "$TC actions replace action vlan push id 1 priority 4 reclassify index 1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action vlan index 1",
+        "matchPattern": "action order [0-9]+: vlan.*push id 1 protocol 802.1Q priority 4 reclassify.*index 1 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action vlan"
+        ]
+    },
+    {
+        "id": "d413",
+        "name": "Replace existing vlan pop action with new cookie",
+        "category": [
+            "actions",
+            "vlan"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action vlan",
+                0,
+                1,
+                255
+            ],
+            "$TC actions add action vlan pop continue index 1 cookie 22334455"
+        ],
+        "cmdUnderTest": "$TC actions replace action vlan pop continue index 1 cookie a1b1c2d1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action vlan index 1",
+        "matchPattern": "action order [0-9]+: vlan.*pop continue.*index 1 ref.*cookie a1b1c2d1",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action vlan"
+        ]
+    },
+    {
+        "id": "83a4",
+        "name": "Delete vlan pop action",
+        "category": [
+            "actions",
+            "vlan"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action vlan",
+                0,
+                1,
+                255
+            ],
+            "$TC actions add action vlan pop index 44"
+        ],
+        "cmdUnderTest": "$TC actions del action vlan index 44",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action vlan",
+        "matchPattern": "action order [0-9]+: vlan.*pop.*index 44 ref",
+        "matchCount": "0",
+        "teardown": []
+    },
+    {
+        "id": "ed1e",
+        "name": "Delete vlan push action for protocol 802.1Q",
+        "category": [
+            "actions",
+            "vlan"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action vlan",
+                0,
+                1,
+                255
+            ],
+            "$TC actions add action vlan push id 4094 protocol 802.1Q index 999"
+        ],
+        "cmdUnderTest": "$TC actions del action vlan index 999",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action vlan",
+        "matchPattern": "action order [0-9]+: vlan.*push id 4094 protocol 802.1Q priority 0 pipe.*index 999 ref",
+        "matchCount": "0",
+        "teardown": []
+    },
+    {
+        "id": "a2a3",
+        "name": "Flush vlan actions",
+        "category": [
+            "actions",
+            "vlan"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action vlan",
+                0,
+                1,
+                255
+            ],
+            "$TC actions add action vlan push id 4 protocol 802.1ad index 10",
+            "$TC actions add action vlan push id 4 protocol 802.1ad index 11",
+            "$TC actions add action vlan push id 4 protocol 802.1ad index 12",
+            "$TC actions add action vlan push id 4 protocol 802.1ad index 13"
+        ],
+        "cmdUnderTest": "$TC actions flush action vlan",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action vlan",
+        "matchPattern": "action order [0-9]+: vlan.*push id 4 protocol 802.1ad",
+        "matchCount": "0",
+        "teardown": []
+    },
+    {
+        "id": "1d78",
+        "name": "Add vlan push action with cookie",
+        "category": [
+            "actions",
+            "vlan"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action vlan",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action vlan push id 4 cookie a0a0a0a0a0a0a0",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action vlan",
+        "matchPattern": "action order [0-9]+: vlan.*push id 4.*cookie a0a0a0a0a0a0a0",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action vlan"
+        ]
+    }
+]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/fw.json b/tools/testing/selftests/tc-testing/tc-tests/filters/fw.json
new file mode 100644
index 000000000..3b97cfd7e
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/filters/fw.json
@@ -0,0 +1,1049 @@
+[
+    {
+        "id": "901f",
+        "name": "Add fw filter with prio at 32-bit maxixum",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 65535 fw action ok",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 65535 protocol all fw",
+        "matchPattern": "pref 65535 fw.*handle 0x1.*gact action pass",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "51e2",
+        "name": "Add fw filter with prio exceeding 32-bit maxixum",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 65536 fw action ok",
+        "expExitCode": "255",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 65536 protocol all fw",
+        "matchPattern": "pref 65536 fw.*handle 0x1.*gact action pass",
+        "matchCount": "0",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "d987",
+        "name": "Add fw filter with action ok",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 fw action ok",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol all fw",
+        "matchPattern": "handle 0x1.*gact action pass",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "affe",
+        "name": "Add fw filter with action continue",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 fw action continue",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol all fw",
+        "matchPattern": "handle 0x1.*gact action continue",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "28bc",
+        "name": "Add fw filter with action pipe",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 fw action pipe",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol all fw",
+        "matchPattern": "handle 0x1.*gact action pipe",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "8da2",
+        "name": "Add fw filter with action drop",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 fw action drop",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 protocol all prio 1 fw",
+        "matchPattern": "handle 0x1.*gact action drop",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "9436",
+        "name": "Add fw filter with action reclassify",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 fw action reclassify",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol all fw",
+        "matchPattern": "handle 0x1.*gact action reclassify",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "95bb",
+        "name": "Add fw filter with action jump 10",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 fw action jump 10",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol all fw",
+        "matchPattern": "handle 0x1.*gact action jump 10",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "3d74",
+        "name": "Add fw filter with action goto chain 5",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 fw action goto chain 5",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol all fw",
+        "matchPattern": "handle 0x1.*gact action goto chain 5",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "eb8f",
+        "name": "Add fw filter with invalid action",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 fw action pump",
+        "expExitCode": "255",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol all fw",
+        "matchPattern": "handle 0x1.*gact action pump",
+        "matchCount": "0",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "6a79",
+        "name": "Add fw filter with missing mandatory action",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 fw",
+        "expExitCode": "2",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol all fw",
+        "matchPattern": "filter protocol all pref [0-9]+ fw.*handle 0x1",
+        "matchCount": "0",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "8298",
+        "name": "Add fw filter with cookie",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 2 fw action pipe cookie aa11bb22cc33dd44ee55ff66aa11b1b2",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 2 protocol all fw",
+        "matchPattern": "pref 2 fw.*handle 0x1.*gact action pipe.*cookie aa11bb22cc33dd44ee55ff66aa11b1b2",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "a88c",
+        "name": "Add fw filter with invalid cookie",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 2 fw action continue cookie aa11bb22cc33dd44ee55ff66aa11b1b2777888",
+        "expExitCode": "255",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 2 protocol all fw",
+        "matchPattern": "pref 2 fw.*handle 0x1.*gact action continue.*cookie aa11bb22cc33dd44ee55ff66aa11b1b2777888",
+        "matchCount": "0",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "10f6",
+        "name": "Add fw filter with handle in hex",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 0xa1b2ff prio 1 fw action ok",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 0xa1b2ff prio 1 protocol all fw",
+        "matchPattern": "fw.*handle 0xa1b2ff.*gact action pass",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "9d51",
+        "name": "Add fw filter with handle at 32-bit maximum",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 4294967295 prio 1 fw action ok",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 4294967295 prio 1 protocol all fw",
+        "matchPattern": "fw.*handle 0xffffffff.*gact action pass",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "d939",
+        "name": "Add fw filter with handle exceeding 32-bit maximum",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 4294967296 prio 1 fw action ok",
+        "expExitCode": "1",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 4294967296 prio 1 protocol all fw",
+        "matchPattern": "fw.*handle 0x.*gact action pass",
+        "matchCount": "0",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "658c",
+        "name": "Add fw filter with mask in hex",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 10/0xa1b2f prio 1 fw action ok",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 10 prio 1 protocol all fw",
+        "matchPattern": "fw.*handle 0xa/0xa1b2f",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "86be",
+        "name": "Add fw filter with mask at 32-bit maximum",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 10/4294967295 prio 1 fw action ok",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 10 prio 1 protocol all fw",
+        "matchPattern": "fw.*handle 0xa[^/]",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "e635",
+        "name": "Add fw filter with mask exceeding 32-bit maximum",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 10/4294967296 prio 1 fw action ok",
+        "expExitCode": "1",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 10 prio 1 protocol all fw",
+        "matchPattern": "fw.*handle 0xa",
+        "matchCount": "0",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "6cab",
+        "name": "Add fw filter with handle/mask in hex",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 0xa1b2cdff/0x1a2bffdc prio 1 fw action ok",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 0xa1b2cdff prio 1 protocol all fw",
+        "matchPattern": "fw.*handle 0xa1b2cdff/0x1a2bffdc",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "8700",
+        "name": "Add fw filter with handle/mask at 32-bit maximum",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 4294967295/4294967295 prio 1 fw action ok",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 0xffffffff prio 1 protocol all fw",
+        "matchPattern": "fw.*handle 0xffffffff[^/]",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "7d62",
+        "name": "Add fw filter with handle/mask exceeding 32-bit maximum",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 4294967296/4294967296 prio 1 fw action ok",
+        "expExitCode": "1",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 10 prio 1 protocol all fw",
+        "matchPattern": "fw.*handle",
+        "matchCount": "0",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "7b69",
+        "name": "Add fw filter with missing mandatory handle",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: prio 1 fw action ok",
+        "expExitCode": "2",
+        "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+        "matchPattern": "filter protocol all.*fw.*handle.*gact action pass",
+        "matchCount": "0",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "d68b",
+        "name": "Add fw filter with invalid parent",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent aa11b1b2: handle 1 prio 1 fw action ok",
+        "expExitCode": "255",
+        "verifyCmd": "$TC filter dev $DEV1 parent aa11b1b2: handle 1 prio 1 protocol all fw",
+        "matchPattern": "filter protocol all pref 1 fw.*handle 0x1.*gact action pass",
+        "matchCount": "0",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "66e0",
+        "name": "Add fw filter with missing mandatory parent id",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 handle 1 prio 1 fw action ok",
+        "expExitCode": "2",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol all fw",
+        "matchPattern": "pref [0-9]+ fw.*handle 0x1.*gact action pass",
+        "matchCount": "0",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "0ff3",
+        "name": "Add fw filter with classid",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 fw classid 3 action ok",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol all fw",
+        "matchPattern": "fw.*handle 0x1 classid :3.*gact action pass",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "9849",
+        "name": "Add fw filter with classid at root",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 fw classid ffff:ffff action ok",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol all fw",
+        "matchPattern": "pref 1 fw.*handle 0x1 classid root.*gact action pass",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "b7ff",
+        "name": "Add fw filter with classid - keeps last 8 (hex) digits",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 fw classid 98765fedcb action ok",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol all fw",
+        "matchPattern": "fw.*handle 0x1 classid 765f:edcb.*gact action pass",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "2b18",
+        "name": "Add fw filter with invalid classid",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 fw classid 6789defg action ok",
+        "expExitCode": "1",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol all fw",
+        "matchPattern": "fw.*handle 0x1 classid 6789:defg.*gact action pass",
+        "matchCount": "0",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "fade",
+        "name": "Add fw filter with flowid",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 10 prio 1 fw flowid 1:10 action ok",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 10 prio 1 protocol all fw",
+        "matchPattern": "filter parent ffff: protocol all pref 1 fw.*handle 0xa classid 1:10.*gact action pass",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "33af",
+        "name": "Add fw filter with flowid then classid (same arg, takes second)",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 11 prio 1 fw flowid 10 classid 4 action ok",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 11 prio 1 protocol all fw",
+        "matchPattern": "filter parent ffff: protocol all pref 1 fw.*handle 0xb classid :4.*gact action pass",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "8a8c",
+        "name": "Add fw filter with classid then flowid (same arg, takes second)",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 11 prio 1 fw classid 4 flowid 10 action ok",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 11 prio 1 protocol all fw",
+        "matchPattern": "filter parent ffff: protocol all pref 1 fw.*handle 0xb classid :10.*gact action pass",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "b50d",
+        "name": "Add fw filter with handle val/mask and flowid 10:1000",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: prio 3 handle 10/0xff fw flowid 10:1000 action ok",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 10 prio 3 protocol all fw",
+        "matchPattern": "filter parent ffff: protocol all pref 3 fw.*handle 0xa/0xff classid 10:1000.*gact action pass",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "7207",
+        "name": "Add fw filter with protocol ip",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: protocol ip prio 1 handle 3 fw action ok",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 3 prio 1 protocol ip fw",
+        "matchPattern": "filter parent ffff: protocol ip pref 1 fw.*handle 0x3.*gact action pass.*index [0-9]+ ref [0-9]+ bind [0-9]+",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "306d",
+        "name": "Add fw filter with protocol ipv6",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: protocol ipv6 prio 2 handle 4 fw action ok",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 4 prio 2 protocol ipv6 fw",
+        "matchPattern": "filter parent ffff: protocol ipv6 pref 2 fw.*handle 0x4.*gact action pass.*index [0-9]+ ref [0-9]+ bind [0-9]+",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "9a78",
+        "name": "Add fw filter with protocol arp",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: protocol arp prio 5 handle 7 fw action drop",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 7 prio 5 protocol arp fw",
+        "matchPattern": "filter parent ffff: protocol arp pref 5 fw.*handle 0x7.*gact action drop.*index [0-9]+ ref [0-9]+ bind [0-9]+",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "1821",
+        "name": "Add fw filter with protocol 802_3",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: protocol 802_3 handle 1 prio 1 fw action ok",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol 802_3 fw",
+        "matchPattern": "filter parent ffff: protocol 802_3 pref 1 fw.*handle 0x1.*gact action pass",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "2260",
+        "name": "Add fw filter with invalid protocol",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: protocol igmp handle 1 prio 1 fw action ok",
+        "expExitCode": "255",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol igmp fw",
+        "matchPattern": "filter parent ffff: protocol igmp pref 1 fw.*handle 0x1.*gact action pass",
+        "matchCount": "0",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "09d7",
+        "name": "Add fw filters protocol 802_3 and ip with conflicting priorities",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress",
+            "$TC filter add dev $DEV1 parent ffff: protocol 802_3 prio 3 handle 7 fw action ok"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: protocol ip prio 3 handle 8 fw action ok",
+        "expExitCode": "2",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 8 prio 3 protocol ip fw",
+        "matchPattern": "filter parent ffff: protocol ip pref 3 fw.*handle 0x8",
+        "matchCount": "0",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "6973",
+        "name": "Add fw filters with same index, same action",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress",
+            "$TC filter add dev $DEV1 parent ffff: prio 6 handle 2 fw action continue index 5"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: prio 8 handle 4 fw action continue index 5",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 4 prio 8 protocol all fw",
+        "matchPattern": "filter parent ffff: protocol all pref 8 fw.*handle 0x4.*gact action continue.*index 5 ref 2 bind 2",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "fc06",
+        "name": "Add fw filters with action police",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: prio 3 handle 4 fw action police rate 1kbit burst 10k index 5",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 4 prio 3 protocol all fw",
+        "matchPattern": "filter parent ffff: protocol all pref 3 fw.*handle 0x4.*police 0x5 rate 1Kbit burst 10Kb mtu 2Kb action reclassify overhead 0b.*ref 1 bind 1",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "aac7",
+        "name": "Add fw filters with action police linklayer atm",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: prio 3 handle 4 fw action police rate 2mbit burst 200k linklayer atm index 8",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 4 prio 3 protocol all fw",
+        "matchPattern": "filter parent ffff: protocol all pref 3 fw.*handle 0x4.*police 0x8 rate 2Mbit burst 200Kb mtu 2Kb action reclassify overhead 0b linklayer atm.*ref 1 bind 1",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "5339",
+        "name": "Del entire fw filter",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress",
+            "$TC filter add dev $DEV1 parent ffff: handle 5 prio 7 fw action pass",
+            "$TC filter add dev $DEV1 parent ffff: handle 3 prio 9 fw action pass"
+        ],
+        "cmdUnderTest": "$TC filter del dev $DEV1 parent ffff:",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+        "matchPattern": "protocol all pref.*handle.*gact action pass",
+        "matchCount": "0",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "0e99",
+        "name": "Del single fw filter x1",
+        "__comment__": "First of two tests to check that one filter is there and the other isn't",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress",
+            "$TC filter add dev $DEV1 parent ffff: handle 5 prio 7 fw action pass",
+            "$TC filter add dev $DEV1 parent ffff: handle 3 prio 9 fw action pass"
+        ],
+        "cmdUnderTest": "$TC filter del dev $DEV1 parent ffff: handle 3 prio 9 fw action pass",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+        "matchPattern": "protocol all pref 7.*handle 0x5.*gact action pass",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "f54c",
+        "name": "Del single fw filter x2",
+        "__comment__": "Second of two tests to check that one filter is there and the other isn't",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress",
+            "$TC filter add dev $DEV1 parent ffff: handle 5 prio 7 fw action pass",
+            "$TC filter add dev $DEV1 parent ffff: handle 3 prio 9 fw action pass"
+        ],
+        "cmdUnderTest": "$TC filter del dev $DEV1 parent ffff: handle 3 prio 9 fw action pass",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+        "matchPattern": "protocol all pref 9.*handle 0x3.*gact action pass",
+        "matchCount": "0",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "ba94",
+        "name": "Del fw filter by prio",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress",
+            "$TC filter add dev $DEV1 parent ffff: handle 1 prio 4 fw action ok",
+            "$TC filter add dev $DEV1 parent ffff: handle 2 prio 4 fw action ok"
+        ],
+        "cmdUnderTest": "$TC filter del dev $DEV1 parent ffff: prio 4",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+        "matchPattern": "pref 4 fw.*gact action pass",
+        "matchCount": "0",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "4acb",
+        "name": "Del fw filter by chain",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress",
+            "$TC filter add dev $DEV1 parent ffff: handle 4 prio 2 chain 13 fw action pipe",
+            "$TC filter add dev $DEV1 parent ffff: handle 3 prio 5 chain 13 fw action pipe"
+        ],
+        "cmdUnderTest": "$TC filter del dev $DEV1 parent ffff: chain 13",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+        "matchPattern": "fw chain 13 handle.*gact action pipe",
+        "matchCount": "0",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "3424",
+        "name": "Del fw filter by action (invalid)",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress",
+            "$TC filter add dev $DEV1 parent ffff: handle 2 prio 4 fw action drop"
+        ],
+        "cmdUnderTest": "$TC filter del dev $DEV1 parent ffff: fw action drop",
+        "expExitCode": "2",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 2 prio 4 protocol all fw",
+        "matchPattern": "handle 0x2.*gact action drop",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "da89",
+        "name": "Del fw filter by handle (invalid)",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress",
+            "$TC filter add dev $DEV1 parent ffff: handle 3 prio 4 fw action continue"
+        ],
+        "cmdUnderTest": "$TC filter del dev $DEV1 parent ffff: handle 3 fw",
+        "expExitCode": "2",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 3 prio 4 protocol all fw",
+        "matchPattern": "handle 0x3.*gact action continue",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "4d95",
+        "name": "Del fw filter by protocol (invalid)",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress",
+            "$TC filter add dev $DEV1 parent ffff: handle 4 prio 2 protocol arp fw action pipe"
+        ],
+        "cmdUnderTest": "$TC filter del dev $DEV1 parent ffff: protocol arp fw",
+        "expExitCode": "2",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 4 prio 2 protocol arp fw",
+        "matchPattern": "filter parent ffff: protocol arp.*handle 0x4.*gact action pipe",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "4736",
+        "name": "Del fw filter by flowid (invalid)",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress",
+            "$TC filter add dev $DEV1 parent ffff: handle 4 prio 2 fw action pipe flowid 45"
+        ],
+        "cmdUnderTest": "$TC filter del dev $DEV1 parent ffff: fw flowid 45",
+        "expExitCode": "2",
+        "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+        "matchPattern": "handle 0x4.*gact action pipe",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "3dcb",
+        "name": "Replace fw filter action",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress",
+            "$TC filter add dev $DEV1 parent ffff: handle 1 prio 2 fw action ok"
+        ],
+        "cmdUnderTest": "$TC filter replace dev $DEV1 parent ffff: handle 1 prio 2 fw action pipe",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+        "matchPattern": "pref 2 fw.*handle 0x1.*gact action pipe",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "eb4d",
+        "name": "Replace fw filter classid",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress",
+            "$TC filter add dev $DEV1 parent ffff: handle 1 prio 2 fw action ok"
+        ],
+        "cmdUnderTest": "$TC filter replace dev $DEV1 parent ffff: handle 1 prio 2 fw action pipe classid 2",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+        "matchPattern": "pref 2 fw.*handle 0x1 classid :2.*gact action pipe",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "67ec",
+        "name": "Replace fw filter index",
+        "category": [
+            "filter",
+            "fw"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress",
+            "$TC filter add dev $DEV1 parent ffff: handle 1 prio 2 fw action ok index 3"
+        ],
+        "cmdUnderTest": "$TC filter replace dev $DEV1 parent ffff: handle 1 prio 2 fw action ok index 16",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+        "matchPattern": "pref 2 fw.*handle 0x1.*gact action pass.*index 16",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    }
+]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json b/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json
new file mode 100644
index 000000000..99a5ffca1
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json
@@ -0,0 +1,42 @@
+[
+    {
+        "id": "e9a3",
+        "name": "Add u32 with source match",
+        "category": [
+            "filter",
+            "u32"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: protocol ip prio 1 u32 match ip src 127.0.0.1/32 flowid 1:1 action ok",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+        "matchPattern": "match 7f000001/ffffffff at 12",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "d052",
+        "name": "Add 1M filters with the same action",
+        "category": [
+            "filter",
+            "flower"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV2 ingress",
+            "./tdc_batch.py $DEV2 $BATCH_FILE --share_action -n 1000000"
+        ],
+        "cmdUnderTest": "$TC -b $BATCH_FILE",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action gact",
+        "matchPattern": "action order 0: gact action drop.*index 1 ref 1000000 bind 1000000",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV2 ingress",
+            "/bin/rm $BATCH_FILE"
+        ]
+    }
+]
diff --git a/tools/testing/selftests/tc-testing/tdc.py b/tools/testing/selftests/tc-testing/tdc.py
new file mode 100755
index 000000000..7607ba3e3
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tdc.py
@@ -0,0 +1,663 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+"""
+tdc.py - Linux tc (Traffic Control) unit test driver
+
+Copyright (C) 2017 Lucas Bates <lucasb@mojatatu.com>
+"""
+
+import re
+import os
+import sys
+import argparse
+import importlib
+import json
+import subprocess
+import time
+import traceback
+from collections import OrderedDict
+from string import Template
+
+from tdc_config import *
+from tdc_helper import *
+
+import TdcPlugin
+
+
+class PluginMgrTestFail(Exception):
+    def __init__(self, stage, output, message):
+        self.stage = stage
+        self.output = output
+        self.message = message
+
+class PluginMgr:
+    def __init__(self, argparser):
+        super().__init__()
+        self.plugins = {}
+        self.plugin_instances = []
+        self.args = []
+        self.argparser = argparser
+
+        # TODO, put plugins in order
+        plugindir = os.getenv('TDC_PLUGIN_DIR', './plugins')
+        for dirpath, dirnames, filenames in os.walk(plugindir):
+            for fn in filenames:
+                if (fn.endswith('.py') and
+                    not fn == '__init__.py' and
+                    not fn.startswith('#') and
+                    not fn.startswith('.#')):
+                    mn = fn[0:-3]
+                    foo = importlib.import_module('plugins.' + mn)
+                    self.plugins[mn] = foo
+                    self.plugin_instances.append(foo.SubPlugin())
+
+    def call_pre_suite(self, testcount, testidlist):
+        for pgn_inst in self.plugin_instances:
+            pgn_inst.pre_suite(testcount, testidlist)
+
+    def call_post_suite(self, index):
+        for pgn_inst in reversed(self.plugin_instances):
+            pgn_inst.post_suite(index)
+
+    def call_pre_case(self, test_ordinal, testid):
+        for pgn_inst in self.plugin_instances:
+            try:
+                pgn_inst.pre_case(test_ordinal, testid)
+            except Exception as ee:
+                print('exception {} in call to pre_case for {} plugin'.
+                      format(ee, pgn_inst.__class__))
+                print('test_ordinal is {}'.format(test_ordinal))
+                print('testid is {}'.format(testid))
+                raise
+
+    def call_post_case(self):
+        for pgn_inst in reversed(self.plugin_instances):
+            pgn_inst.post_case()
+
+    def call_pre_execute(self):
+        for pgn_inst in self.plugin_instances:
+            pgn_inst.pre_execute()
+
+    def call_post_execute(self):
+        for pgn_inst in reversed(self.plugin_instances):
+            pgn_inst.post_execute()
+
+    def call_add_args(self, parser):
+        for pgn_inst in self.plugin_instances:
+            parser = pgn_inst.add_args(parser)
+        return parser
+
+    def call_check_args(self, args, remaining):
+        for pgn_inst in self.plugin_instances:
+            pgn_inst.check_args(args, remaining)
+
+    def call_adjust_command(self, stage, command):
+        for pgn_inst in self.plugin_instances:
+            command = pgn_inst.adjust_command(stage, command)
+        return command
+
+    @staticmethod
+    def _make_argparser(args):
+        self.argparser = argparse.ArgumentParser(
+            description='Linux TC unit tests')
+
+
+def replace_keywords(cmd):
+    """
+    For a given executable command, substitute any known
+    variables contained within NAMES with the correct values
+    """
+    tcmd = Template(cmd)
+    subcmd = tcmd.safe_substitute(NAMES)
+    return subcmd
+
+
+def exec_cmd(args, pm, stage, command):
+    """
+    Perform any required modifications on an executable command, then run
+    it in a subprocess and return the results.
+    """
+    if len(command.strip()) == 0:
+        return None, None
+    if '$' in command:
+        command = replace_keywords(command)
+
+    command = pm.call_adjust_command(stage, command)
+    if args.verbose > 0:
+        print('command "{}"'.format(command))
+    proc = subprocess.Popen(command,
+        shell=True,
+        stdout=subprocess.PIPE,
+        stderr=subprocess.PIPE,
+        env=ENVIR)
+    (rawout, serr) = proc.communicate()
+
+    if proc.returncode != 0 and len(serr) > 0:
+        foutput = serr.decode("utf-8", errors="ignore")
+    else:
+        foutput = rawout.decode("utf-8", errors="ignore")
+
+    proc.stdout.close()
+    proc.stderr.close()
+    return proc, foutput
+
+
+def prepare_env(args, pm, stage, prefix, cmdlist, output = None):
+    """
+    Execute the setup/teardown commands for a test case.
+    Optionally terminate test execution if the command fails.
+    """
+    if args.verbose > 0:
+        print('{}'.format(prefix))
+    for cmdinfo in cmdlist:
+        if isinstance(cmdinfo, list):
+            exit_codes = cmdinfo[1:]
+            cmd = cmdinfo[0]
+        else:
+            exit_codes = [0]
+            cmd = cmdinfo
+
+        if not cmd:
+            continue
+
+        (proc, foutput) = exec_cmd(args, pm, stage, cmd)
+
+        if proc and (proc.returncode not in exit_codes):
+            print('', file=sys.stderr)
+            print("{} *** Could not execute: \"{}\"".format(prefix, cmd),
+                  file=sys.stderr)
+            print("\n{} *** Error message: \"{}\"".format(prefix, foutput),
+                  file=sys.stderr)
+            print("returncode {}; expected {}".format(proc.returncode,
+                                                      exit_codes))
+            print("\n{} *** Aborting test run.".format(prefix), file=sys.stderr)
+            print("\n\n{} *** stdout ***".format(proc.stdout), file=sys.stderr)
+            print("\n\n{} *** stderr ***".format(proc.stderr), file=sys.stderr)
+            raise PluginMgrTestFail(
+                stage, output,
+                '"{}" did not complete successfully'.format(prefix))
+
+def run_one_test(pm, args, index, tidx):
+    global NAMES
+    result = True
+    tresult = ""
+    tap = ""
+    if args.verbose > 0:
+        print("\t====================\n=====> ", end="")
+    print("Test " + tidx["id"] + ": " + tidx["name"])
+
+    # populate NAMES with TESTID for this test
+    NAMES['TESTID'] = tidx['id']
+
+    pm.call_pre_case(index, tidx['id'])
+    prepare_env(args, pm, 'setup', "-----> prepare stage", tidx["setup"])
+
+    if (args.verbose > 0):
+        print('-----> execute stage')
+    pm.call_pre_execute()
+    (p, procout) = exec_cmd(args, pm, 'execute', tidx["cmdUnderTest"])
+    if p:
+        exit_code = p.returncode
+    else:
+        exit_code = None
+
+    pm.call_post_execute()
+
+    if (exit_code is None or exit_code != int(tidx["expExitCode"])):
+        result = False
+        print("exit: {!r}".format(exit_code))
+        print("exit: {}".format(int(tidx["expExitCode"])))
+        #print("exit: {!r} {}".format(exit_code, int(tidx["expExitCode"])))
+        print(procout)
+    else:
+        if args.verbose > 0:
+            print('-----> verify stage')
+        match_pattern = re.compile(
+            str(tidx["matchPattern"]), re.DOTALL | re.MULTILINE)
+        (p, procout) = exec_cmd(args, pm, 'verify', tidx["verifyCmd"])
+        if procout:
+            match_index = re.findall(match_pattern, procout)
+            if len(match_index) != int(tidx["matchCount"]):
+                result = False
+        elif int(tidx["matchCount"]) != 0:
+            result = False
+
+    if not result:
+        tresult += 'not '
+    tresult += 'ok {} - {} # {}\n'.format(str(index), tidx['id'], tidx['name'])
+    tap += tresult
+
+    if result == False:
+        if procout:
+            tap += procout
+        else:
+            tap += 'No output!\n'
+
+    prepare_env(args, pm, 'teardown', '-----> teardown stage', tidx['teardown'], procout)
+    pm.call_post_case()
+
+    index += 1
+
+    # remove TESTID from NAMES
+    del(NAMES['TESTID'])
+    return tap
+
+def test_runner(pm, args, filtered_tests):
+    """
+    Driver function for the unit tests.
+
+    Prints information about the tests being run, executes the setup and
+    teardown commands and the command under test itself. Also determines
+    success/failure based on the information in the test case and generates
+    TAP output accordingly.
+    """
+    testlist = filtered_tests
+    tcount = len(testlist)
+    index = 1
+    tap = ''
+    badtest = None
+    stage = None
+    emergency_exit = False
+    emergency_exit_message = ''
+
+    if args.notap:
+        if args.verbose:
+            tap = 'notap requested:  omitting test plan\n'
+    else:
+        tap = str(index) + ".." + str(tcount) + "\n"
+    try:
+        pm.call_pre_suite(tcount, [tidx['id'] for tidx in testlist])
+    except Exception as ee:
+        ex_type, ex, ex_tb = sys.exc_info()
+        print('Exception {} {} (caught in pre_suite).'.
+              format(ex_type, ex))
+        # when the extra print statements are uncommented,
+        # the traceback does not appear between them
+        # (it appears way earlier in the tdc.py output)
+        # so don't bother ...
+        # print('--------------------(')
+        # print('traceback')
+        traceback.print_tb(ex_tb)
+        # print('--------------------)')
+        emergency_exit_message = 'EMERGENCY EXIT, call_pre_suite failed with exception {} {}\n'.format(ex_type, ex)
+        emergency_exit = True
+        stage = 'pre-SUITE'
+
+    if emergency_exit:
+        pm.call_post_suite(index)
+        return emergency_exit_message
+    if args.verbose > 1:
+        print('give test rig 2 seconds to stabilize')
+    time.sleep(2)
+    for tidx in testlist:
+        if "flower" in tidx["category"] and args.device == None:
+            if args.verbose > 1:
+                print('Not executing test {} {} because DEV2 not defined'.
+                      format(tidx['id'], tidx['name']))
+            continue
+        try:
+            badtest = tidx  # in case it goes bad
+            tap += run_one_test(pm, args, index, tidx)
+        except PluginMgrTestFail as pmtf:
+            ex_type, ex, ex_tb = sys.exc_info()
+            stage = pmtf.stage
+            message = pmtf.message
+            output = pmtf.output
+            print(message)
+            print('Exception {} {} (caught in test_runner, running test {} {} {} stage {})'.
+                  format(ex_type, ex, index, tidx['id'], tidx['name'], stage))
+            print('---------------')
+            print('traceback')
+            traceback.print_tb(ex_tb)
+            print('---------------')
+            if stage == 'teardown':
+                print('accumulated output for this test:')
+                if pmtf.output:
+                    print(pmtf.output)
+            print('---------------')
+            break
+        index += 1
+
+    # if we failed in setup or teardown,
+    # fill in the remaining tests with ok-skipped
+    count = index
+    if not args.notap:
+        tap += 'about to flush the tap output if tests need to be skipped\n'
+        if tcount + 1 != index:
+            for tidx in testlist[index - 1:]:
+                msg = 'skipped - previous {} failed'.format(stage)
+                tap += 'ok {} - {} # {} {} {}\n'.format(
+                    count, tidx['id'], msg, index, badtest.get('id', '--Unknown--'))
+                count += 1
+
+        tap += 'done flushing skipped test tap output\n'
+
+    if args.pause:
+        print('Want to pause\nPress enter to continue ...')
+        if input(sys.stdin):
+            print('got something on stdin')
+
+    pm.call_post_suite(index)
+
+    return tap
+
+def has_blank_ids(idlist):
+    """
+    Search the list for empty ID fields and return true/false accordingly.
+    """
+    return not(all(k for k in idlist))
+
+
+def load_from_file(filename):
+    """
+    Open the JSON file containing the test cases and return them
+    as list of ordered dictionary objects.
+    """
+    try:
+        with open(filename) as test_data:
+            testlist = json.load(test_data, object_pairs_hook=OrderedDict)
+    except json.JSONDecodeError as jde:
+        print('IGNORING test case file {}\n\tBECAUSE:  {}'.format(filename, jde))
+        testlist = list()
+    else:
+        idlist = get_id_list(testlist)
+        if (has_blank_ids(idlist)):
+            for k in testlist:
+                k['filename'] = filename
+    return testlist
+
+
+def args_parse():
+    """
+    Create the argument parser.
+    """
+    parser = argparse.ArgumentParser(description='Linux TC unit tests')
+    return parser
+
+
+def set_args(parser):
+    """
+    Set the command line arguments for tdc.
+    """
+    parser.add_argument(
+        '-p', '--path', type=str,
+        help='The full path to the tc executable to use')
+    sg = parser.add_argument_group(
+        'selection', 'select which test cases: ' +
+        'files plus directories; filtered by categories plus testids')
+    ag = parser.add_argument_group(
+        'action', 'select action to perform on selected test cases')
+
+    sg.add_argument(
+        '-D', '--directory', nargs='+', metavar='DIR',
+        help='Collect tests from the specified directory(ies) ' +
+        '(default [tc-tests])')
+    sg.add_argument(
+        '-f', '--file', nargs='+', metavar='FILE',
+        help='Run tests from the specified file(s)')
+    sg.add_argument(
+        '-c', '--category', nargs='*', metavar='CATG', default=['+c'],
+        help='Run tests only from the specified category/ies, ' +
+        'or if no category/ies is/are specified, list known categories.')
+    sg.add_argument(
+        '-e', '--execute', nargs='+', metavar='ID',
+        help='Execute the specified test cases with specified IDs')
+    ag.add_argument(
+        '-l', '--list', action='store_true',
+        help='List all test cases, or those only within the specified category')
+    ag.add_argument(
+        '-s', '--show', action='store_true', dest='showID',
+        help='Display the selected test cases')
+    ag.add_argument(
+        '-i', '--id', action='store_true', dest='gen_id',
+        help='Generate ID numbers for new test cases')
+    parser.add_argument(
+        '-v', '--verbose', action='count', default=0,
+        help='Show the commands that are being run')
+    parser.add_argument(
+        '-N', '--notap', action='store_true',
+        help='Suppress tap results for command under test')
+    parser.add_argument('-d', '--device',
+                        help='Execute the test case in flower category')
+    parser.add_argument(
+        '-P', '--pause', action='store_true',
+        help='Pause execution just before post-suite stage')
+    return parser
+
+
+def check_default_settings(args, remaining, pm):
+    """
+    Process any arguments overriding the default settings,
+    and ensure the settings are correct.
+    """
+    # Allow for overriding specific settings
+    global NAMES
+
+    if args.path != None:
+        NAMES['TC'] = args.path
+    if args.device != None:
+        NAMES['DEV2'] = args.device
+    if not os.path.isfile(NAMES['TC']):
+        print("The specified tc path " + NAMES['TC'] + " does not exist.")
+        exit(1)
+
+    pm.call_check_args(args, remaining)
+
+
+def get_id_list(alltests):
+    """
+    Generate a list of all IDs in the test cases.
+    """
+    return [x["id"] for x in alltests]
+
+
+def check_case_id(alltests):
+    """
+    Check for duplicate test case IDs.
+    """
+    idl = get_id_list(alltests)
+    return [x for x in idl if idl.count(x) > 1]
+
+
+def does_id_exist(alltests, newid):
+    """
+    Check if a given ID already exists in the list of test cases.
+    """
+    idl = get_id_list(alltests)
+    return (any(newid == x for x in idl))
+
+
+def generate_case_ids(alltests):
+    """
+    If a test case has a blank ID field, generate a random hex ID for it
+    and then write the test cases back to disk.
+    """
+    import random
+    for c in alltests:
+        if (c["id"] == ""):
+            while True:
+                newid = str('{:04x}'.format(random.randrange(16**4)))
+                if (does_id_exist(alltests, newid)):
+                    continue
+                else:
+                    c['id'] = newid
+                    break
+
+    ufilename = []
+    for c in alltests:
+        if ('filename' in c):
+            ufilename.append(c['filename'])
+    ufilename = get_unique_item(ufilename)
+    for f in ufilename:
+        testlist = []
+        for t in alltests:
+            if 'filename' in t:
+                if t['filename'] == f:
+                    del t['filename']
+                    testlist.append(t)
+        outfile = open(f, "w")
+        json.dump(testlist, outfile, indent=4)
+        outfile.write("\n")
+        outfile.close()
+
+def filter_tests_by_id(args, testlist):
+    '''
+    Remove tests from testlist that are not in the named id list.
+    If id list is empty, return empty list.
+    '''
+    newlist = list()
+    if testlist and args.execute:
+        target_ids = args.execute
+
+        if isinstance(target_ids, list) and (len(target_ids) > 0):
+            newlist = list(filter(lambda x: x['id'] in target_ids, testlist))
+    return newlist
+
+def filter_tests_by_category(args, testlist):
+    '''
+    Remove tests from testlist that are not in a named category.
+    '''
+    answer = list()
+    if args.category and testlist:
+        test_ids = list()
+        for catg in set(args.category):
+            if catg == '+c':
+                continue
+            print('considering category {}'.format(catg))
+            for tc in testlist:
+                if catg in tc['category'] and tc['id'] not in test_ids:
+                    answer.append(tc)
+                    test_ids.append(tc['id'])
+
+    return answer
+
+def get_test_cases(args):
+    """
+    If a test case file is specified, retrieve tests from that file.
+    Otherwise, glob for all json files in subdirectories and load from
+    each one.
+    Also, if requested, filter by category, and add tests matching
+    certain ids.
+    """
+    import fnmatch
+
+    flist = []
+    testdirs = ['tc-tests']
+
+    if args.file:
+        # at least one file was specified - remove the default directory
+        testdirs = []
+
+        for ff in args.file:
+            if not os.path.isfile(ff):
+                print("IGNORING file " + ff + "\n\tBECAUSE does not exist.")
+            else:
+                flist.append(os.path.abspath(ff))
+
+    if args.directory:
+        testdirs = args.directory
+
+    for testdir in testdirs:
+        for root, dirnames, filenames in os.walk(testdir):
+            for filename in fnmatch.filter(filenames, '*.json'):
+                candidate = os.path.abspath(os.path.join(root, filename))
+                if candidate not in testdirs:
+                    flist.append(candidate)
+
+    alltestcases = list()
+    for casefile in flist:
+        alltestcases = alltestcases + (load_from_file(casefile))
+
+    allcatlist = get_test_categories(alltestcases)
+    allidlist = get_id_list(alltestcases)
+
+    testcases_by_cats = get_categorized_testlist(alltestcases, allcatlist)
+    idtestcases = filter_tests_by_id(args, alltestcases)
+    cattestcases = filter_tests_by_category(args, alltestcases)
+
+    cat_ids = [x['id'] for x in cattestcases]
+    if args.execute:
+        if args.category:
+            alltestcases = cattestcases + [x for x in idtestcases if x['id'] not in cat_ids]
+        else:
+            alltestcases = idtestcases
+    else:
+        if cat_ids:
+            alltestcases = cattestcases
+        else:
+            # just accept the existing value of alltestcases,
+            # which has been filtered by file/directory
+            pass
+
+    return allcatlist, allidlist, testcases_by_cats, alltestcases
+
+
+def set_operation_mode(pm, args):
+    """
+    Load the test case data and process remaining arguments to determine
+    what the script should do for this run, and call the appropriate
+    function.
+    """
+    ucat, idlist, testcases, alltests = get_test_cases(args)
+
+    if args.gen_id:
+        if (has_blank_ids(idlist)):
+            alltests = generate_case_ids(alltests)
+        else:
+            print("No empty ID fields found in test files.")
+        exit(0)
+
+    duplicate_ids = check_case_id(alltests)
+    if (len(duplicate_ids) > 0):
+        print("The following test case IDs are not unique:")
+        print(str(set(duplicate_ids)))
+        print("Please correct them before continuing.")
+        exit(1)
+
+    if args.showID:
+        for atest in alltests:
+            print_test_case(atest)
+        exit(0)
+
+    if isinstance(args.category, list) and (len(args.category) == 0):
+        print("Available categories:")
+        print_sll(ucat)
+        exit(0)
+
+    if args.list:
+        if args.list:
+            list_test_cases(alltests)
+            exit(0)
+
+    if len(alltests):
+        catresults = test_runner(pm, args, alltests)
+    else:
+        catresults = 'No tests found\n'
+    if args.notap:
+        print('Tap output suppression requested\n')
+    else:
+        print('All test results: \n\n{}'.format(catresults))
+
+def main():
+    """
+    Start of execution; set up argument parser and get the arguments,
+    and start operations.
+    """
+    parser = args_parse()
+    parser = set_args(parser)
+    pm = PluginMgr(parser)
+    parser = pm.call_add_args(parser)
+    (args, remaining) = parser.parse_known_args()
+    args.NAMES = NAMES
+    check_default_settings(args, remaining, pm)
+    if args.verbose > 2:
+        print('args is {}'.format(args))
+
+    set_operation_mode(pm, args)
+
+    exit(0)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/tools/testing/selftests/tc-testing/tdc_batch.py b/tools/testing/selftests/tc-testing/tdc_batch.py
new file mode 100755
index 000000000..3d8350d6b
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tdc_batch.py
@@ -0,0 +1,62 @@
+#!/usr/bin/env python3
+
+"""
+tdc_batch.py - a script to generate TC batch file
+
+Copyright (C) 2017 Chris Mi <chrism@mellanox.com>
+"""
+
+import argparse
+
+parser = argparse.ArgumentParser(description='TC batch file generator')
+parser.add_argument("device", help="device name")
+parser.add_argument("file", help="batch file name")
+parser.add_argument("-n", "--number", type=int,
+                    help="how many lines in batch file")
+parser.add_argument("-o", "--skip_sw",
+                    help="skip_sw (offload), by default skip_hw",
+                    action="store_true")
+parser.add_argument("-s", "--share_action",
+                    help="all filters share the same action",
+                    action="store_true")
+parser.add_argument("-p", "--prio",
+                    help="all filters have different prio",
+                    action="store_true")
+args = parser.parse_args()
+
+device = args.device
+file = open(args.file, 'w')
+
+number = 1
+if args.number:
+    number = args.number
+
+skip = "skip_hw"
+if args.skip_sw:
+    skip = "skip_sw"
+
+share_action = ""
+if args.share_action:
+    share_action = "index 1"
+
+prio = "prio 1"
+if args.prio:
+    prio = ""
+    if number > 0x4000:
+        number = 0x4000
+
+index = 0
+for i in range(0x100):
+    for j in range(0x100):
+        for k in range(0x100):
+            mac = ("{:02x}:{:02x}:{:02x}".format(i, j, k))
+            src_mac = "e4:11:00:" + mac
+            dst_mac = "e4:12:00:" + mac
+            cmd = ("filter add dev {} {} protocol ip parent ffff: flower {} "
+                   "src_mac {} dst_mac {} action drop {}".format
+                   (device, prio, skip, src_mac, dst_mac, share_action))
+            file.write("{}\n".format(cmd))
+            index += 1
+            if index >= number:
+                file.close()
+                exit(0)
diff --git a/tools/testing/selftests/tc-testing/tdc_config.py b/tools/testing/selftests/tc-testing/tdc_config.py
new file mode 100644
index 000000000..d651bc150
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tdc_config.py
@@ -0,0 +1,36 @@
+"""
+# SPDX-License-Identifier: GPL-2.0
+tdc_config.py - tdc user-specified values
+
+Copyright (C) 2017 Lucas Bates <lucasb@mojatatu.com>
+"""
+
+# Dictionary containing all values that can be substituted in executable
+# commands.
+NAMES = {
+          # Substitute your own tc path here
+          'TC': '/sbin/tc',
+          # Name of veth devices to be created for the namespace
+          'DEV0': 'v0p0',
+          'DEV1': 'v0p1',
+          'DEV2': '',
+          'BATCH_FILE': './batch.txt',
+          # Name of the namespace to use
+          'NS': 'tcut',
+          # Directory containing eBPF test programs
+          'EBPFDIR': './bpf'
+        }
+
+
+ENVIR = { }
+
+# put customizations in tdc_config_local.py
+try:
+    from tdc_config_local import *
+except ImportError as ie:
+    pass
+
+try:
+    NAMES.update(EXTRA_NAMES)
+except NameError as ne:
+    pass
diff --git a/tools/testing/selftests/tc-testing/tdc_config_local_template.py b/tools/testing/selftests/tc-testing/tdc_config_local_template.py
new file mode 100644
index 000000000..d48fc732a
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tdc_config_local_template.py
@@ -0,0 +1,23 @@
+"""
+tdc_config_local.py - tdc plugin-writer-specified values
+
+Copyright (C) 2017 bjb@mojatatu.com
+"""
+
+import os
+
+ENVIR = os.environ.copy()
+
+ENV_LD_LIBRARY_PATH = os.getenv('LD_LIBRARY_PATH', '')
+ENV_OTHER_LIB = os.getenv('OTHER_LIB', '')
+
+
+# example adding value to NAMES, without editing tdc_config.py
+EXTRA_NAMES = dict()
+EXTRA_NAMES['SOME_BIN'] = os.path.join(os.getenv('OTHER_BIN', ''), 'some_bin')
+
+
+# example adding values to ENVIR, without editing tdc_config.py
+ENVIR['VALGRIND_LIB'] = '/usr/lib/valgrind'
+ENVIR['VALGRIND_BIN'] = '/usr/bin/valgrind'
+ENVIR['VGDB_BIN'] = '/usr/bin/vgdb'
diff --git a/tools/testing/selftests/tc-testing/tdc_helper.py b/tools/testing/selftests/tc-testing/tdc_helper.py
new file mode 100644
index 000000000..9f35c96c8
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tdc_helper.py
@@ -0,0 +1,67 @@
+"""
+# SPDX-License-Identifier: GPL-2.0
+tdc_helper.py - tdc helper functions
+
+Copyright (C) 2017 Lucas Bates <lucasb@mojatatu.com>
+"""
+
+def get_categorized_testlist(alltests, ucat):
+    """ Sort the master test list into categories. """
+    testcases = dict()
+
+    for category in ucat:
+        testcases[category] = list(filter(lambda x: category in x['category'], alltests))
+
+    return(testcases)
+
+
+def get_unique_item(lst):
+    """ For a list, return a list of the unique items in the list. """
+    return list(set(lst))
+
+
+def get_test_categories(alltests):
+    """ Discover all unique test categories present in the test case file. """
+    ucat = []
+    for t in alltests:
+        ucat.extend(get_unique_item(t['category']))
+        ucat = get_unique_item(ucat)
+    return ucat
+
+def list_test_cases(testlist):
+    """ Print IDs and names of all test cases. """
+    for curcase in testlist:
+        print(curcase['id'] + ': (' + ', '.join(curcase['category']) + ") " + curcase['name'])
+
+
+def list_categories(testlist):
+    """ Show all categories that are present in a test case file. """
+    categories = set(map(lambda x: x['category'], testlist))
+    print("Available categories:")
+    print(", ".join(str(s) for s in categories))
+    print("")
+
+
+def print_list(cmdlist):
+    """ Print a list of strings prepended with a tab. """
+    for l in cmdlist:
+        if (type(l) == list):
+            print("\t" + str(l[0]))
+        else:
+            print("\t" + str(l))
+
+
+def print_sll(items):
+    print("\n".join(str(s) for s in items))
+
+
+def print_test_case(tcase):
+    """ Pretty-printing of a given test case. """
+    print('\n==============\nTest {}\t{}\n'.format(tcase['id'], tcase['name']))
+    for k in tcase.keys():
+        if (isinstance(tcase[k], list)):
+            print(k + ":")
+            print_list(tcase[k])
+        else:
+            if not ((k == 'id') or (k == 'name')):
+                print(k + ": " + str(tcase[k]))
diff --git a/tools/testing/selftests/timers/.gitignore b/tools/testing/selftests/timers/.gitignore
new file mode 100644
index 000000000..32a9eadb2
--- /dev/null
+++ b/tools/testing/selftests/timers/.gitignore
@@ -0,0 +1,21 @@
+alarmtimer-suspend
+change_skew
+clocksource-switch
+inconsistency-check
+leap-a-day
+leapcrash
+mqueue-lat
+nanosleep
+nsleep-lat
+posix_timers
+raw_skew
+rtcpie
+set-2038
+set-tai
+set-timer-lat
+skew_consistency
+threadtest
+valid-adjtimex
+adjtick
+set-tz
+freq-step
diff --git a/tools/testing/selftests/timers/Makefile b/tools/testing/selftests/timers/Makefile
new file mode 100644
index 000000000..7656c7ce7
--- /dev/null
+++ b/tools/testing/selftests/timers/Makefile
@@ -0,0 +1,23 @@
+# SPDX-License-Identifier: GPL-2.0
+CFLAGS += -O3 -Wl,-no-as-needed -Wall
+LDLIBS += -lrt -lpthread -lm
+
+# these are all "safe" tests that don't modify
+# system time or require escalated privileges
+TEST_GEN_PROGS = posix_timers nanosleep nsleep-lat set-timer-lat mqueue-lat \
+	     inconsistency-check raw_skew threadtest rtcpie
+
+DESTRUCTIVE_TESTS = alarmtimer-suspend valid-adjtimex adjtick change_skew \
+		      skew_consistency clocksource-switch freq-step leap-a-day \
+		      leapcrash set-tai set-2038 set-tz
+
+TEST_GEN_PROGS_EXTENDED = $(DESTRUCTIVE_TESTS)
+
+
+include ../lib.mk
+
+# these tests require escalated privileges
+# and may modify the system time or trigger
+# other behavior like suspend
+run_destructive_tests: run_tests
+	$(call RUN_TESTS, $(DESTRUCTIVE_TESTS))
diff --git a/tools/testing/selftests/timers/adjtick.c b/tools/testing/selftests/timers/adjtick.c
new file mode 100644
index 000000000..54d8d87f3
--- /dev/null
+++ b/tools/testing/selftests/timers/adjtick.c
@@ -0,0 +1,211 @@
+/* adjtimex() tick adjustment test
+ *		by:   John Stultz <john.stultz@linaro.org>
+ *		(C) Copyright Linaro Limited 2015
+ *		Licensed under the GPLv2
+ *
+ *  To build:
+ *	$ gcc adjtick.c -o adjtick -lrt
+ *
+ *   This program is free software: you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ */
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <sys/time.h>
+#include <sys/timex.h>
+#include <time.h>
+
+#include "../kselftest.h"
+
+#define CLOCK_MONOTONIC_RAW	4
+
+#define NSEC_PER_SEC		1000000000LL
+#define USEC_PER_SEC		1000000
+
+#define MILLION			1000000
+
+long systick;
+
+long long llabs(long long val)
+{
+	if (val < 0)
+		val = -val;
+	return val;
+}
+
+unsigned long long ts_to_nsec(struct timespec ts)
+{
+	return ts.tv_sec * NSEC_PER_SEC + ts.tv_nsec;
+}
+
+struct timespec nsec_to_ts(long long ns)
+{
+	struct timespec ts;
+
+	ts.tv_sec = ns/NSEC_PER_SEC;
+	ts.tv_nsec = ns%NSEC_PER_SEC;
+
+	return ts;
+}
+
+long long diff_timespec(struct timespec start, struct timespec end)
+{
+	long long start_ns, end_ns;
+
+	start_ns = ts_to_nsec(start);
+	end_ns = ts_to_nsec(end);
+
+	return end_ns - start_ns;
+}
+
+void get_monotonic_and_raw(struct timespec *mon, struct timespec *raw)
+{
+	struct timespec start, mid, end;
+	long long diff = 0, tmp;
+	int i;
+
+	clock_gettime(CLOCK_MONOTONIC, mon);
+	clock_gettime(CLOCK_MONOTONIC_RAW, raw);
+
+	/* Try to get a more tightly bound pairing */
+	for (i = 0; i < 3; i++) {
+		long long newdiff;
+
+		clock_gettime(CLOCK_MONOTONIC, &start);
+		clock_gettime(CLOCK_MONOTONIC_RAW, &mid);
+		clock_gettime(CLOCK_MONOTONIC, &end);
+
+		newdiff = diff_timespec(start, end);
+		if (diff == 0 || newdiff < diff) {
+			diff = newdiff;
+			*raw = mid;
+			tmp = (ts_to_nsec(start) + ts_to_nsec(end))/2;
+			*mon = nsec_to_ts(tmp);
+		}
+	}
+}
+
+long long get_ppm_drift(void)
+{
+	struct timespec mon_start, raw_start, mon_end, raw_end;
+	long long delta1, delta2, eppm;
+
+	get_monotonic_and_raw(&mon_start, &raw_start);
+
+	sleep(15);
+
+	get_monotonic_and_raw(&mon_end, &raw_end);
+
+	delta1 = diff_timespec(mon_start, mon_end);
+	delta2 = diff_timespec(raw_start, raw_end);
+
+	eppm = (delta1*MILLION)/delta2 - MILLION;
+
+	return eppm;
+}
+
+int check_tick_adj(long tickval)
+{
+	long long eppm, ppm;
+	struct timex tx1;
+
+	tx1.modes	 = ADJ_TICK;
+	tx1.modes	|= ADJ_OFFSET;
+	tx1.modes	|= ADJ_FREQUENCY;
+	tx1.modes	|= ADJ_STATUS;
+
+	tx1.status	= STA_PLL;
+	tx1.offset	= 0;
+	tx1.freq	= 0;
+	tx1.tick	= tickval;
+
+	adjtimex(&tx1);
+
+	sleep(1);
+
+	ppm = ((long long)tickval * MILLION)/systick - MILLION;
+	printf("Estimating tick (act: %ld usec, %lld ppm): ", tickval, ppm);
+
+	eppm = get_ppm_drift();
+	printf("%lld usec, %lld ppm", systick + (systick * eppm / MILLION), eppm);
+	fflush(stdout);
+
+	tx1.modes = 0;
+	adjtimex(&tx1);
+
+	if (tx1.offset || tx1.freq || tx1.tick != tickval) {
+		printf("	[ERROR]\n");
+		printf("\tUnexpected adjtimex return values, make sure ntpd is not running.\n");
+		return -1;
+	}
+
+	/*
+	 * Here we use 100ppm difference as an error bound.
+	 * We likely should see better, but some coarse clocksources
+	 * cannot match the HZ tick size accurately, so we have a
+	 * internal correction factor that doesn't scale exactly
+	 * with the adjustment, resulting in > 10ppm error during
+	 * a 10% adjustment. 100ppm also gives us more breathing
+	 * room for interruptions during the measurement.
+	 */
+	if (llabs(eppm - ppm) > 100) {
+		printf("	[FAILED]\n");
+		return -1;
+	}
+	printf("	[OK]\n");
+
+	return  0;
+}
+
+int main(int argv, char **argc)
+{
+	struct timespec raw;
+	long tick, max, interval, err;
+	struct timex tx1;
+
+	err = 0;
+	setbuf(stdout, NULL);
+
+	if (clock_gettime(CLOCK_MONOTONIC_RAW, &raw)) {
+		printf("ERR: NO CLOCK_MONOTONIC_RAW\n");
+		return -1;
+	}
+
+	printf("Each iteration takes about 15 seconds\n");
+
+	systick = sysconf(_SC_CLK_TCK);
+	systick = USEC_PER_SEC/sysconf(_SC_CLK_TCK);
+	max = systick/10; /* +/- 10% */
+	interval = max/4; /* in 4 steps each side */
+
+	for (tick = (systick - max); tick < (systick + max); tick += interval) {
+		if (check_tick_adj(tick)) {
+			err = 1;
+			break;
+		}
+	}
+
+	/* Reset things to zero */
+	tx1.modes	 = ADJ_TICK;
+	tx1.modes	|= ADJ_OFFSET;
+	tx1.modes	|= ADJ_FREQUENCY;
+
+	tx1.offset	 = 0;
+	tx1.freq	 = 0;
+	tx1.tick	 = systick;
+
+	adjtimex(&tx1);
+
+	if (err)
+		return ksft_exit_fail();
+
+	return ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/timers/alarmtimer-suspend.c b/tools/testing/selftests/timers/alarmtimer-suspend.c
new file mode 100644
index 000000000..4da09dbf8
--- /dev/null
+++ b/tools/testing/selftests/timers/alarmtimer-suspend.c
@@ -0,0 +1,178 @@
+/* alarmtimer suspend test
+ *		John Stultz (john.stultz@linaro.org)
+ *              (C) Copyright Linaro 2013
+ *              Licensed under the GPLv2
+ *
+ *   This test makes sure the alarmtimer & RTC wakeup code is
+ *   functioning.
+ *
+ *  To build:
+ *	$ gcc alarmtimer-suspend.c -o alarmtimer-suspend -lrt
+ *
+ *   This program is free software: you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ */
+
+
+#include <stdio.h>
+#include <unistd.h>
+#include <time.h>
+#include <string.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <pthread.h>
+#include "../kselftest.h"
+
+#define CLOCK_REALTIME			0
+#define CLOCK_MONOTONIC			1
+#define CLOCK_PROCESS_CPUTIME_ID	2
+#define CLOCK_THREAD_CPUTIME_ID		3
+#define CLOCK_MONOTONIC_RAW		4
+#define CLOCK_REALTIME_COARSE		5
+#define CLOCK_MONOTONIC_COARSE		6
+#define CLOCK_BOOTTIME			7
+#define CLOCK_REALTIME_ALARM		8
+#define CLOCK_BOOTTIME_ALARM		9
+#define CLOCK_HWSPECIFIC		10
+#define CLOCK_TAI			11
+#define NR_CLOCKIDS			12
+
+
+#define NSEC_PER_SEC 1000000000ULL
+#define UNREASONABLE_LAT (NSEC_PER_SEC * 5) /* hopefully we resume in 5 secs */
+
+#define SUSPEND_SECS 15
+int alarmcount;
+int alarm_clock_id;
+struct timespec start_time;
+
+
+char *clockstring(int clockid)
+{
+	switch (clockid) {
+	case CLOCK_REALTIME:
+		return "CLOCK_REALTIME";
+	case CLOCK_MONOTONIC:
+		return "CLOCK_MONOTONIC";
+	case CLOCK_PROCESS_CPUTIME_ID:
+		return "CLOCK_PROCESS_CPUTIME_ID";
+	case CLOCK_THREAD_CPUTIME_ID:
+		return "CLOCK_THREAD_CPUTIME_ID";
+	case CLOCK_MONOTONIC_RAW:
+		return "CLOCK_MONOTONIC_RAW";
+	case CLOCK_REALTIME_COARSE:
+		return "CLOCK_REALTIME_COARSE";
+	case CLOCK_MONOTONIC_COARSE:
+		return "CLOCK_MONOTONIC_COARSE";
+	case CLOCK_BOOTTIME:
+		return "CLOCK_BOOTTIME";
+	case CLOCK_REALTIME_ALARM:
+		return "CLOCK_REALTIME_ALARM";
+	case CLOCK_BOOTTIME_ALARM:
+		return "CLOCK_BOOTTIME_ALARM";
+	case CLOCK_TAI:
+		return "CLOCK_TAI";
+	};
+	return "UNKNOWN_CLOCKID";
+}
+
+
+long long timespec_sub(struct timespec a, struct timespec b)
+{
+	long long ret = NSEC_PER_SEC * b.tv_sec + b.tv_nsec;
+
+	ret -= NSEC_PER_SEC * a.tv_sec + a.tv_nsec;
+	return ret;
+}
+
+int final_ret = 0;
+
+void sigalarm(int signo)
+{
+	long long delta_ns;
+	struct timespec ts;
+
+	clock_gettime(alarm_clock_id, &ts);
+	alarmcount++;
+
+	delta_ns = timespec_sub(start_time, ts);
+	delta_ns -= NSEC_PER_SEC * SUSPEND_SECS * alarmcount;
+
+	printf("ALARM(%i): %ld:%ld latency: %lld ns ", alarmcount, ts.tv_sec,
+							ts.tv_nsec, delta_ns);
+
+	if (delta_ns > UNREASONABLE_LAT) {
+		printf("[FAIL]\n");
+		final_ret = -1;
+	} else
+		printf("[OK]\n");
+
+}
+
+int main(void)
+{
+	timer_t tm1;
+	struct itimerspec its1, its2;
+	struct sigevent se;
+	struct sigaction act;
+	int signum = SIGRTMAX;
+
+	/* Set up signal handler: */
+	sigfillset(&act.sa_mask);
+	act.sa_flags = 0;
+	act.sa_handler = sigalarm;
+	sigaction(signum, &act, NULL);
+
+	/* Set up timer: */
+	memset(&se, 0, sizeof(se));
+	se.sigev_notify = SIGEV_SIGNAL;
+	se.sigev_signo = signum;
+	se.sigev_value.sival_int = 0;
+
+	for (alarm_clock_id = CLOCK_REALTIME_ALARM;
+			alarm_clock_id <= CLOCK_BOOTTIME_ALARM;
+			alarm_clock_id++) {
+
+		alarmcount = 0;
+		if (timer_create(alarm_clock_id, &se, &tm1) == -1) {
+			printf("timer_create failed, %s unsupported?\n",
+					clockstring(alarm_clock_id));
+			break;
+		}
+
+		clock_gettime(alarm_clock_id, &start_time);
+		printf("Start time (%s): %ld:%ld\n", clockstring(alarm_clock_id),
+				start_time.tv_sec, start_time.tv_nsec);
+		printf("Setting alarm for every %i seconds\n", SUSPEND_SECS);
+		its1.it_value = start_time;
+		its1.it_value.tv_sec += SUSPEND_SECS;
+		its1.it_interval.tv_sec = SUSPEND_SECS;
+		its1.it_interval.tv_nsec = 0;
+
+		timer_settime(tm1, TIMER_ABSTIME, &its1, &its2);
+
+		while (alarmcount < 5)
+			sleep(1); /* First 5 alarms, do nothing */
+
+		printf("Starting suspend loops\n");
+		while (alarmcount < 10) {
+			int ret;
+
+			sleep(3);
+			ret = system("echo mem > /sys/power/state");
+			if (ret)
+				break;
+		}
+		timer_delete(tm1);
+	}
+	if (final_ret)
+		return ksft_exit_fail();
+	return ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/timers/change_skew.c b/tools/testing/selftests/timers/change_skew.c
new file mode 100644
index 000000000..c4eab7124
--- /dev/null
+++ b/tools/testing/selftests/timers/change_skew.c
@@ -0,0 +1,96 @@
+/* ADJ_FREQ Skew change test
+ *		by: john stultz (johnstul@us.ibm.com)
+ *		(C) Copyright IBM 2012
+ *		Licensed under the GPLv2
+ *
+ *  NOTE: This is a meta-test which cranks the ADJ_FREQ knob and
+ *  then uses other tests to detect problems. Thus this test requires
+ *  that the raw_skew, inconsistency-check and nanosleep tests be
+ *  present in the same directory it is run from.
+ *
+ *  To build:
+ *	$ gcc change_skew.c -o change_skew -lrt
+ *
+ *   This program is free software: you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ */
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/time.h>
+#include <sys/timex.h>
+#include <time.h>
+#include "../kselftest.h"
+
+#define NSEC_PER_SEC 1000000000LL
+
+
+int change_skew_test(int ppm)
+{
+	struct timex tx;
+	int ret;
+
+	tx.modes = ADJ_FREQUENCY;
+	tx.freq = ppm << 16;
+
+	ret = adjtimex(&tx);
+	if (ret < 0) {
+		printf("Error adjusting freq\n");
+		return ret;
+	}
+
+	ret = system("./raw_skew");
+	ret |= system("./inconsistency-check");
+	ret |= system("./nanosleep");
+
+	return ret;
+}
+
+
+int main(int argv, char **argc)
+{
+	struct timex tx;
+	int i, ret;
+
+	int ppm[5] = {0, 250, 500, -250, -500};
+
+	/* Kill ntpd */
+	ret = system("killall -9 ntpd");
+
+	/* Make sure there's no offset adjustment going on */
+	tx.modes = ADJ_OFFSET;
+	tx.offset = 0;
+	ret = adjtimex(&tx);
+
+	if (ret < 0) {
+		printf("Maybe you're not running as root?\n");
+		return -1;
+	}
+
+	for (i = 0; i < 5; i++) {
+		printf("Using %i ppm adjustment\n", ppm[i]);
+		ret = change_skew_test(ppm[i]);
+		if (ret)
+			break;
+	}
+
+	/* Set things back */
+	tx.modes = ADJ_FREQUENCY;
+	tx.offset = 0;
+	adjtimex(&tx);
+
+	if (ret) {
+		printf("[FAIL]");
+		return ksft_exit_fail();
+	}
+	printf("[OK]");
+	return ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/timers/clocksource-switch.c b/tools/testing/selftests/timers/clocksource-switch.c
new file mode 100644
index 000000000..bfc974b45
--- /dev/null
+++ b/tools/testing/selftests/timers/clocksource-switch.c
@@ -0,0 +1,168 @@
+/* Clocksource change test
+ *		by: john stultz (johnstul@us.ibm.com)
+ *		(C) Copyright IBM 2012
+ *		Licensed under the GPLv2
+ *
+ *  NOTE: This is a meta-test which quickly changes the clocksourc and
+ *  then uses other tests to detect problems. Thus this test requires
+ *  that the inconsistency-check and nanosleep tests be present in the
+ *  same directory it is run from.
+ *
+ *  To build:
+ *	$ gcc clocksource-switch.c -o clocksource-switch -lrt
+ *
+ *   This program is free software: you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ */
+
+
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <sys/time.h>
+#include <sys/timex.h>
+#include <time.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <string.h>
+#include <sys/wait.h>
+#include "../kselftest.h"
+
+
+int get_clocksources(char list[][30])
+{
+	int fd, i;
+	size_t size;
+	char buf[512];
+	char *head, *tmp;
+
+	fd = open("/sys/devices/system/clocksource/clocksource0/available_clocksource", O_RDONLY);
+
+	size = read(fd, buf, 512);
+
+	close(fd);
+
+	for (i = 0; i < 10; i++)
+		list[i][0] = '\0';
+
+	head = buf;
+	i = 0;
+	while (head - buf < size) {
+		/* Find the next space */
+		for (tmp = head; *tmp != ' '; tmp++) {
+			if (*tmp == '\n')
+				break;
+			if (*tmp == '\0')
+				break;
+		}
+		*tmp = '\0';
+		strcpy(list[i], head);
+		head = tmp + 1;
+		i++;
+	}
+
+	return i-1;
+}
+
+int get_cur_clocksource(char *buf, size_t size)
+{
+	int fd;
+
+	fd = open("/sys/devices/system/clocksource/clocksource0/current_clocksource", O_RDONLY);
+
+	size = read(fd, buf, size);
+
+	return 0;
+}
+
+int change_clocksource(char *clocksource)
+{
+	int fd;
+	ssize_t size;
+
+	fd = open("/sys/devices/system/clocksource/clocksource0/current_clocksource", O_WRONLY);
+
+	if (fd < 0)
+		return -1;
+
+	size = write(fd, clocksource, strlen(clocksource));
+
+	if (size < 0)
+		return -1;
+
+	close(fd);
+	return 0;
+}
+
+
+int run_tests(int secs)
+{
+	int ret;
+	char buf[255];
+
+	sprintf(buf, "./inconsistency-check -t %i", secs);
+	ret = system(buf);
+	if (ret)
+		return ret;
+	ret = system("./nanosleep");
+	return ret;
+}
+
+
+char clocksource_list[10][30];
+
+int main(int argv, char **argc)
+{
+	char orig_clk[512];
+	int count, i, status;
+	pid_t pid;
+
+	get_cur_clocksource(orig_clk, 512);
+
+	count = get_clocksources(clocksource_list);
+
+	if (change_clocksource(clocksource_list[0])) {
+		printf("Error: You probably need to run this as root\n");
+		return -1;
+	}
+
+	/* Check everything is sane before we start switching asyncrhonously */
+	for (i = 0; i < count; i++) {
+		printf("Validating clocksource %s\n", clocksource_list[i]);
+		if (change_clocksource(clocksource_list[i])) {
+			status = -1;
+			goto out;
+		}
+		if (run_tests(5)) {
+			status = -1;
+			goto out;
+		}
+	}
+
+
+	printf("Running Asynchronous Switching Tests...\n");
+	pid = fork();
+	if (!pid)
+		return run_tests(60);
+
+	while (pid != waitpid(pid, &status, WNOHANG))
+		for (i = 0; i < count; i++)
+			if (change_clocksource(clocksource_list[i])) {
+				status = -1;
+				goto out;
+			}
+out:
+	change_clocksource(orig_clk);
+
+	if (status)
+		return ksft_exit_fail();
+	return ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/timers/freq-step.c b/tools/testing/selftests/timers/freq-step.c
new file mode 100644
index 000000000..14a2b77fd
--- /dev/null
+++ b/tools/testing/selftests/timers/freq-step.c
@@ -0,0 +1,271 @@
+/*
+ * This test checks the response of the system clock to frequency
+ * steps made with adjtimex(). The frequency error and stability of
+ * the CLOCK_MONOTONIC clock relative to the CLOCK_MONOTONIC_RAW clock
+ * is measured in two intervals following the step. The test fails if
+ * values from the second interval exceed specified limits.
+ *
+ * Copyright (C) Miroslav Lichvar <mlichvar@redhat.com>  2017
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+
+#include <math.h>
+#include <stdio.h>
+#include <sys/timex.h>
+#include <time.h>
+#include <unistd.h>
+
+#include "../kselftest.h"
+
+#define SAMPLES 100
+#define SAMPLE_READINGS 10
+#define MEAN_SAMPLE_INTERVAL 0.1
+#define STEP_INTERVAL 1.0
+#define MAX_PRECISION 100e-9
+#define MAX_FREQ_ERROR 10e-6
+#define MAX_STDDEV 1000e-9
+
+#ifndef ADJ_SETOFFSET
+  #define ADJ_SETOFFSET 0x0100
+#endif
+
+struct sample {
+	double offset;
+	double time;
+};
+
+static time_t mono_raw_base;
+static time_t mono_base;
+static long user_hz;
+static double precision;
+static double mono_freq_offset;
+
+static double diff_timespec(struct timespec *ts1, struct timespec *ts2)
+{
+	return ts1->tv_sec - ts2->tv_sec + (ts1->tv_nsec - ts2->tv_nsec) / 1e9;
+}
+
+static double get_sample(struct sample *sample)
+{
+	double delay, mindelay = 0.0;
+	struct timespec ts1, ts2, ts3;
+	int i;
+
+	for (i = 0; i < SAMPLE_READINGS; i++) {
+		clock_gettime(CLOCK_MONOTONIC_RAW, &ts1);
+		clock_gettime(CLOCK_MONOTONIC, &ts2);
+		clock_gettime(CLOCK_MONOTONIC_RAW, &ts3);
+
+		ts1.tv_sec -= mono_raw_base;
+		ts2.tv_sec -= mono_base;
+		ts3.tv_sec -= mono_raw_base;
+
+		delay = diff_timespec(&ts3, &ts1);
+		if (delay <= 1e-9) {
+			i--;
+			continue;
+		}
+
+		if (!i || delay < mindelay) {
+			sample->offset = diff_timespec(&ts2, &ts1);
+			sample->offset -= delay / 2.0;
+			sample->time = ts1.tv_sec + ts1.tv_nsec / 1e9;
+			mindelay = delay;
+		}
+	}
+
+	return mindelay;
+}
+
+static void reset_ntp_error(void)
+{
+	struct timex txc;
+
+	txc.modes = ADJ_SETOFFSET;
+	txc.time.tv_sec = 0;
+	txc.time.tv_usec = 0;
+
+	if (adjtimex(&txc) < 0) {
+		perror("[FAIL] adjtimex");
+		ksft_exit_fail();
+	}
+}
+
+static void set_frequency(double freq)
+{
+	struct timex txc;
+	int tick_offset;
+
+	tick_offset = 1e6 * freq / user_hz;
+
+	txc.modes = ADJ_TICK | ADJ_FREQUENCY;
+	txc.tick = 1000000 / user_hz + tick_offset;
+	txc.freq = (1e6 * freq - user_hz * tick_offset) * (1 << 16);
+
+	if (adjtimex(&txc) < 0) {
+		perror("[FAIL] adjtimex");
+		ksft_exit_fail();
+	}
+}
+
+static void regress(struct sample *samples, int n, double *intercept,
+		    double *slope, double *r_stddev, double *r_max)
+{
+	double x, y, r, x_sum, y_sum, xy_sum, x2_sum, r2_sum;
+	int i;
+
+	x_sum = 0.0, y_sum = 0.0, xy_sum = 0.0, x2_sum = 0.0;
+
+	for (i = 0; i < n; i++) {
+		x = samples[i].time;
+		y = samples[i].offset;
+
+		x_sum += x;
+		y_sum += y;
+		xy_sum += x * y;
+		x2_sum += x * x;
+	}
+
+	*slope = (xy_sum - x_sum * y_sum / n) / (x2_sum - x_sum * x_sum / n);
+	*intercept = (y_sum - *slope * x_sum) / n;
+
+	*r_max = 0.0, r2_sum = 0.0;
+
+	for (i = 0; i < n; i++) {
+		x = samples[i].time;
+		y = samples[i].offset;
+		r = fabs(x * *slope + *intercept - y);
+		if (*r_max < r)
+			*r_max = r;
+		r2_sum += r * r;
+	}
+
+	*r_stddev = sqrt(r2_sum / n);
+}
+
+static int run_test(int calibration, double freq_base, double freq_step)
+{
+	struct sample samples[SAMPLES];
+	double intercept, slope, stddev1, max1, stddev2, max2;
+	double freq_error1, freq_error2;
+	int i;
+
+	set_frequency(freq_base);
+
+	for (i = 0; i < 10; i++)
+		usleep(1e6 * MEAN_SAMPLE_INTERVAL / 10);
+
+	reset_ntp_error();
+
+	set_frequency(freq_base + freq_step);
+
+	for (i = 0; i < 10; i++)
+		usleep(rand() % 2000000 * STEP_INTERVAL / 10);
+
+	set_frequency(freq_base);
+
+	for (i = 0; i < SAMPLES; i++) {
+		usleep(rand() % 2000000 * MEAN_SAMPLE_INTERVAL);
+		get_sample(&samples[i]);
+	}
+
+	if (calibration) {
+		regress(samples, SAMPLES, &intercept, &slope, &stddev1, &max1);
+		mono_freq_offset = slope;
+		printf("CLOCK_MONOTONIC_RAW frequency offset: %11.3f ppm\n",
+		       1e6 * mono_freq_offset);
+		return 0;
+	}
+
+	regress(samples, SAMPLES / 2, &intercept, &slope, &stddev1, &max1);
+	freq_error1 = slope * (1.0 - mono_freq_offset) - mono_freq_offset -
+			freq_base;
+
+	regress(samples + SAMPLES / 2, SAMPLES / 2, &intercept, &slope,
+		&stddev2, &max2);
+	freq_error2 = slope * (1.0 - mono_freq_offset) - mono_freq_offset -
+			freq_base;
+
+	printf("%6.0f %+10.3f %6.0f %7.0f %+10.3f %6.0f %7.0f\t",
+	       1e6 * freq_step,
+	       1e6 * freq_error1, 1e9 * stddev1, 1e9 * max1,
+	       1e6 * freq_error2, 1e9 * stddev2, 1e9 * max2);
+
+	if (fabs(freq_error2) > MAX_FREQ_ERROR || stddev2 > MAX_STDDEV) {
+		printf("[FAIL]\n");
+		return 1;
+	}
+
+	printf("[OK]\n");
+	return 0;
+}
+
+static void init_test(void)
+{
+	struct timespec ts;
+	struct sample sample;
+
+	if (clock_gettime(CLOCK_MONOTONIC_RAW, &ts)) {
+		perror("[FAIL] clock_gettime(CLOCK_MONOTONIC_RAW)");
+		ksft_exit_fail();
+	}
+
+	mono_raw_base = ts.tv_sec;
+
+	if (clock_gettime(CLOCK_MONOTONIC, &ts)) {
+		perror("[FAIL] clock_gettime(CLOCK_MONOTONIC)");
+		ksft_exit_fail();
+	}
+
+	mono_base = ts.tv_sec;
+
+	user_hz = sysconf(_SC_CLK_TCK);
+
+	precision = get_sample(&sample) / 2.0;
+	printf("CLOCK_MONOTONIC_RAW+CLOCK_MONOTONIC precision: %.0f ns\t\t",
+	       1e9 * precision);
+
+	if (precision > MAX_PRECISION)
+		ksft_exit_skip("precision: %.0f ns > MAX_PRECISION: %.0f ns\n",
+				1e9 * precision, 1e9 * MAX_PRECISION);
+
+	printf("[OK]\n");
+	srand(ts.tv_sec ^ ts.tv_nsec);
+
+	run_test(1, 0.0, 0.0);
+}
+
+int main(int argc, char **argv)
+{
+	double freq_base, freq_step;
+	int i, j, fails = 0;
+
+	init_test();
+
+	printf("Checking response to frequency step:\n");
+	printf("  Step           1st interval              2nd interval\n");
+	printf("             Freq    Dev     Max       Freq    Dev     Max\n");
+
+	for (i = 2; i >= 0; i--) {
+		for (j = 0; j < 5; j++) {
+			freq_base = (rand() % (1 << 24) - (1 << 23)) / 65536e6;
+			freq_step = 10e-6 * (1 << (6 * i));
+			fails += run_test(0, freq_base, freq_step);
+		}
+	}
+
+	set_frequency(0.0);
+
+	if (fails)
+		return ksft_exit_fail();
+
+	return ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/timers/inconsistency-check.c b/tools/testing/selftests/timers/inconsistency-check.c
new file mode 100644
index 000000000..022d3ffe3
--- /dev/null
+++ b/tools/testing/selftests/timers/inconsistency-check.c
@@ -0,0 +1,193 @@
+/* Time inconsistency check test
+ *		by: john stultz (johnstul@us.ibm.com)
+ *		(C) Copyright IBM 2003, 2004, 2005, 2012
+ *		(C) Copyright Linaro Limited 2015
+ *		Licensed under the GPLv2
+ *
+ *  To build:
+ *	$ gcc inconsistency-check.c -o inconsistency-check -lrt
+ *
+ *   This program is free software: you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ */
+
+
+
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <time.h>
+#include <sys/time.h>
+#include <sys/timex.h>
+#include <string.h>
+#include <signal.h>
+#include "../kselftest.h"
+
+#define CALLS_PER_LOOP 64
+#define NSEC_PER_SEC 1000000000ULL
+
+#define CLOCK_REALTIME			0
+#define CLOCK_MONOTONIC			1
+#define CLOCK_PROCESS_CPUTIME_ID	2
+#define CLOCK_THREAD_CPUTIME_ID		3
+#define CLOCK_MONOTONIC_RAW		4
+#define CLOCK_REALTIME_COARSE		5
+#define CLOCK_MONOTONIC_COARSE		6
+#define CLOCK_BOOTTIME			7
+#define CLOCK_REALTIME_ALARM		8
+#define CLOCK_BOOTTIME_ALARM		9
+#define CLOCK_HWSPECIFIC		10
+#define CLOCK_TAI			11
+#define NR_CLOCKIDS			12
+
+char *clockstring(int clockid)
+{
+	switch (clockid) {
+	case CLOCK_REALTIME:
+		return "CLOCK_REALTIME";
+	case CLOCK_MONOTONIC:
+		return "CLOCK_MONOTONIC";
+	case CLOCK_PROCESS_CPUTIME_ID:
+		return "CLOCK_PROCESS_CPUTIME_ID";
+	case CLOCK_THREAD_CPUTIME_ID:
+		return "CLOCK_THREAD_CPUTIME_ID";
+	case CLOCK_MONOTONIC_RAW:
+		return "CLOCK_MONOTONIC_RAW";
+	case CLOCK_REALTIME_COARSE:
+		return "CLOCK_REALTIME_COARSE";
+	case CLOCK_MONOTONIC_COARSE:
+		return "CLOCK_MONOTONIC_COARSE";
+	case CLOCK_BOOTTIME:
+		return "CLOCK_BOOTTIME";
+	case CLOCK_REALTIME_ALARM:
+		return "CLOCK_REALTIME_ALARM";
+	case CLOCK_BOOTTIME_ALARM:
+		return "CLOCK_BOOTTIME_ALARM";
+	case CLOCK_TAI:
+		return "CLOCK_TAI";
+	};
+	return "UNKNOWN_CLOCKID";
+}
+
+/* returns 1 if a <= b, 0 otherwise */
+static inline int in_order(struct timespec a, struct timespec b)
+{
+	/* use unsigned to avoid false positives on 2038 rollover */
+	if ((unsigned long)a.tv_sec < (unsigned long)b.tv_sec)
+		return 1;
+	if ((unsigned long)a.tv_sec > (unsigned long)b.tv_sec)
+		return 0;
+	if (a.tv_nsec > b.tv_nsec)
+		return 0;
+	return 1;
+}
+
+
+
+int consistency_test(int clock_type, unsigned long seconds)
+{
+	struct timespec list[CALLS_PER_LOOP];
+	int i, inconsistent;
+	long now, then;
+	time_t t;
+	char *start_str;
+
+	clock_gettime(clock_type, &list[0]);
+	now = then = list[0].tv_sec;
+
+	/* timestamp start of test */
+	t = time(0);
+	start_str = ctime(&t);
+
+	while (seconds == -1 || now - then < seconds) {
+		inconsistent = -1;
+
+		/* Fill list */
+		for (i = 0; i < CALLS_PER_LOOP; i++)
+			clock_gettime(clock_type, &list[i]);
+
+		/* Check for inconsistencies */
+		for (i = 0; i < CALLS_PER_LOOP - 1; i++)
+			if (!in_order(list[i], list[i+1]))
+				inconsistent = i;
+
+		/* display inconsistency */
+		if (inconsistent >= 0) {
+			unsigned long long delta;
+
+			printf("\%s\n", start_str);
+			for (i = 0; i < CALLS_PER_LOOP; i++) {
+				if (i == inconsistent)
+					printf("--------------------\n");
+				printf("%lu:%lu\n", list[i].tv_sec,
+							list[i].tv_nsec);
+				if (i == inconsistent + 1)
+					printf("--------------------\n");
+			}
+			delta = list[inconsistent].tv_sec * NSEC_PER_SEC;
+			delta += list[inconsistent].tv_nsec;
+			delta -= list[inconsistent+1].tv_sec * NSEC_PER_SEC;
+			delta -= list[inconsistent+1].tv_nsec;
+			printf("Delta: %llu ns\n", delta);
+			fflush(0);
+			/* timestamp inconsistency*/
+			t = time(0);
+			printf("%s\n", ctime(&t));
+			printf("[FAILED]\n");
+			return -1;
+		}
+		now = list[0].tv_sec;
+	}
+	printf("[OK]\n");
+	return 0;
+}
+
+
+int main(int argc, char *argv[])
+{
+	int clockid, opt;
+	int userclock = CLOCK_REALTIME;
+	int maxclocks = NR_CLOCKIDS;
+	int runtime = 10;
+	struct timespec ts;
+
+	/* Process arguments */
+	while ((opt = getopt(argc, argv, "t:c:")) != -1) {
+		switch (opt) {
+		case 't':
+			runtime = atoi(optarg);
+			break;
+		case 'c':
+			userclock = atoi(optarg);
+			maxclocks = userclock + 1;
+			break;
+		default:
+			printf("Usage: %s [-t <secs>] [-c <clockid>]\n", argv[0]);
+			printf("	-t: Number of seconds to run\n");
+			printf("	-c: clockid to use (default, all clockids)\n");
+			exit(-1);
+		}
+	}
+
+	setbuf(stdout, NULL);
+
+	for (clockid = userclock; clockid < maxclocks; clockid++) {
+
+		if (clockid == CLOCK_HWSPECIFIC)
+			continue;
+
+		if (!clock_gettime(clockid, &ts)) {
+			printf("Consistent %-30s ", clockstring(clockid));
+			if (consistency_test(clockid, runtime))
+				return ksft_exit_fail();
+		}
+	}
+	return ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/timers/leap-a-day.c b/tools/testing/selftests/timers/leap-a-day.c
new file mode 100644
index 000000000..19e46ed5d
--- /dev/null
+++ b/tools/testing/selftests/timers/leap-a-day.c
@@ -0,0 +1,378 @@
+/* Leap second stress test
+ *              by: John Stultz (john.stultz@linaro.org)
+ *              (C) Copyright IBM 2012
+ *              (C) Copyright 2013, 2015 Linaro Limited
+ *              Licensed under the GPLv2
+ *
+ *  This test signals the kernel to insert a leap second
+ *  every day at midnight GMT. This allows for stessing the
+ *  kernel's leap-second behavior, as well as how well applications
+ *  handle the leap-second discontinuity.
+ *
+ *  Usage: leap-a-day [-s] [-i <num>]
+ *
+ *  Options:
+ *	-s:	Each iteration, set the date to 10 seconds before midnight GMT.
+ *		This speeds up the number of leapsecond transitions tested,
+ *		but because it calls settimeofday frequently, advancing the
+ *		time by 24 hours every ~16 seconds, it may cause application
+ *		disruption.
+ *
+ *	-i:	Number of iterations to run (default: infinite)
+ *
+ *  Other notes: Disabling NTP prior to running this is advised, as the two
+ *		 may conflict in their commands to the kernel.
+ *
+ *  To build:
+ *	$ gcc leap-a-day.c -o leap-a-day -lrt
+ *
+ *   This program is free software: you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ */
+
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+#include <sys/time.h>
+#include <sys/timex.h>
+#include <sys/errno.h>
+#include <string.h>
+#include <signal.h>
+#include <unistd.h>
+#include "../kselftest.h"
+
+#define NSEC_PER_SEC 1000000000ULL
+#define CLOCK_TAI 11
+
+time_t next_leap;
+int error_found;
+
+/* returns 1 if a <= b, 0 otherwise */
+static inline int in_order(struct timespec a, struct timespec b)
+{
+	if (a.tv_sec < b.tv_sec)
+		return 1;
+	if (a.tv_sec > b.tv_sec)
+		return 0;
+	if (a.tv_nsec > b.tv_nsec)
+		return 0;
+	return 1;
+}
+
+struct timespec timespec_add(struct timespec ts, unsigned long long ns)
+{
+	ts.tv_nsec += ns;
+	while (ts.tv_nsec >= NSEC_PER_SEC) {
+		ts.tv_nsec -= NSEC_PER_SEC;
+		ts.tv_sec++;
+	}
+	return ts;
+}
+
+char *time_state_str(int state)
+{
+	switch (state) {
+	case TIME_OK:	return "TIME_OK";
+	case TIME_INS:	return "TIME_INS";
+	case TIME_DEL:	return "TIME_DEL";
+	case TIME_OOP:	return "TIME_OOP";
+	case TIME_WAIT:	return "TIME_WAIT";
+	case TIME_BAD:	return "TIME_BAD";
+	}
+	return "ERROR";
+}
+
+/* clear NTP time_status & time_state */
+int clear_time_state(void)
+{
+	struct timex tx;
+	int ret;
+
+	/*
+	 * We have to call adjtime twice here, as kernels
+	 * prior to 6b1859dba01c7 (included in 3.5 and
+	 * -stable), had an issue with the state machine
+	 * and wouldn't clear the STA_INS/DEL flag directly.
+	 */
+	tx.modes = ADJ_STATUS;
+	tx.status = STA_PLL;
+	ret = adjtimex(&tx);
+
+	/* Clear maxerror, as it can cause UNSYNC to be set */
+	tx.modes = ADJ_MAXERROR;
+	tx.maxerror = 0;
+	ret = adjtimex(&tx);
+
+	/* Clear the status */
+	tx.modes = ADJ_STATUS;
+	tx.status = 0;
+	ret = adjtimex(&tx);
+
+	return ret;
+}
+
+/* Make sure we cleanup on ctrl-c */
+void handler(int unused)
+{
+	clear_time_state();
+	exit(0);
+}
+
+void sigalarm(int signo)
+{
+	struct timex tx;
+	int ret;
+
+	tx.modes = 0;
+	ret = adjtimex(&tx);
+
+	if (tx.time.tv_sec < next_leap) {
+		printf("Error: Early timer expiration! (Should be %ld)\n", next_leap);
+		error_found = 1;
+		printf("adjtimex: %10ld sec + %6ld us (%i)\t%s\n",
+					tx.time.tv_sec,
+					tx.time.tv_usec,
+					tx.tai,
+					time_state_str(ret));
+	}
+	if (ret != TIME_WAIT) {
+		printf("Error: Timer seeing incorrect NTP state? (Should be TIME_WAIT)\n");
+		error_found = 1;
+		printf("adjtimex: %10ld sec + %6ld us (%i)\t%s\n",
+					tx.time.tv_sec,
+					tx.time.tv_usec,
+					tx.tai,
+					time_state_str(ret));
+	}
+}
+
+
+/* Test for known hrtimer failure */
+void test_hrtimer_failure(void)
+{
+	struct timespec now, target;
+
+	clock_gettime(CLOCK_REALTIME, &now);
+	target = timespec_add(now, NSEC_PER_SEC/2);
+	clock_nanosleep(CLOCK_REALTIME, TIMER_ABSTIME, &target, NULL);
+	clock_gettime(CLOCK_REALTIME, &now);
+
+	if (!in_order(target, now)) {
+		printf("ERROR: hrtimer early expiration failure observed.\n");
+		error_found = 1;
+	}
+}
+
+int main(int argc, char **argv)
+{
+	timer_t tm1;
+	struct itimerspec its1;
+	struct sigevent se;
+	struct sigaction act;
+	int signum = SIGRTMAX;
+	int settime = 1;
+	int tai_time = 0;
+	int insert = 1;
+	int iterations = 10;
+	int opt;
+
+	/* Process arguments */
+	while ((opt = getopt(argc, argv, "sti:")) != -1) {
+		switch (opt) {
+		case 'w':
+			printf("Only setting leap-flag, not changing time. It could take up to a day for leap to trigger.\n");
+			settime = 0;
+			break;
+		case 'i':
+			iterations = atoi(optarg);
+			break;
+		case 't':
+			tai_time = 1;
+			break;
+		default:
+			printf("Usage: %s [-w] [-i <iterations>]\n", argv[0]);
+			printf("	-w: Set flag and wait for leap second each iteration");
+			printf("	    (default sets time to right before leapsecond)\n");
+			printf("	-i: Number of iterations (-1 = infinite, default is 10)\n");
+			printf("	-t: Print TAI time\n");
+			exit(-1);
+		}
+	}
+
+	/* Make sure TAI support is present if -t was used */
+	if (tai_time) {
+		struct timespec ts;
+
+		if (clock_gettime(CLOCK_TAI, &ts)) {
+			printf("System doesn't support CLOCK_TAI\n");
+			ksft_exit_fail();
+		}
+	}
+
+	signal(SIGINT, handler);
+	signal(SIGKILL, handler);
+
+	/* Set up timer signal handler: */
+	sigfillset(&act.sa_mask);
+	act.sa_flags = 0;
+	act.sa_handler = sigalarm;
+	sigaction(signum, &act, NULL);
+
+	if (iterations < 0)
+		printf("This runs continuously. Press ctrl-c to stop\n");
+	else
+		printf("Running for %i iterations. Press ctrl-c to stop\n", iterations);
+
+	printf("\n");
+	while (1) {
+		int ret;
+		struct timespec ts;
+		struct timex tx;
+		time_t now;
+
+		/* Get the current time */
+		clock_gettime(CLOCK_REALTIME, &ts);
+
+		/* Calculate the next possible leap second 23:59:60 GMT */
+		next_leap = ts.tv_sec;
+		next_leap += 86400 - (next_leap % 86400);
+
+		if (settime) {
+			struct timeval tv;
+
+			tv.tv_sec = next_leap - 10;
+			tv.tv_usec = 0;
+			settimeofday(&tv, NULL);
+			printf("Setting time to %s", ctime(&tv.tv_sec));
+		}
+
+		/* Reset NTP time state */
+		clear_time_state();
+
+		/* Set the leap second insert flag */
+		tx.modes = ADJ_STATUS;
+		if (insert)
+			tx.status = STA_INS;
+		else
+			tx.status = STA_DEL;
+		ret = adjtimex(&tx);
+		if (ret < 0) {
+			printf("Error: Problem setting STA_INS/STA_DEL!: %s\n",
+							time_state_str(ret));
+			return ksft_exit_fail();
+		}
+
+		/* Validate STA_INS was set */
+		tx.modes = 0;
+		ret = adjtimex(&tx);
+		if (tx.status != STA_INS && tx.status != STA_DEL) {
+			printf("Error: STA_INS/STA_DEL not set!: %s\n",
+							time_state_str(ret));
+			return ksft_exit_fail();
+		}
+
+		if (tai_time) {
+			printf("Using TAI time,"
+				" no inconsistencies should be seen!\n");
+		}
+
+		printf("Scheduling leap second for %s", ctime(&next_leap));
+
+		/* Set up timer */
+		printf("Setting timer for %ld -  %s", next_leap, ctime(&next_leap));
+		memset(&se, 0, sizeof(se));
+		se.sigev_notify = SIGEV_SIGNAL;
+		se.sigev_signo = signum;
+		se.sigev_value.sival_int = 0;
+		if (timer_create(CLOCK_REALTIME, &se, &tm1) == -1) {
+			printf("Error: timer_create failed\n");
+			return ksft_exit_fail();
+		}
+		its1.it_value.tv_sec = next_leap;
+		its1.it_value.tv_nsec = 0;
+		its1.it_interval.tv_sec = 0;
+		its1.it_interval.tv_nsec = 0;
+		timer_settime(tm1, TIMER_ABSTIME, &its1, NULL);
+
+		/* Wake up 3 seconds before leap */
+		ts.tv_sec = next_leap - 3;
+		ts.tv_nsec = 0;
+
+
+		while (clock_nanosleep(CLOCK_REALTIME, TIMER_ABSTIME, &ts, NULL))
+			printf("Something woke us up, returning to sleep\n");
+
+		/* Validate STA_INS is still set */
+		tx.modes = 0;
+		ret = adjtimex(&tx);
+		if (tx.status != STA_INS && tx.status != STA_DEL) {
+			printf("Something cleared STA_INS/STA_DEL, setting it again.\n");
+			tx.modes = ADJ_STATUS;
+			if (insert)
+				tx.status = STA_INS;
+			else
+				tx.status = STA_DEL;
+			ret = adjtimex(&tx);
+		}
+
+		/* Check adjtimex output every half second */
+		now = tx.time.tv_sec;
+		while (now < next_leap + 2) {
+			char buf[26];
+			struct timespec tai;
+			int ret;
+
+			tx.modes = 0;
+			ret = adjtimex(&tx);
+
+			if (tai_time) {
+				clock_gettime(CLOCK_TAI, &tai);
+				printf("%ld sec, %9ld ns\t%s\n",
+						tai.tv_sec,
+						tai.tv_nsec,
+						time_state_str(ret));
+			} else {
+				ctime_r(&tx.time.tv_sec, buf);
+				buf[strlen(buf)-1] = 0; /*remove trailing\n */
+
+				printf("%s + %6ld us (%i)\t%s\n",
+						buf,
+						tx.time.tv_usec,
+						tx.tai,
+						time_state_str(ret));
+			}
+			now = tx.time.tv_sec;
+			/* Sleep for another half second */
+			ts.tv_sec = 0;
+			ts.tv_nsec = NSEC_PER_SEC / 2;
+			clock_nanosleep(CLOCK_MONOTONIC, 0, &ts, NULL);
+		}
+		/* Switch to using other mode */
+		insert = !insert;
+
+		/* Note if kernel has known hrtimer failure */
+		test_hrtimer_failure();
+
+		printf("Leap complete\n");
+		if (error_found) {
+			printf("Errors observed\n");
+			clear_time_state();
+			return ksft_exit_fail();
+		}
+		printf("\n");
+		if ((iterations != -1) && !(--iterations))
+			break;
+	}
+
+	clear_time_state();
+	return ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/timers/leapcrash.c b/tools/testing/selftests/timers/leapcrash.c
new file mode 100644
index 000000000..dc80728ed
--- /dev/null
+++ b/tools/testing/selftests/timers/leapcrash.c
@@ -0,0 +1,108 @@
+/* Demo leapsecond deadlock
+ *              by: John Stultz (john.stultz@linaro.org)
+ *              (C) Copyright IBM 2012
+ *              (C) Copyright 2013, 2015 Linaro Limited
+ *              Licensed under the GPL
+ *
+ * This test demonstrates leapsecond deadlock that is possibe
+ * on kernels from 2.6.26 to 3.3.
+ *
+ * WARNING: THIS WILL LIKELY HARDHANG SYSTEMS AND MAY LOSE DATA
+ * RUN AT YOUR OWN RISK!
+ *  To build:
+ *	$ gcc leapcrash.c -o leapcrash -lrt
+ */
+
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+#include <sys/time.h>
+#include <sys/timex.h>
+#include <string.h>
+#include <signal.h>
+#include "../kselftest.h"
+
+/* clear NTP time_status & time_state */
+int clear_time_state(void)
+{
+	struct timex tx;
+	int ret;
+
+	/*
+	 * We have to call adjtime twice here, as kernels
+	 * prior to 6b1859dba01c7 (included in 3.5 and
+	 * -stable), had an issue with the state machine
+	 * and wouldn't clear the STA_INS/DEL flag directly.
+	 */
+	tx.modes = ADJ_STATUS;
+	tx.status = STA_PLL;
+	ret = adjtimex(&tx);
+
+	tx.modes = ADJ_STATUS;
+	tx.status = 0;
+	ret = adjtimex(&tx);
+
+	return ret;
+}
+
+/* Make sure we cleanup on ctrl-c */
+void handler(int unused)
+{
+	clear_time_state();
+	exit(0);
+}
+
+
+int main(void)
+{
+	struct timex tx;
+	struct timespec ts;
+	time_t next_leap;
+	int count = 0;
+
+	setbuf(stdout, NULL);
+
+	signal(SIGINT, handler);
+	signal(SIGKILL, handler);
+	printf("This runs for a few minutes. Press ctrl-c to stop\n");
+
+	clear_time_state();
+
+
+	/* Get the current time */
+	clock_gettime(CLOCK_REALTIME, &ts);
+
+	/* Calculate the next possible leap second 23:59:60 GMT */
+	next_leap = ts.tv_sec;
+	next_leap += 86400 - (next_leap % 86400);
+
+	for (count = 0; count < 20; count++) {
+		struct timeval tv;
+
+
+		/* set the time to 2 seconds before the leap */
+		tv.tv_sec = next_leap - 2;
+		tv.tv_usec = 0;
+		if (settimeofday(&tv, NULL)) {
+			printf("Error: You're likely not running with proper (ie: root) permissions\n");
+			return ksft_exit_fail();
+		}
+		tx.modes = 0;
+		adjtimex(&tx);
+
+		/* hammer on adjtime w/ STA_INS */
+		while (tx.time.tv_sec < next_leap + 1) {
+			/* Set the leap second insert flag */
+			tx.modes = ADJ_STATUS;
+			tx.status = STA_INS;
+			adjtimex(&tx);
+		}
+		clear_time_state();
+		printf(".");
+		fflush(stdout);
+	}
+	printf("[OK]\n");
+	return ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/timers/mqueue-lat.c b/tools/testing/selftests/timers/mqueue-lat.c
new file mode 100644
index 000000000..7916cf5cc
--- /dev/null
+++ b/tools/testing/selftests/timers/mqueue-lat.c
@@ -0,0 +1,114 @@
+/* Measure mqueue timeout latency
+ *              by: john stultz (john.stultz@linaro.org)
+ *		(C) Copyright Linaro 2013
+ *
+ *		Inspired with permission from example test by:
+ *			Romain Francoise <romain@orebokech.com>
+ *              Licensed under the GPLv2
+ *
+ *  To build:
+ *	$ gcc mqueue-lat.c -o mqueue-lat -lrt
+ *
+ *   This program is free software: you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+#include <sys/time.h>
+#include <sys/timex.h>
+#include <string.h>
+#include <signal.h>
+#include <errno.h>
+#include <mqueue.h>
+#include "../kselftest.h"
+
+#define NSEC_PER_SEC 1000000000ULL
+
+#define TARGET_TIMEOUT		100000000	/* 100ms in nanoseconds */
+#define UNRESONABLE_LATENCY	40000000	/* 40ms in nanosecs */
+
+
+long long timespec_sub(struct timespec a, struct timespec b)
+{
+	long long ret = NSEC_PER_SEC * b.tv_sec + b.tv_nsec;
+
+	ret -= NSEC_PER_SEC * a.tv_sec + a.tv_nsec;
+	return ret;
+}
+
+struct timespec timespec_add(struct timespec ts, unsigned long long ns)
+{
+	ts.tv_nsec += ns;
+	while (ts.tv_nsec >= NSEC_PER_SEC) {
+		ts.tv_nsec -= NSEC_PER_SEC;
+		ts.tv_sec++;
+	}
+	return ts;
+}
+
+int mqueue_lat_test(void)
+{
+
+	mqd_t q;
+	struct mq_attr attr;
+	struct timespec start, end, now, target;
+	int i, count, ret;
+
+	q = mq_open("/foo", O_CREAT | O_RDONLY, 0666, NULL);
+	if (q < 0) {
+		perror("mq_open");
+		return -1;
+	}
+	mq_getattr(q, &attr);
+
+
+	count = 100;
+	clock_gettime(CLOCK_MONOTONIC, &start);
+
+	for (i = 0; i < count; i++) {
+		char buf[attr.mq_msgsize];
+
+		clock_gettime(CLOCK_REALTIME, &now);
+		target = now;
+		target = timespec_add(now, TARGET_TIMEOUT); /* 100ms */
+
+		ret = mq_timedreceive(q, buf, sizeof(buf), NULL, &target);
+		if (ret < 0 && errno != ETIMEDOUT) {
+			perror("mq_timedreceive");
+			return -1;
+		}
+	}
+	clock_gettime(CLOCK_MONOTONIC, &end);
+
+	mq_close(q);
+
+	if ((timespec_sub(start, end)/count) > TARGET_TIMEOUT + UNRESONABLE_LATENCY)
+		return -1;
+
+	return 0;
+}
+
+int main(int argc, char **argv)
+{
+	int ret;
+
+	printf("Mqueue latency :                          ");
+	fflush(stdout);
+
+	ret = mqueue_lat_test();
+	if (ret < 0) {
+		printf("[FAILED]\n");
+		return ksft_exit_fail();
+	}
+	printf("[OK]\n");
+	return ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/timers/nanosleep.c b/tools/testing/selftests/timers/nanosleep.c
new file mode 100644
index 000000000..71b5441c2
--- /dev/null
+++ b/tools/testing/selftests/timers/nanosleep.c
@@ -0,0 +1,165 @@
+/* Make sure timers don't return early
+ *              by: john stultz (johnstul@us.ibm.com)
+ *		    John Stultz (john.stultz@linaro.org)
+ *              (C) Copyright IBM 2012
+ *              (C) Copyright Linaro 2013 2015
+ *              Licensed under the GPLv2
+ *
+ *  To build:
+ *	$ gcc nanosleep.c -o nanosleep -lrt
+ *
+ *   This program is free software: you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ */
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+#include <sys/time.h>
+#include <sys/timex.h>
+#include <string.h>
+#include <signal.h>
+#include "../kselftest.h"
+
+#define NSEC_PER_SEC 1000000000ULL
+
+#define CLOCK_REALTIME			0
+#define CLOCK_MONOTONIC			1
+#define CLOCK_PROCESS_CPUTIME_ID	2
+#define CLOCK_THREAD_CPUTIME_ID		3
+#define CLOCK_MONOTONIC_RAW		4
+#define CLOCK_REALTIME_COARSE		5
+#define CLOCK_MONOTONIC_COARSE		6
+#define CLOCK_BOOTTIME			7
+#define CLOCK_REALTIME_ALARM		8
+#define CLOCK_BOOTTIME_ALARM		9
+#define CLOCK_HWSPECIFIC		10
+#define CLOCK_TAI			11
+#define NR_CLOCKIDS			12
+
+#define UNSUPPORTED 0xf00f
+
+char *clockstring(int clockid)
+{
+	switch (clockid) {
+	case CLOCK_REALTIME:
+		return "CLOCK_REALTIME";
+	case CLOCK_MONOTONIC:
+		return "CLOCK_MONOTONIC";
+	case CLOCK_PROCESS_CPUTIME_ID:
+		return "CLOCK_PROCESS_CPUTIME_ID";
+	case CLOCK_THREAD_CPUTIME_ID:
+		return "CLOCK_THREAD_CPUTIME_ID";
+	case CLOCK_MONOTONIC_RAW:
+		return "CLOCK_MONOTONIC_RAW";
+	case CLOCK_REALTIME_COARSE:
+		return "CLOCK_REALTIME_COARSE";
+	case CLOCK_MONOTONIC_COARSE:
+		return "CLOCK_MONOTONIC_COARSE";
+	case CLOCK_BOOTTIME:
+		return "CLOCK_BOOTTIME";
+	case CLOCK_REALTIME_ALARM:
+		return "CLOCK_REALTIME_ALARM";
+	case CLOCK_BOOTTIME_ALARM:
+		return "CLOCK_BOOTTIME_ALARM";
+	case CLOCK_TAI:
+		return "CLOCK_TAI";
+	};
+	return "UNKNOWN_CLOCKID";
+}
+
+/* returns 1 if a <= b, 0 otherwise */
+static inline int in_order(struct timespec a, struct timespec b)
+{
+	if (a.tv_sec < b.tv_sec)
+		return 1;
+	if (a.tv_sec > b.tv_sec)
+		return 0;
+	if (a.tv_nsec > b.tv_nsec)
+		return 0;
+	return 1;
+}
+
+struct timespec timespec_add(struct timespec ts, unsigned long long ns)
+{
+	ts.tv_nsec += ns;
+	while (ts.tv_nsec >= NSEC_PER_SEC) {
+		ts.tv_nsec -= NSEC_PER_SEC;
+		ts.tv_sec++;
+	}
+	return ts;
+}
+
+int nanosleep_test(int clockid, long long ns)
+{
+	struct timespec now, target, rel;
+
+	/* First check abs time */
+	if (clock_gettime(clockid, &now))
+		return UNSUPPORTED;
+	target = timespec_add(now, ns);
+
+	if (clock_nanosleep(clockid, TIMER_ABSTIME, &target, NULL))
+		return UNSUPPORTED;
+	clock_gettime(clockid, &now);
+
+	if (!in_order(target, now))
+		return -1;
+
+	/* Second check reltime */
+	clock_gettime(clockid, &now);
+	rel.tv_sec = 0;
+	rel.tv_nsec = 0;
+	rel = timespec_add(rel, ns);
+	target = timespec_add(now, ns);
+	clock_nanosleep(clockid, 0, &rel, NULL);
+	clock_gettime(clockid, &now);
+
+	if (!in_order(target, now))
+		return -1;
+	return 0;
+}
+
+int main(int argc, char **argv)
+{
+	long long length;
+	int clockid, ret;
+
+	for (clockid = CLOCK_REALTIME; clockid < NR_CLOCKIDS; clockid++) {
+
+		/* Skip cputime clockids since nanosleep won't increment cputime */
+		if (clockid == CLOCK_PROCESS_CPUTIME_ID ||
+				clockid == CLOCK_THREAD_CPUTIME_ID ||
+				clockid == CLOCK_HWSPECIFIC)
+			continue;
+
+		printf("Nanosleep %-31s ", clockstring(clockid));
+		fflush(stdout);
+
+		length = 10;
+		while (length <= (NSEC_PER_SEC * 10)) {
+			ret = nanosleep_test(clockid, length);
+			if (ret == UNSUPPORTED) {
+				printf("[UNSUPPORTED]\n");
+				goto next;
+			}
+			if (ret < 0) {
+				printf("[FAILED]\n");
+				return ksft_exit_fail();
+			}
+			length *= 100;
+		}
+		printf("[OK]\n");
+next:
+		ret = 0;
+	}
+	return ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/timers/nsleep-lat.c b/tools/testing/selftests/timers/nsleep-lat.c
new file mode 100644
index 000000000..eb3e79ed7
--- /dev/null
+++ b/tools/testing/selftests/timers/nsleep-lat.c
@@ -0,0 +1,180 @@
+/* Measure nanosleep timer latency
+ *              by: john stultz (john.stultz@linaro.org)
+ *		(C) Copyright Linaro 2013
+ *              Licensed under the GPLv2
+ *
+ *  To build:
+ *	$ gcc nsleep-lat.c -o nsleep-lat -lrt
+ *
+ *   This program is free software: you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+#include <sys/time.h>
+#include <sys/timex.h>
+#include <string.h>
+#include <signal.h>
+#include "../kselftest.h"
+
+#define NSEC_PER_SEC 1000000000ULL
+
+#define UNRESONABLE_LATENCY 40000000 /* 40ms in nanosecs */
+
+
+#define CLOCK_REALTIME			0
+#define CLOCK_MONOTONIC			1
+#define CLOCK_PROCESS_CPUTIME_ID	2
+#define CLOCK_THREAD_CPUTIME_ID		3
+#define CLOCK_MONOTONIC_RAW		4
+#define CLOCK_REALTIME_COARSE		5
+#define CLOCK_MONOTONIC_COARSE		6
+#define CLOCK_BOOTTIME			7
+#define CLOCK_REALTIME_ALARM		8
+#define CLOCK_BOOTTIME_ALARM		9
+#define CLOCK_HWSPECIFIC		10
+#define CLOCK_TAI			11
+#define NR_CLOCKIDS			12
+
+#define UNSUPPORTED 0xf00f
+
+char *clockstring(int clockid)
+{
+	switch (clockid) {
+	case CLOCK_REALTIME:
+		return "CLOCK_REALTIME";
+	case CLOCK_MONOTONIC:
+		return "CLOCK_MONOTONIC";
+	case CLOCK_PROCESS_CPUTIME_ID:
+		return "CLOCK_PROCESS_CPUTIME_ID";
+	case CLOCK_THREAD_CPUTIME_ID:
+		return "CLOCK_THREAD_CPUTIME_ID";
+	case CLOCK_MONOTONIC_RAW:
+		return "CLOCK_MONOTONIC_RAW";
+	case CLOCK_REALTIME_COARSE:
+		return "CLOCK_REALTIME_COARSE";
+	case CLOCK_MONOTONIC_COARSE:
+		return "CLOCK_MONOTONIC_COARSE";
+	case CLOCK_BOOTTIME:
+		return "CLOCK_BOOTTIME";
+	case CLOCK_REALTIME_ALARM:
+		return "CLOCK_REALTIME_ALARM";
+	case CLOCK_BOOTTIME_ALARM:
+		return "CLOCK_BOOTTIME_ALARM";
+	case CLOCK_TAI:
+		return "CLOCK_TAI";
+	};
+	return "UNKNOWN_CLOCKID";
+}
+
+struct timespec timespec_add(struct timespec ts, unsigned long long ns)
+{
+	ts.tv_nsec += ns;
+	while (ts.tv_nsec >= NSEC_PER_SEC) {
+		ts.tv_nsec -= NSEC_PER_SEC;
+		ts.tv_sec++;
+	}
+	return ts;
+}
+
+
+long long timespec_sub(struct timespec a, struct timespec b)
+{
+	long long ret = NSEC_PER_SEC * b.tv_sec + b.tv_nsec;
+
+	ret -= NSEC_PER_SEC * a.tv_sec + a.tv_nsec;
+	return ret;
+}
+
+int nanosleep_lat_test(int clockid, long long ns)
+{
+	struct timespec start, end, target;
+	long long latency = 0;
+	int i, count;
+
+	target.tv_sec = ns/NSEC_PER_SEC;
+	target.tv_nsec = ns%NSEC_PER_SEC;
+
+	if (clock_gettime(clockid, &start))
+		return UNSUPPORTED;
+	if (clock_nanosleep(clockid, 0, &target, NULL))
+		return UNSUPPORTED;
+
+	count = 10;
+
+	/* First check relative latency */
+	clock_gettime(clockid, &start);
+	for (i = 0; i < count; i++)
+		clock_nanosleep(clockid, 0, &target, NULL);
+	clock_gettime(clockid, &end);
+
+	if (((timespec_sub(start, end)/count)-ns) > UNRESONABLE_LATENCY) {
+		printf("Large rel latency: %lld ns :", (timespec_sub(start, end)/count)-ns);
+		return -1;
+	}
+
+	/* Next check absolute latency */
+	for (i = 0; i < count; i++) {
+		clock_gettime(clockid, &start);
+		target = timespec_add(start, ns);
+		clock_nanosleep(clockid, TIMER_ABSTIME, &target, NULL);
+		clock_gettime(clockid, &end);
+		latency += timespec_sub(target, end);
+	}
+
+	if (latency/count > UNRESONABLE_LATENCY) {
+		printf("Large abs latency: %lld ns :", latency/count);
+		return -1;
+	}
+
+	return 0;
+}
+
+
+
+int main(int argc, char **argv)
+{
+	long long length;
+	int clockid, ret;
+
+	for (clockid = CLOCK_REALTIME; clockid < NR_CLOCKIDS; clockid++) {
+
+		/* Skip cputime clockids since nanosleep won't increment cputime */
+		if (clockid == CLOCK_PROCESS_CPUTIME_ID ||
+				clockid == CLOCK_THREAD_CPUTIME_ID ||
+				clockid == CLOCK_HWSPECIFIC)
+			continue;
+
+		printf("nsleep latency %-26s ", clockstring(clockid));
+		fflush(stdout);
+
+		length = 10;
+		while (length <= (NSEC_PER_SEC * 10)) {
+			ret = nanosleep_lat_test(clockid, length);
+			if (ret)
+				break;
+			length *= 100;
+
+		}
+
+		if (ret == UNSUPPORTED) {
+			printf("[UNSUPPORTED]\n");
+			continue;
+		}
+		if (ret < 0) {
+			printf("[FAILED]\n");
+			return ksft_exit_fail();
+		}
+		printf("[OK]\n");
+	}
+	return ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/timers/posix_timers.c b/tools/testing/selftests/timers/posix_timers.c
new file mode 100644
index 000000000..15cf56d32
--- /dev/null
+++ b/tools/testing/selftests/timers/posix_timers.c
@@ -0,0 +1,222 @@
+/*
+ * Copyright (C) 2013 Red Hat, Inc., Frederic Weisbecker <fweisbec@redhat.com>
+ *
+ * Licensed under the terms of the GNU GPL License version 2
+ *
+ * Selftests for a few posix timers interface.
+ *
+ * Kernel loop code stolen from Steven Rostedt <srostedt@redhat.com>
+ */
+
+#include <sys/time.h>
+#include <stdio.h>
+#include <signal.h>
+#include <unistd.h>
+#include <time.h>
+#include <pthread.h>
+
+#include "../kselftest.h"
+
+#define DELAY 2
+#define USECS_PER_SEC 1000000
+
+static volatile int done;
+
+/* Busy loop in userspace to elapse ITIMER_VIRTUAL */
+static void user_loop(void)
+{
+	while (!done);
+}
+
+/*
+ * Try to spend as much time as possible in kernelspace
+ * to elapse ITIMER_PROF.
+ */
+static void kernel_loop(void)
+{
+	void *addr = sbrk(0);
+	int err = 0;
+
+	while (!done && !err) {
+		err = brk(addr + 4096);
+		err |= brk(addr);
+	}
+}
+
+/*
+ * Sleep until ITIMER_REAL expiration.
+ */
+static void idle_loop(void)
+{
+	pause();
+}
+
+static void sig_handler(int nr)
+{
+	done = 1;
+}
+
+/*
+ * Check the expected timer expiration matches the GTOD elapsed delta since
+ * we armed the timer. Keep a 0.5 sec error margin due to various jitter.
+ */
+static int check_diff(struct timeval start, struct timeval end)
+{
+	long long diff;
+
+	diff = end.tv_usec - start.tv_usec;
+	diff += (end.tv_sec - start.tv_sec) * USECS_PER_SEC;
+
+	if (abs(diff - DELAY * USECS_PER_SEC) > USECS_PER_SEC / 2) {
+		printf("Diff too high: %lld..", diff);
+		return -1;
+	}
+
+	return 0;
+}
+
+static int check_itimer(int which)
+{
+	int err;
+	struct timeval start, end;
+	struct itimerval val = {
+		.it_value.tv_sec = DELAY,
+	};
+
+	printf("Check itimer ");
+
+	if (which == ITIMER_VIRTUAL)
+		printf("virtual... ");
+	else if (which == ITIMER_PROF)
+		printf("prof... ");
+	else if (which == ITIMER_REAL)
+		printf("real... ");
+
+	fflush(stdout);
+
+	done = 0;
+
+	if (which == ITIMER_VIRTUAL)
+		signal(SIGVTALRM, sig_handler);
+	else if (which == ITIMER_PROF)
+		signal(SIGPROF, sig_handler);
+	else if (which == ITIMER_REAL)
+		signal(SIGALRM, sig_handler);
+
+	err = gettimeofday(&start, NULL);
+	if (err < 0) {
+		perror("Can't call gettimeofday()\n");
+		return -1;
+	}
+
+	err = setitimer(which, &val, NULL);
+	if (err < 0) {
+		perror("Can't set timer\n");
+		return -1;
+	}
+
+	if (which == ITIMER_VIRTUAL)
+		user_loop();
+	else if (which == ITIMER_PROF)
+		kernel_loop();
+	else if (which == ITIMER_REAL)
+		idle_loop();
+
+	err = gettimeofday(&end, NULL);
+	if (err < 0) {
+		perror("Can't call gettimeofday()\n");
+		return -1;
+	}
+
+	if (!check_diff(start, end))
+		printf("[OK]\n");
+	else
+		printf("[FAIL]\n");
+
+	return 0;
+}
+
+static int check_timer_create(int which)
+{
+	int err;
+	timer_t id;
+	struct timeval start, end;
+	struct itimerspec val = {
+		.it_value.tv_sec = DELAY,
+	};
+
+	printf("Check timer_create() ");
+	if (which == CLOCK_THREAD_CPUTIME_ID) {
+		printf("per thread... ");
+	} else if (which == CLOCK_PROCESS_CPUTIME_ID) {
+		printf("per process... ");
+	}
+	fflush(stdout);
+
+	done = 0;
+	err = timer_create(which, NULL, &id);
+	if (err < 0) {
+		perror("Can't create timer\n");
+		return -1;
+	}
+	signal(SIGALRM, sig_handler);
+
+	err = gettimeofday(&start, NULL);
+	if (err < 0) {
+		perror("Can't call gettimeofday()\n");
+		return -1;
+	}
+
+	err = timer_settime(id, 0, &val, NULL);
+	if (err < 0) {
+		perror("Can't set timer\n");
+		return -1;
+	}
+
+	user_loop();
+
+	err = gettimeofday(&end, NULL);
+	if (err < 0) {
+		perror("Can't call gettimeofday()\n");
+		return -1;
+	}
+
+	if (!check_diff(start, end))
+		printf("[OK]\n");
+	else
+		printf("[FAIL]\n");
+
+	return 0;
+}
+
+int main(int argc, char **argv)
+{
+	printf("Testing posix timers. False negative may happen on CPU execution \n");
+	printf("based timers if other threads run on the CPU...\n");
+
+	if (check_itimer(ITIMER_VIRTUAL) < 0)
+		return ksft_exit_fail();
+
+	if (check_itimer(ITIMER_PROF) < 0)
+		return ksft_exit_fail();
+
+	if (check_itimer(ITIMER_REAL) < 0)
+		return ksft_exit_fail();
+
+	if (check_timer_create(CLOCK_THREAD_CPUTIME_ID) < 0)
+		return ksft_exit_fail();
+
+	/*
+	 * It's unfortunately hard to reliably test a timer expiration
+	 * on parallel multithread cputime. We could arm it to expire
+	 * on DELAY * nr_threads, with nr_threads busy looping, then wait
+	 * the normal DELAY since the time is elapsing nr_threads faster.
+	 * But for that we need to ensure we have real physical free CPUs
+	 * to ensure true parallelism. So test only one thread until we
+	 * find a better solution.
+	 */
+	if (check_timer_create(CLOCK_PROCESS_CPUTIME_ID) < 0)
+		return ksft_exit_fail();
+
+	return ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/timers/raw_skew.c b/tools/testing/selftests/timers/raw_skew.c
new file mode 100644
index 000000000..b41d8dd0c
--- /dev/null
+++ b/tools/testing/selftests/timers/raw_skew.c
@@ -0,0 +1,148 @@
+/* CLOCK_MONOTONIC vs CLOCK_MONOTONIC_RAW skew test
+ *		by: john stultz (johnstul@us.ibm.com)
+ *		    John Stultz <john.stultz@linaro.org>
+ *		(C) Copyright IBM 2012
+ *		(C) Copyright Linaro Limited 2015
+ *		Licensed under the GPLv2
+ *
+ *  To build:
+ *	$ gcc raw_skew.c -o raw_skew -lrt
+ *
+ *   This program is free software: you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <sys/time.h>
+#include <sys/timex.h>
+#include <time.h>
+#include "../kselftest.h"
+
+#define CLOCK_MONOTONIC_RAW		4
+#define NSEC_PER_SEC 1000000000LL
+
+#define shift_right(x, s) ({		\
+	__typeof__(x) __x = (x);	\
+	__typeof__(s) __s = (s);	\
+	__x < 0 ? -(-__x >> __s) : __x >> __s; \
+})
+
+long long llabs(long long val)
+{
+	if (val < 0)
+		val = -val;
+	return val;
+}
+
+unsigned long long ts_to_nsec(struct timespec ts)
+{
+	return ts.tv_sec * NSEC_PER_SEC + ts.tv_nsec;
+}
+
+struct timespec nsec_to_ts(long long ns)
+{
+	struct timespec ts;
+
+	ts.tv_sec = ns/NSEC_PER_SEC;
+	ts.tv_nsec = ns%NSEC_PER_SEC;
+	return ts;
+}
+
+long long diff_timespec(struct timespec start, struct timespec end)
+{
+	long long start_ns, end_ns;
+
+	start_ns = ts_to_nsec(start);
+	end_ns = ts_to_nsec(end);
+	return end_ns - start_ns;
+}
+
+void get_monotonic_and_raw(struct timespec *mon, struct timespec *raw)
+{
+	struct timespec start, mid, end;
+	long long diff = 0, tmp;
+	int i;
+
+	for (i = 0; i < 3; i++) {
+		long long newdiff;
+
+		clock_gettime(CLOCK_MONOTONIC, &start);
+		clock_gettime(CLOCK_MONOTONIC_RAW, &mid);
+		clock_gettime(CLOCK_MONOTONIC, &end);
+
+		newdiff = diff_timespec(start, end);
+		if (diff == 0 || newdiff < diff) {
+			diff = newdiff;
+			*raw = mid;
+			tmp = (ts_to_nsec(start) + ts_to_nsec(end))/2;
+			*mon = nsec_to_ts(tmp);
+		}
+	}
+}
+
+int main(int argv, char **argc)
+{
+	struct timespec mon, raw, start, end;
+	long long delta1, delta2, interval, eppm, ppm;
+	struct timex tx1, tx2;
+
+	setbuf(stdout, NULL);
+
+	if (clock_gettime(CLOCK_MONOTONIC_RAW, &raw)) {
+		printf("ERR: NO CLOCK_MONOTONIC_RAW\n");
+		return -1;
+	}
+
+	tx1.modes = 0;
+	adjtimex(&tx1);
+	get_monotonic_and_raw(&mon, &raw);
+	start = mon;
+	delta1 = diff_timespec(mon, raw);
+
+	if (tx1.offset)
+		printf("WARNING: ADJ_OFFSET in progress, this will cause inaccurate results\n");
+
+	printf("Estimating clock drift: ");
+	fflush(stdout);
+	sleep(120);
+
+	get_monotonic_and_raw(&mon, &raw);
+	end = mon;
+	tx2.modes = 0;
+	adjtimex(&tx2);
+	delta2 = diff_timespec(mon, raw);
+
+	interval = diff_timespec(start, end);
+
+	/* calculate measured ppm between MONOTONIC and MONOTONIC_RAW */
+	eppm = ((delta2-delta1)*NSEC_PER_SEC)/interval;
+	eppm = -eppm;
+	printf("%lld.%i(est)", eppm/1000, abs((int)(eppm%1000)));
+
+	/* Avg the two actual freq samples adjtimex gave us */
+	ppm = (tx1.freq + tx2.freq) * 1000 / 2;
+	ppm = (long long)tx1.freq * 1000;
+	ppm = shift_right(ppm, 16);
+	printf(" %lld.%i(act)", ppm/1000, abs((int)(ppm%1000)));
+
+	if (llabs(eppm - ppm) > 1000) {
+		if (tx1.offset || tx2.offset ||
+		    tx1.freq != tx2.freq || tx1.tick != tx2.tick) {
+			printf("	[SKIP]\n");
+			return ksft_exit_skip("The clock was adjusted externally. Shutdown NTPd or other time sync daemons\n");
+		}
+		printf("	[FAILED]\n");
+		return ksft_exit_fail();
+	}
+	printf("	[OK]\n");
+	return  ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/timers/rtcpie.c b/tools/testing/selftests/timers/rtcpie.c
new file mode 100644
index 000000000..4ef2184f1
--- /dev/null
+++ b/tools/testing/selftests/timers/rtcpie.c
@@ -0,0 +1,142 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Real Time Clock Periodic Interrupt test program
+ *
+ * Since commit 6610e0893b8bc ("RTC: Rework RTC code to use timerqueue for
+ * events"), PIE are completely handled using hrtimers, without actually using
+ * any underlying hardware RTC.
+ *
+ */
+
+#include <stdio.h>
+#include <linux/rtc.h>
+#include <sys/ioctl.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <errno.h>
+
+#include "../kselftest.h"
+
+/*
+ * This expects the new RTC class driver framework, working with
+ * clocks that will often not be clones of what the PC-AT had.
+ * Use the command line to specify another RTC if you need one.
+ */
+static const char default_rtc[] = "/dev/rtc0";
+
+int main(int argc, char **argv)
+{
+	int i, fd, retval, irqcount = 0;
+	unsigned long tmp, data, old_pie_rate;
+	const char *rtc = default_rtc;
+	struct timeval start, end, diff;
+
+	switch (argc) {
+	case 2:
+		rtc = argv[1];
+		break;
+	case 1:
+		fd = open(default_rtc, O_RDONLY);
+		if (fd == -1) {
+			printf("Default RTC %s does not exist. Test Skipped!\n", default_rtc);
+			exit(KSFT_SKIP);
+		}
+		close(fd);
+		break;
+	default:
+		fprintf(stderr, "usage:  rtctest [rtcdev] [d]\n");
+		return 1;
+	}
+
+	fd = open(rtc, O_RDONLY);
+
+	if (fd ==  -1) {
+		perror(rtc);
+		exit(errno);
+	}
+
+	/* Read periodic IRQ rate */
+	retval = ioctl(fd, RTC_IRQP_READ, &old_pie_rate);
+	if (retval == -1) {
+		/* not all RTCs support periodic IRQs */
+		if (errno == EINVAL) {
+			fprintf(stderr, "\nNo periodic IRQ support\n");
+			goto done;
+		}
+		perror("RTC_IRQP_READ ioctl");
+		exit(errno);
+	}
+	fprintf(stderr, "\nPeriodic IRQ rate is %ldHz.\n", old_pie_rate);
+
+	fprintf(stderr, "Counting 20 interrupts at:");
+	fflush(stderr);
+
+	/* The frequencies 128Hz, 256Hz, ... 8192Hz are only allowed for root. */
+	for (tmp=2; tmp<=64; tmp*=2) {
+
+		retval = ioctl(fd, RTC_IRQP_SET, tmp);
+		if (retval == -1) {
+			/* not all RTCs can change their periodic IRQ rate */
+			if (errno == EINVAL) {
+				fprintf(stderr,
+					"\n...Periodic IRQ rate is fixed\n");
+				goto done;
+			}
+			perror("RTC_IRQP_SET ioctl");
+			exit(errno);
+		}
+
+		fprintf(stderr, "\n%ldHz:\t", tmp);
+		fflush(stderr);
+
+		/* Enable periodic interrupts */
+		retval = ioctl(fd, RTC_PIE_ON, 0);
+		if (retval == -1) {
+			perror("RTC_PIE_ON ioctl");
+			exit(errno);
+		}
+
+		for (i=1; i<21; i++) {
+			gettimeofday(&start, NULL);
+			/* This blocks */
+			retval = read(fd, &data, sizeof(unsigned long));
+			if (retval == -1) {
+				perror("read");
+				exit(errno);
+			}
+			gettimeofday(&end, NULL);
+			timersub(&end, &start, &diff);
+			if (diff.tv_sec > 0 ||
+			    diff.tv_usec > ((1000000L / tmp) * 1.10)) {
+				fprintf(stderr, "\nPIE delta error: %ld.%06ld should be close to 0.%06ld\n",
+				       diff.tv_sec, diff.tv_usec,
+				       (1000000L / tmp));
+				fflush(stdout);
+				exit(-1);
+			}
+
+			fprintf(stderr, " %d",i);
+			fflush(stderr);
+			irqcount++;
+		}
+
+		/* Disable periodic interrupts */
+		retval = ioctl(fd, RTC_PIE_OFF, 0);
+		if (retval == -1) {
+			perror("RTC_PIE_OFF ioctl");
+			exit(errno);
+		}
+	}
+
+done:
+	ioctl(fd, RTC_IRQP_SET, old_pie_rate);
+
+	fprintf(stderr, "\n\n\t\t\t *** Test complete ***\n");
+
+	close(fd);
+
+	return 0;
+}
diff --git a/tools/testing/selftests/timers/set-2038.c b/tools/testing/selftests/timers/set-2038.c
new file mode 100644
index 000000000..688cfd81b
--- /dev/null
+++ b/tools/testing/selftests/timers/set-2038.c
@@ -0,0 +1,133 @@
+/* Time bounds setting test
+ *		by: john stultz (johnstul@us.ibm.com)
+ *		(C) Copyright IBM 2012
+ *		Licensed under the GPLv2
+ *
+ *  NOTE: This is a meta-test which sets the time to edge cases then
+ *  uses other tests to detect problems. Thus this test requires that
+ *  the inconsistency-check and nanosleep tests be present in the same
+ *  directory it is run from.
+ *
+ *  To build:
+ *	$ gcc set-2038.c -o set-2038 -lrt
+ *
+ *   This program is free software: you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <time.h>
+#include <sys/time.h>
+#include "../kselftest.h"
+
+#define NSEC_PER_SEC 1000000000LL
+
+#define KTIME_MAX	((long long)~((unsigned long long)1 << 63))
+#define KTIME_SEC_MAX	(KTIME_MAX / NSEC_PER_SEC)
+
+#define YEAR_1901 (-0x7fffffffL)
+#define YEAR_1970 1
+#define YEAR_2038 0x7fffffffL			/*overflows 32bit time_t */
+#define YEAR_2262 KTIME_SEC_MAX			/*overflows 64bit ktime_t */
+#define YEAR_MAX  ((long long)((1ULL<<63)-1))	/*overflows 64bit time_t */
+
+int is32bits(void)
+{
+	return (sizeof(long) == 4);
+}
+
+int settime(long long time)
+{
+	struct timeval now;
+	int ret;
+
+	now.tv_sec = (time_t)time;
+	now.tv_usec  = 0;
+
+	ret = settimeofday(&now, NULL);
+
+	printf("Setting time to 0x%lx: %d\n", (long)time, ret);
+	return ret;
+}
+
+int do_tests(void)
+{
+	int ret;
+
+	ret = system("date");
+	ret = system("./inconsistency-check -c 0 -t 20");
+	ret |= system("./nanosleep");
+	ret |= system("./nsleep-lat");
+	return ret;
+
+}
+
+int main(int argc, char *argv[])
+{
+	int ret = 0;
+	int opt, dangerous = 0;
+	time_t start;
+
+	/* Process arguments */
+	while ((opt = getopt(argc, argv, "d")) != -1) {
+		switch (opt) {
+		case 'd':
+			dangerous = 1;
+		}
+	}
+
+	start = time(0);
+
+	/* First test that crazy values don't work */
+	if (!settime(YEAR_1901)) {
+		ret = -1;
+		goto out;
+	}
+	if (!settime(YEAR_MAX)) {
+		ret = -1;
+		goto out;
+	}
+	if (!is32bits() && !settime(YEAR_2262)) {
+		ret = -1;
+		goto out;
+	}
+
+	/* Now test behavior near edges */
+	settime(YEAR_1970);
+	ret = do_tests();
+	if (ret)
+		goto out;
+
+	settime(YEAR_2038 - 600);
+	ret = do_tests();
+	if (ret)
+		goto out;
+
+	/* The rest of the tests can blowup on 32bit systems */
+	if (is32bits() && !dangerous)
+		goto out;
+	/* Test rollover behavior 32bit edge */
+	settime(YEAR_2038 - 10);
+	ret = do_tests();
+	if (ret)
+		goto out;
+
+	settime(YEAR_2262 - 600);
+	ret = do_tests();
+
+out:
+	/* restore clock */
+	settime(start);
+	if (ret)
+		return ksft_exit_fail();
+	return ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/timers/set-tai.c b/tools/testing/selftests/timers/set-tai.c
new file mode 100644
index 000000000..8c4179ee2
--- /dev/null
+++ b/tools/testing/selftests/timers/set-tai.c
@@ -0,0 +1,69 @@
+/* Set tai offset
+ *              by: John Stultz <john.stultz@linaro.org>
+ *              (C) Copyright Linaro 2013
+ *              Licensed under the GPLv2
+ *
+ *   This program is free software: you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ */
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+#include <sys/time.h>
+#include <sys/timex.h>
+#include <string.h>
+#include <signal.h>
+#include <unistd.h>
+#include "../kselftest.h"
+
+int set_tai(int offset)
+{
+	struct timex tx;
+
+	memset(&tx, 0, sizeof(tx));
+
+	tx.modes = ADJ_TAI;
+	tx.constant = offset;
+
+	return adjtimex(&tx);
+}
+
+int get_tai(void)
+{
+	struct timex tx;
+
+	memset(&tx, 0, sizeof(tx));
+
+	adjtimex(&tx);
+	return tx.tai;
+}
+
+int main(int argc, char **argv)
+{
+	int i, ret;
+
+	ret = get_tai();
+	printf("tai offset started at %i\n", ret);
+
+	printf("Checking tai offsets can be properly set: ");
+	fflush(stdout);
+	for (i = 1; i <= 60; i++) {
+		ret = set_tai(i);
+		ret = get_tai();
+		if (ret != i) {
+			printf("[FAILED] expected: %i got %i\n", i, ret);
+			return ksft_exit_fail();
+		}
+	}
+	printf("[OK]\n");
+	return ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/timers/set-timer-lat.c b/tools/testing/selftests/timers/set-timer-lat.c
new file mode 100644
index 000000000..50da45437
--- /dev/null
+++ b/tools/testing/selftests/timers/set-timer-lat.c
@@ -0,0 +1,283 @@
+/* set_timer latency test
+ *		John Stultz (john.stultz@linaro.org)
+ *              (C) Copyright Linaro 2014
+ *              Licensed under the GPLv2
+ *
+ *   This test makes sure the set_timer api is correct
+ *
+ *  To build:
+ *	$ gcc set-timer-lat.c -o set-timer-lat -lrt
+ *
+ *   This program is free software: you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ */
+
+
+#include <errno.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <time.h>
+#include <string.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <pthread.h>
+#include "../kselftest.h"
+
+#define CLOCK_REALTIME			0
+#define CLOCK_MONOTONIC			1
+#define CLOCK_PROCESS_CPUTIME_ID	2
+#define CLOCK_THREAD_CPUTIME_ID		3
+#define CLOCK_MONOTONIC_RAW		4
+#define CLOCK_REALTIME_COARSE		5
+#define CLOCK_MONOTONIC_COARSE		6
+#define CLOCK_BOOTTIME			7
+#define CLOCK_REALTIME_ALARM		8
+#define CLOCK_BOOTTIME_ALARM		9
+#define CLOCK_HWSPECIFIC		10
+#define CLOCK_TAI			11
+#define NR_CLOCKIDS			12
+
+
+#define NSEC_PER_SEC 1000000000ULL
+#define UNRESONABLE_LATENCY 40000000 /* 40ms in nanosecs */
+
+#define TIMER_SECS 1
+int alarmcount;
+int clock_id;
+struct timespec start_time;
+long long max_latency_ns;
+int timer_fired_early;
+
+char *clockstring(int clockid)
+{
+	switch (clockid) {
+	case CLOCK_REALTIME:
+		return "CLOCK_REALTIME";
+	case CLOCK_MONOTONIC:
+		return "CLOCK_MONOTONIC";
+	case CLOCK_PROCESS_CPUTIME_ID:
+		return "CLOCK_PROCESS_CPUTIME_ID";
+	case CLOCK_THREAD_CPUTIME_ID:
+		return "CLOCK_THREAD_CPUTIME_ID";
+	case CLOCK_MONOTONIC_RAW:
+		return "CLOCK_MONOTONIC_RAW";
+	case CLOCK_REALTIME_COARSE:
+		return "CLOCK_REALTIME_COARSE";
+	case CLOCK_MONOTONIC_COARSE:
+		return "CLOCK_MONOTONIC_COARSE";
+	case CLOCK_BOOTTIME:
+		return "CLOCK_BOOTTIME";
+	case CLOCK_REALTIME_ALARM:
+		return "CLOCK_REALTIME_ALARM";
+	case CLOCK_BOOTTIME_ALARM:
+		return "CLOCK_BOOTTIME_ALARM";
+	case CLOCK_TAI:
+		return "CLOCK_TAI";
+	};
+	return "UNKNOWN_CLOCKID";
+}
+
+
+long long timespec_sub(struct timespec a, struct timespec b)
+{
+	long long ret = NSEC_PER_SEC * b.tv_sec + b.tv_nsec;
+
+	ret -= NSEC_PER_SEC * a.tv_sec + a.tv_nsec;
+	return ret;
+}
+
+
+void sigalarm(int signo)
+{
+	long long delta_ns;
+	struct timespec ts;
+
+	clock_gettime(clock_id, &ts);
+	alarmcount++;
+
+	delta_ns = timespec_sub(start_time, ts);
+	delta_ns -= NSEC_PER_SEC * TIMER_SECS * alarmcount;
+
+	if (delta_ns < 0)
+		timer_fired_early = 1;
+
+	if (delta_ns > max_latency_ns)
+		max_latency_ns = delta_ns;
+}
+
+void describe_timer(int flags, int interval)
+{
+	printf("%-22s %s %s ",
+			clockstring(clock_id),
+			flags ? "ABSTIME":"RELTIME",
+			interval ? "PERIODIC":"ONE-SHOT");
+}
+
+int setup_timer(int clock_id, int flags, int interval, timer_t *tm1)
+{
+	struct sigevent se;
+	struct itimerspec its1, its2;
+	int err;
+
+	/* Set up timer: */
+	memset(&se, 0, sizeof(se));
+	se.sigev_notify = SIGEV_SIGNAL;
+	se.sigev_signo = SIGRTMAX;
+	se.sigev_value.sival_int = 0;
+
+	max_latency_ns = 0;
+	alarmcount = 0;
+	timer_fired_early = 0;
+
+	err = timer_create(clock_id, &se, tm1);
+	if (err) {
+		if ((clock_id == CLOCK_REALTIME_ALARM) ||
+		    (clock_id == CLOCK_BOOTTIME_ALARM)) {
+			printf("%-22s %s missing CAP_WAKE_ALARM?    : [UNSUPPORTED]\n",
+					clockstring(clock_id),
+					flags ? "ABSTIME":"RELTIME");
+			/* Indicate timer isn't set, so caller doesn't wait */
+			return 1;
+		}
+		printf("%s - timer_create() failed\n", clockstring(clock_id));
+		return -1;
+	}
+
+	clock_gettime(clock_id, &start_time);
+	if (flags) {
+		its1.it_value = start_time;
+		its1.it_value.tv_sec += TIMER_SECS;
+	} else {
+		its1.it_value.tv_sec = TIMER_SECS;
+		its1.it_value.tv_nsec = 0;
+	}
+	its1.it_interval.tv_sec = interval;
+	its1.it_interval.tv_nsec = 0;
+
+	err = timer_settime(*tm1, flags, &its1, &its2);
+	if (err) {
+		printf("%s - timer_settime() failed\n", clockstring(clock_id));
+		return -1;
+	}
+
+	return 0;
+}
+
+int check_timer_latency(int flags, int interval)
+{
+	int err = 0;
+
+	describe_timer(flags, interval);
+	printf("timer fired early: %7d : ", timer_fired_early);
+	if (!timer_fired_early) {
+		printf("[OK]\n");
+	} else {
+		printf("[FAILED]\n");
+		err = -1;
+	}
+
+	describe_timer(flags, interval);
+	printf("max latency: %10lld ns : ", max_latency_ns);
+
+	if (max_latency_ns < UNRESONABLE_LATENCY) {
+		printf("[OK]\n");
+	} else {
+		printf("[FAILED]\n");
+		err = -1;
+	}
+	return err;
+}
+
+int check_alarmcount(int flags, int interval)
+{
+	describe_timer(flags, interval);
+	printf("count: %19d : ", alarmcount);
+	if (alarmcount == 1) {
+		printf("[OK]\n");
+		return 0;
+	}
+	printf("[FAILED]\n");
+	return -1;
+}
+
+int do_timer(int clock_id, int flags)
+{
+	timer_t tm1;
+	const int interval = TIMER_SECS;
+	int err;
+
+	err = setup_timer(clock_id, flags, interval, &tm1);
+	/* Unsupported case - return 0 to not fail the test */
+	if (err)
+		return err == 1 ? 0 : err;
+
+	while (alarmcount < 5)
+		sleep(1);
+
+	timer_delete(tm1);
+	return check_timer_latency(flags, interval);
+}
+
+int do_timer_oneshot(int clock_id, int flags)
+{
+	timer_t tm1;
+	const int interval = 0;
+	struct timeval timeout;
+	int err;
+
+	err = setup_timer(clock_id, flags, interval, &tm1);
+	/* Unsupported case - return 0 to not fail the test */
+	if (err)
+		return err == 1 ? 0 : err;
+
+	memset(&timeout, 0, sizeof(timeout));
+	timeout.tv_sec = 5;
+	do {
+		err = select(0, NULL, NULL, NULL, &timeout);
+	} while (err == -1 && errno == EINTR);
+
+	timer_delete(tm1);
+	err = check_timer_latency(flags, interval);
+	err |= check_alarmcount(flags, interval);
+	return err;
+}
+
+int main(void)
+{
+	struct sigaction act;
+	int signum = SIGRTMAX;
+	int ret = 0;
+
+	/* Set up signal handler: */
+	sigfillset(&act.sa_mask);
+	act.sa_flags = 0;
+	act.sa_handler = sigalarm;
+	sigaction(signum, &act, NULL);
+
+	printf("Setting timers for every %i seconds\n", TIMER_SECS);
+	for (clock_id = 0; clock_id < NR_CLOCKIDS; clock_id++) {
+
+		if ((clock_id == CLOCK_PROCESS_CPUTIME_ID) ||
+				(clock_id == CLOCK_THREAD_CPUTIME_ID) ||
+				(clock_id == CLOCK_MONOTONIC_RAW) ||
+				(clock_id == CLOCK_REALTIME_COARSE) ||
+				(clock_id == CLOCK_MONOTONIC_COARSE) ||
+				(clock_id == CLOCK_HWSPECIFIC))
+			continue;
+
+		ret |= do_timer(clock_id, TIMER_ABSTIME);
+		ret |= do_timer(clock_id, 0);
+		ret |= do_timer_oneshot(clock_id, TIMER_ABSTIME);
+		ret |= do_timer_oneshot(clock_id, 0);
+	}
+	if (ret)
+		return ksft_exit_fail();
+	return ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/timers/set-tz.c b/tools/testing/selftests/timers/set-tz.c
new file mode 100644
index 000000000..62bd33eb1
--- /dev/null
+++ b/tools/testing/selftests/timers/set-tz.c
@@ -0,0 +1,110 @@
+/* Set tz value
+ *              by: John Stultz <john.stultz@linaro.org>
+ *              (C) Copyright Linaro 2016
+ *              Licensed under the GPLv2
+ *
+ *   This program is free software: you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ */
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+#include <sys/time.h>
+#include <sys/timex.h>
+#include <string.h>
+#include <signal.h>
+#include <unistd.h>
+#include "../kselftest.h"
+
+int set_tz(int min, int dst)
+{
+	struct timezone tz;
+
+	tz.tz_minuteswest = min;
+	tz.tz_dsttime = dst;
+
+	return settimeofday(0, &tz);
+}
+
+int get_tz_min(void)
+{
+	struct timezone tz;
+	struct timeval tv;
+
+	memset(&tz, 0, sizeof(tz));
+	gettimeofday(&tv, &tz);
+	return tz.tz_minuteswest;
+}
+
+int get_tz_dst(void)
+{
+	struct timezone tz;
+	struct timeval tv;
+
+	memset(&tz, 0, sizeof(tz));
+	gettimeofday(&tv, &tz);
+	return tz.tz_dsttime;
+}
+
+int main(int argc, char **argv)
+{
+	int i, ret;
+	int min, dst;
+
+	min = get_tz_min();
+	dst = get_tz_dst();
+	printf("tz_minuteswest started at %i, dst at %i\n", min, dst);
+
+	printf("Checking tz_minuteswest can be properly set: ");
+	fflush(stdout);
+	for (i = -15*60; i < 15*60; i += 30) {
+		ret = set_tz(i, dst);
+		ret = get_tz_min();
+		if (ret != i) {
+			printf("[FAILED] expected: %i got %i\n", i, ret);
+			goto err;
+		}
+	}
+	printf("[OK]\n");
+
+	printf("Checking invalid tz_minuteswest values are caught: ");
+	fflush(stdout);
+
+	if (!set_tz(-15*60-1, dst)) {
+		printf("[FAILED] %i didn't return failure!\n", -15*60-1);
+		goto err;
+	}
+
+	if (!set_tz(15*60+1, dst)) {
+		printf("[FAILED] %i didn't return failure!\n", 15*60+1);
+		goto err;
+	}
+
+	if (!set_tz(-24*60, dst)) {
+		printf("[FAILED] %i didn't return failure!\n", -24*60);
+		goto err;
+	}
+
+	if (!set_tz(24*60, dst)) {
+		printf("[FAILED] %i didn't return failure!\n", 24*60);
+		goto err;
+	}
+
+	printf("[OK]\n");
+
+	set_tz(min, dst);
+	return ksft_exit_pass();
+
+err:
+	set_tz(min, dst);
+	return ksft_exit_fail();
+}
diff --git a/tools/testing/selftests/timers/skew_consistency.c b/tools/testing/selftests/timers/skew_consistency.c
new file mode 100644
index 000000000..022b711c7
--- /dev/null
+++ b/tools/testing/selftests/timers/skew_consistency.c
@@ -0,0 +1,78 @@
+/* ADJ_FREQ Skew consistency test
+ *		by: john stultz (johnstul@us.ibm.com)
+ *		(C) Copyright IBM 2012
+ *		Licensed under the GPLv2
+ *
+ *  NOTE: This is a meta-test which cranks the ADJ_FREQ knob back
+ *  and forth and watches for consistency problems. Thus this test requires
+ *  that the inconsistency-check tests be present in the same directory it
+ *  is run from.
+ *
+ *  To build:
+ *	$ gcc skew_consistency.c -o skew_consistency -lrt
+ *
+ *   This program is free software: you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ */
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/time.h>
+#include <sys/timex.h>
+#include <time.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/wait.h>
+#include "../kselftest.h"
+
+#define NSEC_PER_SEC 1000000000LL
+
+int main(int argv, char **argc)
+{
+	struct timex tx;
+	int ret, ppm;
+	pid_t pid;
+
+
+	printf("Running Asynchronous Frequency Changing Tests...\n");
+
+	pid = fork();
+	if (!pid)
+		return system("./inconsistency-check -c 1 -t 600");
+
+	ppm = 500;
+	ret = 0;
+
+	while (pid != waitpid(pid, &ret, WNOHANG)) {
+		ppm = -ppm;
+		tx.modes = ADJ_FREQUENCY;
+		tx.freq = ppm << 16;
+		adjtimex(&tx);
+		usleep(500000);
+	}
+
+	/* Set things back */
+	tx.modes = ADJ_FREQUENCY;
+	tx.offset = 0;
+	adjtimex(&tx);
+
+
+	if (ret) {
+		printf("[FAILED]\n");
+		return ksft_exit_fail();
+	}
+	printf("[OK]\n");
+	return ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/timers/threadtest.c b/tools/testing/selftests/timers/threadtest.c
new file mode 100644
index 000000000..cf3e48919
--- /dev/null
+++ b/tools/testing/selftests/timers/threadtest.c
@@ -0,0 +1,193 @@
+/* threadtest.c
+ *		by: john stultz (johnstul@us.ibm.com)
+ *		(C) Copyright IBM 2004, 2005, 2006, 2012
+ *		Licensed under the GPLv2
+ *
+ *  To build:
+ *	$ gcc threadtest.c -o threadtest -lrt
+ *
+ *   This program is free software: you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ */
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <sys/time.h>
+#include <pthread.h>
+#include "../kselftest.h"
+
+/* serializes shared list access */
+pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;
+/* serializes console output */
+pthread_mutex_t print_lock = PTHREAD_MUTEX_INITIALIZER;
+
+
+#define MAX_THREADS 128
+#define LISTSIZE 128
+
+int done = 0;
+
+struct timespec global_list[LISTSIZE];
+int listcount = 0;
+
+
+void checklist(struct timespec *list, int size)
+{
+	int i, j;
+	struct timespec *a, *b;
+
+	/* scan the list */
+	for (i = 0; i < size-1; i++) {
+		a = &list[i];
+		b = &list[i+1];
+
+		/* look for any time inconsistencies */
+		if ((b->tv_sec <= a->tv_sec) &&
+			(b->tv_nsec < a->tv_nsec)) {
+
+			/* flag other threads */
+			done = 1;
+
+			/*serialize printing to avoid junky output*/
+			pthread_mutex_lock(&print_lock);
+
+			/* dump the list */
+			printf("\n");
+			for (j = 0; j < size; j++) {
+				if (j == i)
+					printf("---------------\n");
+				printf("%lu:%lu\n", list[j].tv_sec, list[j].tv_nsec);
+				if (j == i+1)
+					printf("---------------\n");
+			}
+			printf("[FAILED]\n");
+
+			pthread_mutex_unlock(&print_lock);
+		}
+	}
+}
+
+/* The shared thread shares a global list
+ * that each thread fills while holding the lock.
+ * This stresses clock syncronization across cpus.
+ */
+void *shared_thread(void *arg)
+{
+	while (!done) {
+		/* protect the list */
+		pthread_mutex_lock(&list_lock);
+
+		/* see if we're ready to check the list */
+		if (listcount >= LISTSIZE) {
+			checklist(global_list, LISTSIZE);
+			listcount = 0;
+		}
+		clock_gettime(CLOCK_MONOTONIC, &global_list[listcount++]);
+
+		pthread_mutex_unlock(&list_lock);
+	}
+	return NULL;
+}
+
+
+/* Each independent thread fills in its own
+ * list. This stresses clock_gettime() lock contention.
+ */
+void *independent_thread(void *arg)
+{
+	struct timespec my_list[LISTSIZE];
+	int count;
+
+	while (!done) {
+		/* fill the list */
+		for (count = 0; count < LISTSIZE; count++)
+			clock_gettime(CLOCK_MONOTONIC, &my_list[count]);
+		checklist(my_list, LISTSIZE);
+	}
+	return NULL;
+}
+
+#define DEFAULT_THREAD_COUNT 8
+#define DEFAULT_RUNTIME 30
+
+int main(int argc, char **argv)
+{
+	int thread_count, i;
+	time_t start, now, runtime;
+	char buf[255];
+	pthread_t pth[MAX_THREADS];
+	int opt;
+	void *tret;
+	int ret = 0;
+	void *(*thread)(void *) = shared_thread;
+
+	thread_count = DEFAULT_THREAD_COUNT;
+	runtime = DEFAULT_RUNTIME;
+
+	/* Process arguments */
+	while ((opt = getopt(argc, argv, "t:n:i")) != -1) {
+		switch (opt) {
+		case 't':
+			runtime = atoi(optarg);
+			break;
+		case 'n':
+			thread_count = atoi(optarg);
+			break;
+		case 'i':
+			thread = independent_thread;
+			printf("using independent threads\n");
+			break;
+		default:
+			printf("Usage: %s [-t <secs>] [-n <numthreads>] [-i]\n", argv[0]);
+			printf("	-t: time to run\n");
+			printf("	-n: number of threads\n");
+			printf("	-i: use independent threads\n");
+			return -1;
+		}
+	}
+
+	if (thread_count > MAX_THREADS)
+		thread_count = MAX_THREADS;
+
+
+	setbuf(stdout, NULL);
+
+	start = time(0);
+	strftime(buf, 255, "%a, %d %b %Y %T %z", localtime(&start));
+	printf("%s\n", buf);
+	printf("Testing consistency with %i threads for %ld seconds: ", thread_count, runtime);
+	fflush(stdout);
+
+	/* spawn */
+	for (i = 0; i < thread_count; i++)
+		pthread_create(&pth[i], 0, thread, 0);
+
+	while (time(&now) < start + runtime) {
+		sleep(1);
+		if (done) {
+			ret = 1;
+			strftime(buf, 255, "%a, %d %b %Y %T %z", localtime(&now));
+			printf("%s\n", buf);
+			goto out;
+		}
+	}
+	printf("[OK]\n");
+	done = 1;
+
+out:
+	/* wait */
+	for (i = 0; i < thread_count; i++)
+		pthread_join(pth[i], &tret);
+
+	/* die */
+	if (ret)
+		ksft_exit_fail();
+	return ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/timers/valid-adjtimex.c b/tools/testing/selftests/timers/valid-adjtimex.c
new file mode 100644
index 000000000..5397de708
--- /dev/null
+++ b/tools/testing/selftests/timers/valid-adjtimex.c
@@ -0,0 +1,330 @@
+/* valid adjtimex test
+ *              by: John Stultz <john.stultz@linaro.org>
+ *              (C) Copyright Linaro 2015
+ *              Licensed under the GPLv2
+ *
+ *  This test validates adjtimex interface with valid
+ *  and invalid test data.
+ *
+ *  Usage: valid-adjtimex
+ *
+ *  To build:
+ *	$ gcc valid-adjtimex.c -o valid-adjtimex -lrt
+ *
+ *   This program is free software: you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation, either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ */
+
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+#include <sys/time.h>
+#include <sys/timex.h>
+#include <string.h>
+#include <signal.h>
+#include <unistd.h>
+#include "../kselftest.h"
+
+#define NSEC_PER_SEC 1000000000LL
+#define USEC_PER_SEC 1000000LL
+
+#define ADJ_SETOFFSET 0x0100
+
+#include <sys/syscall.h>
+static int clock_adjtime(clockid_t id, struct timex *tx)
+{
+	return syscall(__NR_clock_adjtime, id, tx);
+}
+
+
+/* clear NTP time_status & time_state */
+int clear_time_state(void)
+{
+	struct timex tx;
+	int ret;
+
+	tx.modes = ADJ_STATUS;
+	tx.status = 0;
+	ret = adjtimex(&tx);
+	return ret;
+}
+
+#define NUM_FREQ_VALID 32
+#define NUM_FREQ_OUTOFRANGE 4
+#define NUM_FREQ_INVALID 2
+
+long valid_freq[NUM_FREQ_VALID] = {
+	-499<<16,
+	-450<<16,
+	-400<<16,
+	-350<<16,
+	-300<<16,
+	-250<<16,
+	-200<<16,
+	-150<<16,
+	-100<<16,
+	-75<<16,
+	-50<<16,
+	-25<<16,
+	-10<<16,
+	-5<<16,
+	-1<<16,
+	-1000,
+	1<<16,
+	5<<16,
+	10<<16,
+	25<<16,
+	50<<16,
+	75<<16,
+	100<<16,
+	150<<16,
+	200<<16,
+	250<<16,
+	300<<16,
+	350<<16,
+	400<<16,
+	450<<16,
+	499<<16,
+};
+
+long outofrange_freq[NUM_FREQ_OUTOFRANGE] = {
+	-1000<<16,
+	-550<<16,
+	550<<16,
+	1000<<16,
+};
+
+#define LONG_MAX (~0UL>>1)
+#define LONG_MIN (-LONG_MAX - 1)
+
+long invalid_freq[NUM_FREQ_INVALID] = {
+	LONG_MAX,
+	LONG_MIN,
+};
+
+int validate_freq(void)
+{
+	struct timex tx;
+	int ret, pass = 0;
+	int i;
+
+	clear_time_state();
+
+	memset(&tx, 0, sizeof(struct timex));
+	/* Set the leap second insert flag */
+
+	printf("Testing ADJ_FREQ... ");
+	fflush(stdout);
+	for (i = 0; i < NUM_FREQ_VALID; i++) {
+		tx.modes = ADJ_FREQUENCY;
+		tx.freq = valid_freq[i];
+
+		ret = adjtimex(&tx);
+		if (ret < 0) {
+			printf("[FAIL]\n");
+			printf("Error: adjtimex(ADJ_FREQ, %ld - %ld ppm\n",
+				valid_freq[i], valid_freq[i]>>16);
+			pass = -1;
+			goto out;
+		}
+		tx.modes = 0;
+		ret = adjtimex(&tx);
+		if (tx.freq != valid_freq[i]) {
+			printf("Warning: freq value %ld not what we set it (%ld)!\n",
+					tx.freq, valid_freq[i]);
+		}
+	}
+	for (i = 0; i < NUM_FREQ_OUTOFRANGE; i++) {
+		tx.modes = ADJ_FREQUENCY;
+		tx.freq = outofrange_freq[i];
+
+		ret = adjtimex(&tx);
+		if (ret < 0) {
+			printf("[FAIL]\n");
+			printf("Error: adjtimex(ADJ_FREQ, %ld - %ld ppm\n",
+				outofrange_freq[i], outofrange_freq[i]>>16);
+			pass = -1;
+			goto out;
+		}
+		tx.modes = 0;
+		ret = adjtimex(&tx);
+		if (tx.freq == outofrange_freq[i]) {
+			printf("[FAIL]\n");
+			printf("ERROR: out of range value %ld actually set!\n",
+					tx.freq);
+			pass = -1;
+			goto out;
+		}
+	}
+
+
+	if (sizeof(long) == 8) { /* this case only applies to 64bit systems */
+		for (i = 0; i < NUM_FREQ_INVALID; i++) {
+			tx.modes = ADJ_FREQUENCY;
+			tx.freq = invalid_freq[i];
+			ret = adjtimex(&tx);
+			if (ret >= 0) {
+				printf("[FAIL]\n");
+				printf("Error: No failure on invalid ADJ_FREQUENCY %ld\n",
+					invalid_freq[i]);
+				pass = -1;
+				goto out;
+			}
+		}
+	}
+
+	printf("[OK]\n");
+out:
+	/* reset freq to zero */
+	tx.modes = ADJ_FREQUENCY;
+	tx.freq = 0;
+	ret = adjtimex(&tx);
+
+	return pass;
+}
+
+
+int set_offset(long long offset, int use_nano)
+{
+	struct timex tmx = {};
+	int ret;
+
+	tmx.modes = ADJ_SETOFFSET;
+	if (use_nano) {
+		tmx.modes |= ADJ_NANO;
+
+		tmx.time.tv_sec = offset / NSEC_PER_SEC;
+		tmx.time.tv_usec = offset % NSEC_PER_SEC;
+
+		if (offset < 0 && tmx.time.tv_usec) {
+			tmx.time.tv_sec -= 1;
+			tmx.time.tv_usec += NSEC_PER_SEC;
+		}
+	} else {
+		tmx.time.tv_sec = offset / USEC_PER_SEC;
+		tmx.time.tv_usec = offset % USEC_PER_SEC;
+
+		if (offset < 0 && tmx.time.tv_usec) {
+			tmx.time.tv_sec -= 1;
+			tmx.time.tv_usec += USEC_PER_SEC;
+		}
+	}
+
+	ret = clock_adjtime(CLOCK_REALTIME, &tmx);
+	if (ret < 0) {
+		printf("(sec: %ld  usec: %ld) ", tmx.time.tv_sec, tmx.time.tv_usec);
+		printf("[FAIL]\n");
+		return -1;
+	}
+	return 0;
+}
+
+int set_bad_offset(long sec, long usec, int use_nano)
+{
+	struct timex tmx = {};
+	int ret;
+
+	tmx.modes = ADJ_SETOFFSET;
+	if (use_nano)
+		tmx.modes |= ADJ_NANO;
+
+	tmx.time.tv_sec = sec;
+	tmx.time.tv_usec = usec;
+	ret = clock_adjtime(CLOCK_REALTIME, &tmx);
+	if (ret >= 0) {
+		printf("Invalid (sec: %ld  usec: %ld) did not fail! ", tmx.time.tv_sec, tmx.time.tv_usec);
+		printf("[FAIL]\n");
+		return -1;
+	}
+	return 0;
+}
+
+int validate_set_offset(void)
+{
+	printf("Testing ADJ_SETOFFSET... ");
+	fflush(stdout);
+
+	/* Test valid values */
+	if (set_offset(NSEC_PER_SEC - 1, 1))
+		return -1;
+
+	if (set_offset(-NSEC_PER_SEC + 1, 1))
+		return -1;
+
+	if (set_offset(-NSEC_PER_SEC - 1, 1))
+		return -1;
+
+	if (set_offset(5 * NSEC_PER_SEC, 1))
+		return -1;
+
+	if (set_offset(-5 * NSEC_PER_SEC, 1))
+		return -1;
+
+	if (set_offset(5 * NSEC_PER_SEC + NSEC_PER_SEC / 2, 1))
+		return -1;
+
+	if (set_offset(-5 * NSEC_PER_SEC - NSEC_PER_SEC / 2, 1))
+		return -1;
+
+	if (set_offset(USEC_PER_SEC - 1, 0))
+		return -1;
+
+	if (set_offset(-USEC_PER_SEC + 1, 0))
+		return -1;
+
+	if (set_offset(-USEC_PER_SEC - 1, 0))
+		return -1;
+
+	if (set_offset(5 * USEC_PER_SEC, 0))
+		return -1;
+
+	if (set_offset(-5 * USEC_PER_SEC, 0))
+		return -1;
+
+	if (set_offset(5 * USEC_PER_SEC + USEC_PER_SEC / 2, 0))
+		return -1;
+
+	if (set_offset(-5 * USEC_PER_SEC - USEC_PER_SEC / 2, 0))
+		return -1;
+
+	/* Test invalid values */
+	if (set_bad_offset(0, -1, 1))
+		return -1;
+	if (set_bad_offset(0, -1, 0))
+		return -1;
+	if (set_bad_offset(0, 2 * NSEC_PER_SEC, 1))
+		return -1;
+	if (set_bad_offset(0, 2 * USEC_PER_SEC, 0))
+		return -1;
+	if (set_bad_offset(0, NSEC_PER_SEC, 1))
+		return -1;
+	if (set_bad_offset(0, USEC_PER_SEC, 0))
+		return -1;
+	if (set_bad_offset(0, -NSEC_PER_SEC, 1))
+		return -1;
+	if (set_bad_offset(0, -USEC_PER_SEC, 0))
+		return -1;
+
+	printf("[OK]\n");
+	return 0;
+}
+
+int main(int argc, char **argv)
+{
+	if (validate_freq())
+		return ksft_exit_fail();
+
+	if (validate_set_offset())
+		return ksft_exit_fail();
+
+	return ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/uevent/Makefile b/tools/testing/selftests/uevent/Makefile
new file mode 100644
index 000000000..f7baa9aa2
--- /dev/null
+++ b/tools/testing/selftests/uevent/Makefile
@@ -0,0 +1,17 @@
+# SPDX-License-Identifier: GPL-2.0
+all:
+
+include ../lib.mk
+
+.PHONY: all clean
+
+BINARIES := uevent_filtering
+CFLAGS += -Wl,-no-as-needed -Wall
+
+uevent_filtering: uevent_filtering.c ../kselftest.h ../kselftest_harness.h
+	$(CC) $(CFLAGS) $< -o $@
+
+TEST_PROGS += $(BINARIES)
+EXTRA_CLEAN := $(BINARIES)
+
+all: $(BINARIES)
diff --git a/tools/testing/selftests/uevent/config b/tools/testing/selftests/uevent/config
new file mode 100644
index 000000000..1038f4515
--- /dev/null
+++ b/tools/testing/selftests/uevent/config
@@ -0,0 +1,2 @@
+CONFIG_USER_NS=y
+CONFIG_NET=y
diff --git a/tools/testing/selftests/uevent/uevent_filtering.c b/tools/testing/selftests/uevent/uevent_filtering.c
new file mode 100644
index 000000000..f83391aa4
--- /dev/null
+++ b/tools/testing/selftests/uevent/uevent_filtering.c
@@ -0,0 +1,486 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/netlink.h>
+#include <signal.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/prctl.h>
+#include <sys/socket.h>
+#include <sched.h>
+#include <sys/eventfd.h>
+#include <sys/stat.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "../kselftest.h"
+#include "../kselftest_harness.h"
+
+#define __DEV_FULL "/sys/devices/virtual/mem/full/uevent"
+#define __UEVENT_BUFFER_SIZE (2048 * 2)
+#define __UEVENT_HEADER "add@/devices/virtual/mem/full"
+#define __UEVENT_HEADER_LEN sizeof("add@/devices/virtual/mem/full")
+#define __UEVENT_LISTEN_ALL -1
+
+ssize_t read_nointr(int fd, void *buf, size_t count)
+{
+	ssize_t ret;
+
+again:
+	ret = read(fd, buf, count);
+	if (ret < 0 && errno == EINTR)
+		goto again;
+
+	return ret;
+}
+
+ssize_t write_nointr(int fd, const void *buf, size_t count)
+{
+	ssize_t ret;
+
+again:
+	ret = write(fd, buf, count);
+	if (ret < 0 && errno == EINTR)
+		goto again;
+
+	return ret;
+}
+
+int wait_for_pid(pid_t pid)
+{
+	int status, ret;
+
+again:
+	ret = waitpid(pid, &status, 0);
+	if (ret == -1) {
+		if (errno == EINTR)
+			goto again;
+
+		return -1;
+	}
+
+	if (ret != pid)
+		goto again;
+
+	if (!WIFEXITED(status) || WEXITSTATUS(status) != 0)
+		return -1;
+
+	return 0;
+}
+
+static int uevent_listener(unsigned long post_flags, bool expect_uevent,
+			   int sync_fd)
+{
+	int sk_fd, ret;
+	socklen_t sk_addr_len;
+	int fret = -1, rcv_buf_sz = __UEVENT_BUFFER_SIZE;
+	uint64_t sync_add = 1;
+	struct sockaddr_nl sk_addr = { 0 }, rcv_addr = { 0 };
+	char buf[__UEVENT_BUFFER_SIZE] = { 0 };
+	struct iovec iov = { buf, __UEVENT_BUFFER_SIZE };
+	char control[CMSG_SPACE(sizeof(struct ucred))];
+	struct msghdr hdr = {
+		&rcv_addr, sizeof(rcv_addr), &iov, 1,
+		control,   sizeof(control),  0,
+	};
+
+	sk_fd = socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC,
+		       NETLINK_KOBJECT_UEVENT);
+	if (sk_fd < 0) {
+		fprintf(stderr, "%s - Failed to open uevent socket\n", strerror(errno));
+		return -1;
+	}
+
+	ret = setsockopt(sk_fd, SOL_SOCKET, SO_RCVBUF, &rcv_buf_sz,
+			 sizeof(rcv_buf_sz));
+	if (ret < 0) {
+		fprintf(stderr, "%s - Failed to set socket options\n", strerror(errno));
+		goto on_error;
+	}
+
+	sk_addr.nl_family = AF_NETLINK;
+	sk_addr.nl_groups = __UEVENT_LISTEN_ALL;
+
+	sk_addr_len = sizeof(sk_addr);
+	ret = bind(sk_fd, (struct sockaddr *)&sk_addr, sk_addr_len);
+	if (ret < 0) {
+		fprintf(stderr, "%s - Failed to bind socket\n", strerror(errno));
+		goto on_error;
+	}
+
+	ret = getsockname(sk_fd, (struct sockaddr *)&sk_addr, &sk_addr_len);
+	if (ret < 0) {
+		fprintf(stderr, "%s - Failed to retrieve socket name\n", strerror(errno));
+		goto on_error;
+	}
+
+	if ((size_t)sk_addr_len != sizeof(sk_addr)) {
+		fprintf(stderr, "Invalid socket address size\n");
+		goto on_error;
+	}
+
+	if (post_flags & CLONE_NEWUSER) {
+		ret = unshare(CLONE_NEWUSER);
+		if (ret < 0) {
+			fprintf(stderr,
+				"%s - Failed to unshare user namespace\n",
+				strerror(errno));
+			goto on_error;
+		}
+	}
+
+	if (post_flags & CLONE_NEWNET) {
+		ret = unshare(CLONE_NEWNET);
+		if (ret < 0) {
+			fprintf(stderr,
+				"%s - Failed to unshare network namespace\n",
+				strerror(errno));
+			goto on_error;
+		}
+	}
+
+	ret = write_nointr(sync_fd, &sync_add, sizeof(sync_add));
+	close(sync_fd);
+	if (ret != sizeof(sync_add)) {
+		fprintf(stderr, "Failed to synchronize with parent process\n");
+		goto on_error;
+	}
+
+	fret = 0;
+	for (;;) {
+		ssize_t r;
+
+		r = recvmsg(sk_fd, &hdr, 0);
+		if (r <= 0) {
+			fprintf(stderr, "%s - Failed to receive uevent\n", strerror(errno));
+			ret = -1;
+			break;
+		}
+
+		/* ignore libudev messages */
+		if (memcmp(buf, "libudev", 8) == 0)
+			continue;
+
+		/* ignore uevents we didn't trigger */
+		if (memcmp(buf, __UEVENT_HEADER, __UEVENT_HEADER_LEN) != 0)
+			continue;
+
+		if (!expect_uevent) {
+			fprintf(stderr, "Received unexpected uevent:\n");
+			ret = -1;
+		}
+
+		if (TH_LOG_ENABLED) {
+			/* If logging is enabled dump the received uevent. */
+			(void)write_nointr(STDERR_FILENO, buf, r);
+			(void)write_nointr(STDERR_FILENO, "\n", 1);
+		}
+
+		break;
+	}
+
+on_error:
+	close(sk_fd);
+
+	return fret;
+}
+
+int trigger_uevent(unsigned int times)
+{
+	int fd, ret;
+	unsigned int i;
+
+	fd = open(__DEV_FULL, O_RDWR | O_CLOEXEC);
+	if (fd < 0) {
+		if (errno != ENOENT)
+			return -EINVAL;
+
+		return -1;
+	}
+
+	for (i = 0; i < times; i++) {
+		ret = write_nointr(fd, "add\n", sizeof("add\n") - 1);
+		if (ret < 0) {
+			fprintf(stderr, "Failed to trigger uevent\n");
+			break;
+		}
+	}
+	close(fd);
+
+	return ret;
+}
+
+int set_death_signal(void)
+{
+	int ret;
+	pid_t ppid;
+
+	ret = prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0);
+
+	/* Check whether we have been orphaned. */
+	ppid = getppid();
+	if (ppid == 1) {
+		pid_t self;
+
+		self = getpid();
+		ret = kill(self, SIGKILL);
+	}
+
+	if (ret < 0)
+		return -1;
+
+	return 0;
+}
+
+static int do_test(unsigned long pre_flags, unsigned long post_flags,
+		   bool expect_uevent, int sync_fd)
+{
+	int ret;
+	uint64_t wait_val;
+	pid_t pid;
+	sigset_t mask;
+	sigset_t orig_mask;
+	struct timespec timeout;
+
+	sigemptyset(&mask);
+	sigaddset(&mask, SIGCHLD);
+
+	ret = sigprocmask(SIG_BLOCK, &mask, &orig_mask);
+	if (ret < 0) {
+		fprintf(stderr, "%s- Failed to block SIGCHLD\n", strerror(errno));
+		return -1;
+	}
+
+	pid = fork();
+	if (pid < 0) {
+		fprintf(stderr, "%s - Failed to fork() new process\n", strerror(errno));
+		return -1;
+	}
+
+	if (pid == 0) {
+		/* Make sure that we go away when our parent dies. */
+		ret = set_death_signal();
+		if (ret < 0) {
+			fprintf(stderr, "Failed to set PR_SET_PDEATHSIG to SIGKILL\n");
+			_exit(EXIT_FAILURE);
+		}
+
+		if (pre_flags & CLONE_NEWUSER) {
+			ret = unshare(CLONE_NEWUSER);
+			if (ret < 0) {
+				fprintf(stderr,
+					"%s - Failed to unshare user namespace\n",
+					strerror(errno));
+				_exit(EXIT_FAILURE);
+			}
+		}
+
+		if (pre_flags & CLONE_NEWNET) {
+			ret = unshare(CLONE_NEWNET);
+			if (ret < 0) {
+				fprintf(stderr,
+					"%s - Failed to unshare network namespace\n",
+					strerror(errno));
+				_exit(EXIT_FAILURE);
+			}
+		}
+
+		if (uevent_listener(post_flags, expect_uevent, sync_fd) < 0)
+			_exit(EXIT_FAILURE);
+
+		_exit(EXIT_SUCCESS);
+	}
+
+	ret = read_nointr(sync_fd, &wait_val, sizeof(wait_val));
+	if (ret != sizeof(wait_val)) {
+		fprintf(stderr, "Failed to synchronize with child process\n");
+		_exit(EXIT_FAILURE);
+	}
+
+	/* Trigger 10 uevents to account for the case where the kernel might
+	 * drop some.
+	 */
+	ret = trigger_uevent(10);
+	if (ret < 0)
+		fprintf(stderr, "Failed triggering uevents\n");
+
+	/* Wait for 2 seconds before considering this failed. This should be
+	 * plenty of time for the kernel to deliver the uevent even under heavy
+	 * load.
+	 */
+	timeout.tv_sec = 2;
+	timeout.tv_nsec = 0;
+
+again:
+	ret = sigtimedwait(&mask, NULL, &timeout);
+	if (ret < 0) {
+		if (errno == EINTR)
+			goto again;
+
+		if (!expect_uevent)
+			ret = kill(pid, SIGTERM); /* success */
+		else
+			ret = kill(pid, SIGUSR1); /* error */
+		if (ret < 0)
+			return -1;
+	}
+
+	ret = wait_for_pid(pid);
+	if (ret < 0)
+		return -1;
+
+	return ret;
+}
+
+static void signal_handler(int sig)
+{
+	if (sig == SIGTERM)
+		_exit(EXIT_SUCCESS);
+
+	_exit(EXIT_FAILURE);
+}
+
+TEST(uevent_filtering)
+{
+	int ret, sync_fd;
+	struct sigaction act;
+
+	if (geteuid()) {
+		TH_LOG("Uevent filtering tests require root privileges. Skipping test");
+		_exit(KSFT_SKIP);
+	}
+
+	ret = access(__DEV_FULL, F_OK);
+	EXPECT_EQ(0, ret) {
+		if (errno == ENOENT) {
+			TH_LOG(__DEV_FULL " does not exist. Skipping test");
+			_exit(KSFT_SKIP);
+		}
+
+		_exit(KSFT_FAIL);
+	}
+
+	act.sa_handler = signal_handler;
+	act.sa_flags = 0;
+	sigemptyset(&act.sa_mask);
+
+	ret = sigaction(SIGTERM, &act, NULL);
+	ASSERT_EQ(0, ret);
+
+	sync_fd = eventfd(0, EFD_CLOEXEC);
+	ASSERT_GE(sync_fd, 0);
+
+	/*
+	 * Setup:
+	 * - Open uevent listening socket in initial network namespace owned by
+	 *   initial user namespace.
+	 * - Trigger uevent in initial network namespace owned by initial user
+	 *   namespace.
+	 * Expected Result:
+	 * - uevent listening socket receives uevent
+	 */
+	ret = do_test(0, 0, true, sync_fd);
+	ASSERT_EQ(0, ret) {
+		goto do_cleanup;
+	}
+
+	/*
+	 * Setup:
+	 * - Open uevent listening socket in non-initial network namespace
+	 *   owned by initial user namespace.
+	 * - Trigger uevent in initial network namespace owned by initial user
+	 *   namespace.
+	 * Expected Result:
+	 * - uevent listening socket receives uevent
+	 */
+	ret = do_test(CLONE_NEWNET, 0, true, sync_fd);
+	ASSERT_EQ(0, ret) {
+		goto do_cleanup;
+	}
+
+	/*
+	 * Setup:
+	 * - unshare user namespace
+	 * - Open uevent listening socket in initial network namespace
+	 *   owned by initial user namespace.
+	 * - Trigger uevent in initial network namespace owned by initial user
+	 *   namespace.
+	 * Expected Result:
+	 * - uevent listening socket receives uevent
+	 */
+	ret = do_test(CLONE_NEWUSER, 0, true, sync_fd);
+	ASSERT_EQ(0, ret) {
+		goto do_cleanup;
+	}
+
+	/*
+	 * Setup:
+	 * - Open uevent listening socket in non-initial network namespace
+	 *   owned by non-initial user namespace.
+	 * - Trigger uevent in initial network namespace owned by initial user
+	 *   namespace.
+	 * Expected Result:
+	 * - uevent listening socket receives no uevent
+	 */
+	ret = do_test(CLONE_NEWUSER | CLONE_NEWNET, 0, false, sync_fd);
+	ASSERT_EQ(0, ret) {
+		goto do_cleanup;
+	}
+
+	/*
+	 * Setup:
+	 * - Open uevent listening socket in initial network namespace
+	 *   owned by initial user namespace.
+	 * - unshare network namespace
+	 * - Trigger uevent in initial network namespace owned by initial user
+	 *   namespace.
+	 * Expected Result:
+	 * - uevent listening socket receives uevent
+	 */
+	ret = do_test(0, CLONE_NEWNET, true, sync_fd);
+	ASSERT_EQ(0, ret) {
+		goto do_cleanup;
+	}
+
+	/*
+	 * Setup:
+	 * - Open uevent listening socket in initial network namespace
+	 *   owned by initial user namespace.
+	 * - unshare user namespace
+	 * - Trigger uevent in initial network namespace owned by initial user
+	 *   namespace.
+	 * Expected Result:
+	 * - uevent listening socket receives uevent
+	 */
+	ret = do_test(0, CLONE_NEWUSER, true, sync_fd);
+	ASSERT_EQ(0, ret) {
+		goto do_cleanup;
+	}
+
+	/*
+	 * Setup:
+	 * - Open uevent listening socket in initial network namespace
+	 *   owned by initial user namespace.
+	 * - unshare user namespace
+	 * - unshare network namespace
+	 * - Trigger uevent in initial network namespace owned by initial user
+	 *   namespace.
+	 * Expected Result:
+	 * - uevent listening socket receives uevent
+	 */
+	ret = do_test(0, CLONE_NEWUSER | CLONE_NEWNET, true, sync_fd);
+	ASSERT_EQ(0, ret) {
+		goto do_cleanup;
+	}
+
+do_cleanup:
+	close(sync_fd);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/user/Makefile b/tools/testing/selftests/user/Makefile
new file mode 100644
index 000000000..d401b63c5
--- /dev/null
+++ b/tools/testing/selftests/user/Makefile
@@ -0,0 +1,8 @@
+# Makefile for user memory selftests
+
+# No binaries, but make sure arg-less "make" doesn't trigger "run_tests"
+all:
+
+TEST_PROGS := test_user_copy.sh
+
+include ../lib.mk
diff --git a/tools/testing/selftests/user/config b/tools/testing/selftests/user/config
new file mode 100644
index 000000000..784ed8416
--- /dev/null
+++ b/tools/testing/selftests/user/config
@@ -0,0 +1 @@
+CONFIG_TEST_USER_COPY=m
diff --git a/tools/testing/selftests/user/test_user_copy.sh b/tools/testing/selftests/user/test_user_copy.sh
new file mode 100755
index 000000000..f9b31a574
--- /dev/null
+++ b/tools/testing/selftests/user/test_user_copy.sh
@@ -0,0 +1,18 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# Runs copy_to/from_user infrastructure using test_user_copy kernel module
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+if ! /sbin/modprobe -q -n test_user_copy; then
+	echo "user: module test_user_copy is not found [SKIP]"
+	exit $ksft_skip
+fi
+if /sbin/modprobe -q test_user_copy; then
+	/sbin/modprobe -q -r test_user_copy
+	echo "user_copy: ok"
+else
+	echo "user_copy: [FAIL]"
+	exit 1
+fi
diff --git a/tools/testing/selftests/vDSO/.gitignore b/tools/testing/selftests/vDSO/.gitignore
new file mode 100644
index 000000000..133bf9ee9
--- /dev/null
+++ b/tools/testing/selftests/vDSO/.gitignore
@@ -0,0 +1,2 @@
+vdso_test
+vdso_standalone_test_x86
diff --git a/tools/testing/selftests/vDSO/Makefile b/tools/testing/selftests/vDSO/Makefile
new file mode 100644
index 000000000..9e03d61f5
--- /dev/null
+++ b/tools/testing/selftests/vDSO/Makefile
@@ -0,0 +1,26 @@
+# SPDX-License-Identifier: GPL-2.0
+include ../lib.mk
+
+uname_M := $(shell uname -m 2>/dev/null || echo not)
+ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/)
+
+TEST_GEN_PROGS := $(OUTPUT)/vdso_test
+ifeq ($(ARCH),x86)
+TEST_GEN_PROGS += $(OUTPUT)/vdso_standalone_test_x86
+endif
+
+ifndef CROSS_COMPILE
+CFLAGS := -std=gnu99
+CFLAGS_vdso_standalone_test_x86 := -nostdlib -fno-asynchronous-unwind-tables -fno-stack-protector
+ifeq ($(CONFIG_X86_32),y)
+LDLIBS += -lgcc_s
+endif
+
+all: $(TEST_GEN_PROGS)
+$(OUTPUT)/vdso_test: parse_vdso.c vdso_test.c
+$(OUTPUT)/vdso_standalone_test_x86: vdso_standalone_test_x86.c parse_vdso.c
+	$(CC) $(CFLAGS) $(CFLAGS_vdso_standalone_test_x86) \
+		vdso_standalone_test_x86.c parse_vdso.c \
+		-o $@
+
+endif
diff --git a/tools/testing/selftests/vDSO/parse_vdso.c b/tools/testing/selftests/vDSO/parse_vdso.c
new file mode 100644
index 000000000..1dbb4b872
--- /dev/null
+++ b/tools/testing/selftests/vDSO/parse_vdso.c
@@ -0,0 +1,269 @@
+/*
+ * parse_vdso.c: Linux reference vDSO parser
+ * Written by Andrew Lutomirski, 2011-2014.
+ *
+ * This code is meant to be linked in to various programs that run on Linux.
+ * As such, it is available with as few restrictions as possible.  This file
+ * is licensed under the Creative Commons Zero License, version 1.0,
+ * available at http://creativecommons.org/publicdomain/zero/1.0/legalcode
+ *
+ * The vDSO is a regular ELF DSO that the kernel maps into user space when
+ * it starts a program.  It works equally well in statically and dynamically
+ * linked binaries.
+ *
+ * This code is tested on x86.  In principle it should work on any
+ * architecture that has a vDSO.
+ */
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <string.h>
+#include <limits.h>
+#include <elf.h>
+
+/*
+ * To use this vDSO parser, first call one of the vdso_init_* functions.
+ * If you've already parsed auxv, then pass the value of AT_SYSINFO_EHDR
+ * to vdso_init_from_sysinfo_ehdr.  Otherwise pass auxv to vdso_init_from_auxv.
+ * Then call vdso_sym for each symbol you want.  For example, to look up
+ * gettimeofday on x86_64, use:
+ *
+ *     <some pointer> = vdso_sym("LINUX_2.6", "gettimeofday");
+ * or
+ *     <some pointer> = vdso_sym("LINUX_2.6", "__vdso_gettimeofday");
+ *
+ * vdso_sym will return 0 if the symbol doesn't exist or if the init function
+ * failed or was not called.  vdso_sym is a little slow, so its return value
+ * should be cached.
+ *
+ * vdso_sym is threadsafe; the init functions are not.
+ *
+ * These are the prototypes:
+ */
+extern void vdso_init_from_auxv(void *auxv);
+extern void vdso_init_from_sysinfo_ehdr(uintptr_t base);
+extern void *vdso_sym(const char *version, const char *name);
+
+
+/* And here's the code. */
+#ifndef ELF_BITS
+# if ULONG_MAX > 0xffffffffUL
+#  define ELF_BITS 64
+# else
+#  define ELF_BITS 32
+# endif
+#endif
+
+#define ELF_BITS_XFORM2(bits, x) Elf##bits##_##x
+#define ELF_BITS_XFORM(bits, x) ELF_BITS_XFORM2(bits, x)
+#define ELF(x) ELF_BITS_XFORM(ELF_BITS, x)
+
+static struct vdso_info
+{
+	bool valid;
+
+	/* Load information */
+	uintptr_t load_addr;
+	uintptr_t load_offset;  /* load_addr - recorded vaddr */
+
+	/* Symbol table */
+	ELF(Sym) *symtab;
+	const char *symstrings;
+	ELF(Word) *bucket, *chain;
+	ELF(Word) nbucket, nchain;
+
+	/* Version table */
+	ELF(Versym) *versym;
+	ELF(Verdef) *verdef;
+} vdso_info;
+
+/* Straight from the ELF specification. */
+static unsigned long elf_hash(const unsigned char *name)
+{
+	unsigned long h = 0, g;
+	while (*name)
+	{
+		h = (h << 4) + *name++;
+		if (g = h & 0xf0000000)
+			h ^= g >> 24;
+		h &= ~g;
+	}
+	return h;
+}
+
+void vdso_init_from_sysinfo_ehdr(uintptr_t base)
+{
+	size_t i;
+	bool found_vaddr = false;
+
+	vdso_info.valid = false;
+
+	vdso_info.load_addr = base;
+
+	ELF(Ehdr) *hdr = (ELF(Ehdr)*)base;
+	if (hdr->e_ident[EI_CLASS] !=
+	    (ELF_BITS == 32 ? ELFCLASS32 : ELFCLASS64)) {
+		return;  /* Wrong ELF class -- check ELF_BITS */
+	}
+
+	ELF(Phdr) *pt = (ELF(Phdr)*)(vdso_info.load_addr + hdr->e_phoff);
+	ELF(Dyn) *dyn = 0;
+
+	/*
+	 * We need two things from the segment table: the load offset
+	 * and the dynamic table.
+	 */
+	for (i = 0; i < hdr->e_phnum; i++)
+	{
+		if (pt[i].p_type == PT_LOAD && !found_vaddr) {
+			found_vaddr = true;
+			vdso_info.load_offset =	base
+				+ (uintptr_t)pt[i].p_offset
+				- (uintptr_t)pt[i].p_vaddr;
+		} else if (pt[i].p_type == PT_DYNAMIC) {
+			dyn = (ELF(Dyn)*)(base + pt[i].p_offset);
+		}
+	}
+
+	if (!found_vaddr || !dyn)
+		return;  /* Failed */
+
+	/*
+	 * Fish out the useful bits of the dynamic table.
+	 */
+	ELF(Word) *hash = 0;
+	vdso_info.symstrings = 0;
+	vdso_info.symtab = 0;
+	vdso_info.versym = 0;
+	vdso_info.verdef = 0;
+	for (i = 0; dyn[i].d_tag != DT_NULL; i++) {
+		switch (dyn[i].d_tag) {
+		case DT_STRTAB:
+			vdso_info.symstrings = (const char *)
+				((uintptr_t)dyn[i].d_un.d_ptr
+				 + vdso_info.load_offset);
+			break;
+		case DT_SYMTAB:
+			vdso_info.symtab = (ELF(Sym) *)
+				((uintptr_t)dyn[i].d_un.d_ptr
+				 + vdso_info.load_offset);
+			break;
+		case DT_HASH:
+			hash = (ELF(Word) *)
+				((uintptr_t)dyn[i].d_un.d_ptr
+				 + vdso_info.load_offset);
+			break;
+		case DT_VERSYM:
+			vdso_info.versym = (ELF(Versym) *)
+				((uintptr_t)dyn[i].d_un.d_ptr
+				 + vdso_info.load_offset);
+			break;
+		case DT_VERDEF:
+			vdso_info.verdef = (ELF(Verdef) *)
+				((uintptr_t)dyn[i].d_un.d_ptr
+				 + vdso_info.load_offset);
+			break;
+		}
+	}
+	if (!vdso_info.symstrings || !vdso_info.symtab || !hash)
+		return;  /* Failed */
+
+	if (!vdso_info.verdef)
+		vdso_info.versym = 0;
+
+	/* Parse the hash table header. */
+	vdso_info.nbucket = hash[0];
+	vdso_info.nchain = hash[1];
+	vdso_info.bucket = &hash[2];
+	vdso_info.chain = &hash[vdso_info.nbucket + 2];
+
+	/* That's all we need. */
+	vdso_info.valid = true;
+}
+
+static bool vdso_match_version(ELF(Versym) ver,
+			       const char *name, ELF(Word) hash)
+{
+	/*
+	 * This is a helper function to check if the version indexed by
+	 * ver matches name (which hashes to hash).
+	 *
+	 * The version definition table is a mess, and I don't know how
+	 * to do this in better than linear time without allocating memory
+	 * to build an index.  I also don't know why the table has
+	 * variable size entries in the first place.
+	 *
+	 * For added fun, I can't find a comprehensible specification of how
+	 * to parse all the weird flags in the table.
+	 *
+	 * So I just parse the whole table every time.
+	 */
+
+	/* First step: find the version definition */
+	ver &= 0x7fff;  /* Apparently bit 15 means "hidden" */
+	ELF(Verdef) *def = vdso_info.verdef;
+	while(true) {
+		if ((def->vd_flags & VER_FLG_BASE) == 0
+		    && (def->vd_ndx & 0x7fff) == ver)
+			break;
+
+		if (def->vd_next == 0)
+			return false;  /* No definition. */
+
+		def = (ELF(Verdef) *)((char *)def + def->vd_next);
+	}
+
+	/* Now figure out whether it matches. */
+	ELF(Verdaux) *aux = (ELF(Verdaux)*)((char *)def + def->vd_aux);
+	return def->vd_hash == hash
+		&& !strcmp(name, vdso_info.symstrings + aux->vda_name);
+}
+
+void *vdso_sym(const char *version, const char *name)
+{
+	unsigned long ver_hash;
+	if (!vdso_info.valid)
+		return 0;
+
+	ver_hash = elf_hash(version);
+	ELF(Word) chain = vdso_info.bucket[elf_hash(name) % vdso_info.nbucket];
+
+	for (; chain != STN_UNDEF; chain = vdso_info.chain[chain]) {
+		ELF(Sym) *sym = &vdso_info.symtab[chain];
+
+		/* Check for a defined global or weak function w/ right name. */
+		if (ELF64_ST_TYPE(sym->st_info) != STT_FUNC)
+			continue;
+		if (ELF64_ST_BIND(sym->st_info) != STB_GLOBAL &&
+		    ELF64_ST_BIND(sym->st_info) != STB_WEAK)
+			continue;
+		if (sym->st_shndx == SHN_UNDEF)
+			continue;
+		if (strcmp(name, vdso_info.symstrings + sym->st_name))
+			continue;
+
+		/* Check symbol version. */
+		if (vdso_info.versym
+		    && !vdso_match_version(vdso_info.versym[chain],
+					   version, ver_hash))
+			continue;
+
+		return (void *)(vdso_info.load_offset + sym->st_value);
+	}
+
+	return 0;
+}
+
+void vdso_init_from_auxv(void *auxv)
+{
+	ELF(auxv_t) *elf_auxv = auxv;
+	for (int i = 0; elf_auxv[i].a_type != AT_NULL; i++)
+	{
+		if (elf_auxv[i].a_type == AT_SYSINFO_EHDR) {
+			vdso_init_from_sysinfo_ehdr(elf_auxv[i].a_un.a_val);
+			return;
+		}
+	}
+
+	vdso_info.valid = false;
+}
diff --git a/tools/testing/selftests/vDSO/vdso_standalone_test_x86.c b/tools/testing/selftests/vDSO/vdso_standalone_test_x86.c
new file mode 100644
index 000000000..93b0ebf8c
--- /dev/null
+++ b/tools/testing/selftests/vDSO/vdso_standalone_test_x86.c
@@ -0,0 +1,128 @@
+/*
+ * vdso_test.c: Sample code to test parse_vdso.c on x86
+ * Copyright (c) 2011-2014 Andy Lutomirski
+ * Subject to the GNU General Public License, version 2
+ *
+ * You can amuse yourself by compiling with:
+ * gcc -std=gnu99 -nostdlib
+ *     -Os -fno-asynchronous-unwind-tables -flto -lgcc_s
+ *      vdso_standalone_test_x86.c parse_vdso.c
+ * to generate a small binary.  On x86_64, you can omit -lgcc_s
+ * if you want the binary to be completely standalone.
+ */
+
+#include <sys/syscall.h>
+#include <sys/time.h>
+#include <unistd.h>
+#include <stdint.h>
+
+extern void *vdso_sym(const char *version, const char *name);
+extern void vdso_init_from_sysinfo_ehdr(uintptr_t base);
+extern void vdso_init_from_auxv(void *auxv);
+
+/* We need a libc functions... */
+int strcmp(const char *a, const char *b)
+{
+	/* This implementation is buggy: it never returns -1. */
+	while (*a || *b) {
+		if (*a != *b)
+			return 1;
+		if (*a == 0 || *b == 0)
+			return 1;
+		a++;
+		b++;
+	}
+
+	return 0;
+}
+
+/* ...and two syscalls.  This is x86-specific. */
+static inline long x86_syscall3(long nr, long a0, long a1, long a2)
+{
+	long ret;
+#ifdef __x86_64__
+	asm volatile ("syscall" : "=a" (ret) : "a" (nr),
+		      "D" (a0), "S" (a1), "d" (a2) :
+		      "cc", "memory", "rcx",
+		      "r8", "r9", "r10", "r11" );
+#else
+	asm volatile ("int $0x80" : "=a" (ret) : "a" (nr),
+		      "b" (a0), "c" (a1), "d" (a2) :
+		      "cc", "memory" );
+#endif
+	return ret;
+}
+
+static inline long linux_write(int fd, const void *data, size_t len)
+{
+	return x86_syscall3(__NR_write, fd, (long)data, (long)len);
+}
+
+static inline void linux_exit(int code)
+{
+	x86_syscall3(__NR_exit, code, 0, 0);
+}
+
+void to_base10(char *lastdig, time_t n)
+{
+	while (n) {
+		*lastdig = (n % 10) + '0';
+		n /= 10;
+		lastdig--;
+	}
+}
+
+__attribute__((externally_visible)) void c_main(void **stack)
+{
+	/* Parse the stack */
+	long argc = (long)*stack;
+	stack += argc + 2;
+
+	/* Now we're pointing at the environment.  Skip it. */
+	while(*stack)
+		stack++;
+	stack++;
+
+	/* Now we're pointing at auxv.  Initialize the vDSO parser. */
+	vdso_init_from_auxv((void *)stack);
+
+	/* Find gettimeofday. */
+	typedef long (*gtod_t)(struct timeval *tv, struct timezone *tz);
+	gtod_t gtod = (gtod_t)vdso_sym("LINUX_2.6", "__vdso_gettimeofday");
+
+	if (!gtod)
+		linux_exit(1);
+
+	struct timeval tv;
+	long ret = gtod(&tv, 0);
+
+	if (ret == 0) {
+		char buf[] = "The time is                     .000000\n";
+		to_base10(buf + 31, tv.tv_sec);
+		to_base10(buf + 38, tv.tv_usec);
+		linux_write(1, buf, sizeof(buf) - 1);
+	} else {
+		linux_exit(ret);
+	}
+
+	linux_exit(0);
+}
+
+/*
+ * This is the real entry point.  It passes the initial stack into
+ * the C entry point.
+ */
+asm (
+	".text\n"
+	".global _start\n"
+	".type _start,@function\n"
+	"_start:\n\t"
+#ifdef __x86_64__
+	"mov %rsp,%rdi\n\t"
+	"jmp c_main"
+#else
+	"push %esp\n\t"
+	"call c_main\n\t"
+	"int $3"
+#endif
+	);
diff --git a/tools/testing/selftests/vDSO/vdso_test.c b/tools/testing/selftests/vDSO/vdso_test.c
new file mode 100644
index 000000000..eda53f833
--- /dev/null
+++ b/tools/testing/selftests/vDSO/vdso_test.c
@@ -0,0 +1,68 @@
+/*
+ * vdso_test.c: Sample code to test parse_vdso.c
+ * Copyright (c) 2014 Andy Lutomirski
+ * Subject to the GNU General Public License, version 2
+ *
+ * Compile with:
+ * gcc -std=gnu99 vdso_test.c parse_vdso.c
+ *
+ * Tested on x86, 32-bit and 64-bit.  It may work on other architectures, too.
+ */
+
+#include <stdint.h>
+#include <elf.h>
+#include <stdio.h>
+#include <sys/auxv.h>
+#include <sys/time.h>
+
+#include "../kselftest.h"
+
+extern void *vdso_sym(const char *version, const char *name);
+extern void vdso_init_from_sysinfo_ehdr(uintptr_t base);
+extern void vdso_init_from_auxv(void *auxv);
+
+/*
+ * ARM64's vDSO exports its gettimeofday() implementation with a different
+ * name and version from other architectures, so we need to handle it as
+ * a special case.
+ */
+#if defined(__aarch64__)
+const char *version = "LINUX_2.6.39";
+const char *name = "__kernel_gettimeofday";
+#else
+const char *version = "LINUX_2.6";
+const char *name = "__vdso_gettimeofday";
+#endif
+
+int main(int argc, char **argv)
+{
+	unsigned long sysinfo_ehdr = getauxval(AT_SYSINFO_EHDR);
+	if (!sysinfo_ehdr) {
+		printf("AT_SYSINFO_EHDR is not present!\n");
+		return KSFT_SKIP;
+	}
+
+	vdso_init_from_sysinfo_ehdr(getauxval(AT_SYSINFO_EHDR));
+
+	/* Find gettimeofday. */
+	typedef long (*gtod_t)(struct timeval *tv, struct timezone *tz);
+	gtod_t gtod = (gtod_t)vdso_sym(version, name);
+
+	if (!gtod) {
+		printf("Could not find %s\n", name);
+		return KSFT_SKIP;
+	}
+
+	struct timeval tv;
+	long ret = gtod(&tv, 0);
+
+	if (ret == 0) {
+		printf("The time is %lld.%06lld\n",
+		       (long long)tv.tv_sec, (long long)tv.tv_usec);
+	} else {
+		printf("%s failed\n", name);
+		return KSFT_FAIL;
+	}
+
+	return 0;
+}
diff --git a/tools/testing/selftests/vm/.gitignore b/tools/testing/selftests/vm/.gitignore
new file mode 100644
index 000000000..af5ff83f6
--- /dev/null
+++ b/tools/testing/selftests/vm/.gitignore
@@ -0,0 +1,15 @@
+hugepage-mmap
+hugepage-shm
+map_hugetlb
+map_populate
+thuge-gen
+compaction_test
+mlock2-tests
+on-fault-limit
+transhuge-stress
+userfaultfd
+mlock-intersect-test
+mlock-random-test
+virtual_address_range
+gup_benchmark
+va_128TBswitch
diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile
new file mode 100644
index 000000000..2cf3dc49b
--- /dev/null
+++ b/tools/testing/selftests/vm/Makefile
@@ -0,0 +1,34 @@
+# SPDX-License-Identifier: GPL-2.0
+# Makefile for vm selftests
+
+ifndef OUTPUT
+  OUTPUT := $(shell pwd)
+endif
+
+CFLAGS = -Wall -I ../../../../usr/include $(EXTRA_CFLAGS)
+LDLIBS = -lrt
+TEST_GEN_FILES = compaction_test
+TEST_GEN_FILES += gup_benchmark
+TEST_GEN_FILES += hugepage-mmap
+TEST_GEN_FILES += hugepage-shm
+TEST_GEN_FILES += map_hugetlb
+TEST_GEN_FILES += map_populate
+TEST_GEN_FILES += mlock-random-test
+TEST_GEN_FILES += mlock2-tests
+TEST_GEN_FILES += on-fault-limit
+TEST_GEN_FILES += thuge-gen
+TEST_GEN_FILES += transhuge-stress
+TEST_GEN_FILES += userfaultfd
+TEST_GEN_FILES += va_128TBswitch
+TEST_GEN_FILES += virtual_address_range
+
+TEST_PROGS := run_vmtests
+
+TEST_FILES := test_vmalloc.sh
+
+KSFT_KHDR_INSTALL := 1
+include ../lib.mk
+
+$(OUTPUT)/userfaultfd: LDLIBS += -lpthread
+
+$(OUTPUT)/mlock-random-test: LDLIBS += -lcap
diff --git a/tools/testing/selftests/vm/compaction_test.c b/tools/testing/selftests/vm/compaction_test.c
new file mode 100644
index 000000000..bcec71250
--- /dev/null
+++ b/tools/testing/selftests/vm/compaction_test.c
@@ -0,0 +1,230 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *
+ * A test for the patch "Allow compaction of unevictable pages".
+ * With this patch we should be able to allocate at least 1/4
+ * of RAM in huge pages. Without the patch much less is
+ * allocated.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+#include <sys/resource.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <unistd.h>
+#include <string.h>
+
+#include "../kselftest.h"
+
+#define MAP_SIZE 1048576
+
+struct map_list {
+	void *map;
+	struct map_list *next;
+};
+
+int read_memory_info(unsigned long *memfree, unsigned long *hugepagesize)
+{
+	char  buffer[256] = {0};
+	char *cmd = "cat /proc/meminfo | grep -i memfree | grep -o '[0-9]*'";
+	FILE *cmdfile = popen(cmd, "r");
+
+	if (!(fgets(buffer, sizeof(buffer), cmdfile))) {
+		perror("Failed to read meminfo\n");
+		return -1;
+	}
+
+	pclose(cmdfile);
+
+	*memfree = atoll(buffer);
+	cmd = "cat /proc/meminfo | grep -i hugepagesize | grep -o '[0-9]*'";
+	cmdfile = popen(cmd, "r");
+
+	if (!(fgets(buffer, sizeof(buffer), cmdfile))) {
+		perror("Failed to read meminfo\n");
+		return -1;
+	}
+
+	pclose(cmdfile);
+	*hugepagesize = atoll(buffer);
+
+	return 0;
+}
+
+int prereq(void)
+{
+	char allowed;
+	int fd;
+
+	fd = open("/proc/sys/vm/compact_unevictable_allowed",
+		  O_RDONLY | O_NONBLOCK);
+	if (fd < 0) {
+		perror("Failed to open\n"
+		       "/proc/sys/vm/compact_unevictable_allowed\n");
+		return -1;
+	}
+
+	if (read(fd, &allowed, sizeof(char)) != sizeof(char)) {
+		perror("Failed to read from\n"
+		       "/proc/sys/vm/compact_unevictable_allowed\n");
+		close(fd);
+		return -1;
+	}
+
+	close(fd);
+	if (allowed == '1')
+		return 0;
+
+	return -1;
+}
+
+int check_compaction(unsigned long mem_free, unsigned int hugepage_size)
+{
+	int fd;
+	int compaction_index = 0;
+	char initial_nr_hugepages[10] = {0};
+	char nr_hugepages[10] = {0};
+
+	/* We want to test with 80% of available memory. Else, OOM killer comes
+	   in to play */
+	mem_free = mem_free * 0.8;
+
+	fd = open("/proc/sys/vm/nr_hugepages", O_RDWR | O_NONBLOCK);
+	if (fd < 0) {
+		perror("Failed to open /proc/sys/vm/nr_hugepages");
+		return -1;
+	}
+
+	if (read(fd, initial_nr_hugepages, sizeof(initial_nr_hugepages)) <= 0) {
+		perror("Failed to read from /proc/sys/vm/nr_hugepages");
+		goto close_fd;
+	}
+
+	/* Start with the initial condition of 0 huge pages*/
+	if (write(fd, "0", sizeof(char)) != sizeof(char)) {
+		perror("Failed to write 0 to /proc/sys/vm/nr_hugepages\n");
+		goto close_fd;
+	}
+
+	lseek(fd, 0, SEEK_SET);
+
+	/* Request a large number of huge pages. The Kernel will allocate
+	   as much as it can */
+	if (write(fd, "100000", (6*sizeof(char))) != (6*sizeof(char))) {
+		perror("Failed to write 100000 to /proc/sys/vm/nr_hugepages\n");
+		goto close_fd;
+	}
+
+	lseek(fd, 0, SEEK_SET);
+
+	if (read(fd, nr_hugepages, sizeof(nr_hugepages)) <= 0) {
+		perror("Failed to re-read from /proc/sys/vm/nr_hugepages\n");
+		goto close_fd;
+	}
+
+	/* We should have been able to request at least 1/3 rd of the memory in
+	   huge pages */
+	compaction_index = mem_free/(atoi(nr_hugepages) * hugepage_size);
+
+	if (compaction_index > 3) {
+		printf("No of huge pages allocated = %d\n",
+		       (atoi(nr_hugepages)));
+		fprintf(stderr, "ERROR: Less that 1/%d of memory is available\n"
+			"as huge pages\n", compaction_index);
+		goto close_fd;
+	}
+
+	printf("No of huge pages allocated = %d\n",
+	       (atoi(nr_hugepages)));
+
+	lseek(fd, 0, SEEK_SET);
+
+	if (write(fd, initial_nr_hugepages, strlen(initial_nr_hugepages))
+	    != strlen(initial_nr_hugepages)) {
+		perror("Failed to write value to /proc/sys/vm/nr_hugepages\n");
+		goto close_fd;
+	}
+
+	close(fd);
+	return 0;
+
+ close_fd:
+	close(fd);
+	printf("Not OK. Compaction test failed.");
+	return -1;
+}
+
+
+int main(int argc, char **argv)
+{
+	struct rlimit lim;
+	struct map_list *list, *entry;
+	size_t page_size, i;
+	void *map = NULL;
+	unsigned long mem_free = 0;
+	unsigned long hugepage_size = 0;
+	unsigned long mem_fragmentable = 0;
+
+	if (prereq() != 0) {
+		printf("Either the sysctl compact_unevictable_allowed is not\n"
+		       "set to 1 or couldn't read the proc file.\n"
+		       "Skipping the test\n");
+		return KSFT_SKIP;
+	}
+
+	lim.rlim_cur = RLIM_INFINITY;
+	lim.rlim_max = RLIM_INFINITY;
+	if (setrlimit(RLIMIT_MEMLOCK, &lim)) {
+		perror("Failed to set rlimit:\n");
+		return -1;
+	}
+
+	page_size = getpagesize();
+
+	list = NULL;
+
+	if (read_memory_info(&mem_free, &hugepage_size) != 0) {
+		printf("ERROR: Cannot read meminfo\n");
+		return -1;
+	}
+
+	mem_fragmentable = mem_free * 0.8 / 1024;
+
+	while (mem_fragmentable > 0) {
+		map = mmap(NULL, MAP_SIZE, PROT_READ | PROT_WRITE,
+			   MAP_ANONYMOUS | MAP_PRIVATE | MAP_LOCKED, -1, 0);
+		if (map == MAP_FAILED)
+			break;
+
+		entry = malloc(sizeof(struct map_list));
+		if (!entry) {
+			munmap(map, MAP_SIZE);
+			break;
+		}
+		entry->map = map;
+		entry->next = list;
+		list = entry;
+
+		/* Write something (in this case the address of the map) to
+		 * ensure that KSM can't merge the mapped pages
+		 */
+		for (i = 0; i < MAP_SIZE; i += page_size)
+			*(unsigned long *)(map + i) = (unsigned long)map + i;
+
+		mem_fragmentable--;
+	}
+
+	for (entry = list; entry != NULL; entry = entry->next) {
+		munmap(entry->map, MAP_SIZE);
+		if (!entry->next)
+			break;
+		entry = entry->next;
+	}
+
+	if (check_compaction(mem_free, hugepage_size) == 0)
+		return 0;
+
+	return -1;
+}
diff --git a/tools/testing/selftests/vm/config b/tools/testing/selftests/vm/config
new file mode 100644
index 000000000..1c0d76cb5
--- /dev/null
+++ b/tools/testing/selftests/vm/config
@@ -0,0 +1,2 @@
+CONFIG_SYSVIPC=y
+CONFIG_USERFAULTFD=y
diff --git a/tools/testing/selftests/vm/gup_benchmark.c b/tools/testing/selftests/vm/gup_benchmark.c
new file mode 100644
index 000000000..17da711f2
--- /dev/null
+++ b/tools/testing/selftests/vm/gup_benchmark.c
@@ -0,0 +1,93 @@
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <sys/prctl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include <linux/types.h>
+
+#define MB (1UL << 20)
+#define PAGE_SIZE sysconf(_SC_PAGESIZE)
+
+#define GUP_FAST_BENCHMARK	_IOWR('g', 1, struct gup_benchmark)
+
+struct gup_benchmark {
+	__u64 delta_usec;
+	__u64 addr;
+	__u64 size;
+	__u32 nr_pages_per_call;
+	__u32 flags;
+	__u64 expansion[10];	/* For future use */
+};
+
+int main(int argc, char **argv)
+{
+	struct gup_benchmark gup;
+	unsigned long size = 128 * MB;
+	int i, fd, opt, nr_pages = 1, thp = -1, repeats = 1, write = 0;
+	char *p;
+
+	while ((opt = getopt(argc, argv, "m:r:n:tT")) != -1) {
+		switch (opt) {
+		case 'm':
+			size = atoi(optarg) * MB;
+			break;
+		case 'r':
+			repeats = atoi(optarg);
+			break;
+		case 'n':
+			nr_pages = atoi(optarg);
+			break;
+		case 't':
+			thp = 1;
+			break;
+		case 'T':
+			thp = 0;
+			break;
+		case 'w':
+			write = 1;
+			break;
+		default:
+			return -1;
+		}
+	}
+
+	gup.nr_pages_per_call = nr_pages;
+	gup.flags = write;
+
+	fd = open("/sys/kernel/debug/gup_benchmark", O_RDWR);
+	if (fd == -1)
+		perror("open"), exit(1);
+
+	p = mmap(NULL, size, PROT_READ | PROT_WRITE,
+			MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+	if (p == MAP_FAILED)
+		perror("mmap"), exit(1);
+	gup.addr = (unsigned long)p;
+
+	if (thp == 1)
+		madvise(p, size, MADV_HUGEPAGE);
+	else if (thp == 0)
+		madvise(p, size, MADV_NOHUGEPAGE);
+
+	for (; (unsigned long)p < gup.addr + size; p += PAGE_SIZE)
+		p[0] = 0;
+
+	for (i = 0; i < repeats; i++) {
+		gup.size = size;
+		if (ioctl(fd, GUP_FAST_BENCHMARK, &gup))
+			perror("ioctl"), exit(1);
+
+		printf("Time: %lld us", gup.delta_usec);
+		if (gup.size != size)
+			printf(", truncated (size: %lld)", gup.size);
+		printf("\n");
+	}
+
+	return 0;
+}
diff --git a/tools/testing/selftests/vm/hugepage-mmap.c b/tools/testing/selftests/vm/hugepage-mmap.c
new file mode 100644
index 000000000..93f9e7b81
--- /dev/null
+++ b/tools/testing/selftests/vm/hugepage-mmap.c
@@ -0,0 +1,93 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * hugepage-mmap:
+ *
+ * Example of using huge page memory in a user application using the mmap
+ * system call.  Before running this application, make sure that the
+ * administrator has mounted the hugetlbfs filesystem (on some directory
+ * like /mnt) using the command mount -t hugetlbfs nodev /mnt. In this
+ * example, the app is requesting memory of size 256MB that is backed by
+ * huge pages.
+ *
+ * For the ia64 architecture, the Linux kernel reserves Region number 4 for
+ * huge pages.  That means that if one requires a fixed address, a huge page
+ * aligned address starting with 0x800000... will be required.  If a fixed
+ * address is not required, the kernel will select an address in the proper
+ * range.
+ * Other architectures, such as ppc64, i386 or x86_64 are not so constrained.
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <fcntl.h>
+
+#define FILE_NAME "huge/hugepagefile"
+#define LENGTH (256UL*1024*1024)
+#define PROTECTION (PROT_READ | PROT_WRITE)
+
+/* Only ia64 requires this */
+#ifdef __ia64__
+#define ADDR (void *)(0x8000000000000000UL)
+#define FLAGS (MAP_SHARED | MAP_FIXED)
+#else
+#define ADDR (void *)(0x0UL)
+#define FLAGS (MAP_SHARED)
+#endif
+
+static void check_bytes(char *addr)
+{
+	printf("First hex is %x\n", *((unsigned int *)addr));
+}
+
+static void write_bytes(char *addr)
+{
+	unsigned long i;
+
+	for (i = 0; i < LENGTH; i++)
+		*(addr + i) = (char)i;
+}
+
+static int read_bytes(char *addr)
+{
+	unsigned long i;
+
+	check_bytes(addr);
+	for (i = 0; i < LENGTH; i++)
+		if (*(addr + i) != (char)i) {
+			printf("Mismatch at %lu\n", i);
+			return 1;
+		}
+	return 0;
+}
+
+int main(void)
+{
+	void *addr;
+	int fd, ret;
+
+	fd = open(FILE_NAME, O_CREAT | O_RDWR, 0755);
+	if (fd < 0) {
+		perror("Open failed");
+		exit(1);
+	}
+
+	addr = mmap(ADDR, LENGTH, PROTECTION, FLAGS, fd, 0);
+	if (addr == MAP_FAILED) {
+		perror("mmap");
+		unlink(FILE_NAME);
+		exit(1);
+	}
+
+	printf("Returned address is %p\n", addr);
+	check_bytes(addr);
+	write_bytes(addr);
+	ret = read_bytes(addr);
+
+	munmap(addr, LENGTH);
+	close(fd);
+	unlink(FILE_NAME);
+
+	return ret;
+}
diff --git a/tools/testing/selftests/vm/hugepage-shm.c b/tools/testing/selftests/vm/hugepage-shm.c
new file mode 100644
index 000000000..e2527f320
--- /dev/null
+++ b/tools/testing/selftests/vm/hugepage-shm.c
@@ -0,0 +1,101 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * hugepage-shm:
+ *
+ * Example of using huge page memory in a user application using Sys V shared
+ * memory system calls.  In this example the app is requesting 256MB of
+ * memory that is backed by huge pages.  The application uses the flag
+ * SHM_HUGETLB in the shmget system call to inform the kernel that it is
+ * requesting huge pages.
+ *
+ * For the ia64 architecture, the Linux kernel reserves Region number 4 for
+ * huge pages.  That means that if one requires a fixed address, a huge page
+ * aligned address starting with 0x800000... will be required.  If a fixed
+ * address is not required, the kernel will select an address in the proper
+ * range.
+ * Other architectures, such as ppc64, i386 or x86_64 are not so constrained.
+ *
+ * Note: The default shared memory limit is quite low on many kernels,
+ * you may need to increase it via:
+ *
+ * echo 268435456 > /proc/sys/kernel/shmmax
+ *
+ * This will increase the maximum size per shared memory segment to 256MB.
+ * The other limit that you will hit eventually is shmall which is the
+ * total amount of shared memory in pages. To set it to 16GB on a system
+ * with a 4kB pagesize do:
+ *
+ * echo 4194304 > /proc/sys/kernel/shmall
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/ipc.h>
+#include <sys/shm.h>
+#include <sys/mman.h>
+
+#ifndef SHM_HUGETLB
+#define SHM_HUGETLB 04000
+#endif
+
+#define LENGTH (256UL*1024*1024)
+
+#define dprintf(x)  printf(x)
+
+/* Only ia64 requires this */
+#ifdef __ia64__
+#define ADDR (void *)(0x8000000000000000UL)
+#define SHMAT_FLAGS (SHM_RND)
+#else
+#define ADDR (void *)(0x0UL)
+#define SHMAT_FLAGS (0)
+#endif
+
+int main(void)
+{
+	int shmid;
+	unsigned long i;
+	char *shmaddr;
+
+	shmid = shmget(2, LENGTH, SHM_HUGETLB | IPC_CREAT | SHM_R | SHM_W);
+	if (shmid < 0) {
+		perror("shmget");
+		exit(1);
+	}
+	printf("shmid: 0x%x\n", shmid);
+
+	shmaddr = shmat(shmid, ADDR, SHMAT_FLAGS);
+	if (shmaddr == (char *)-1) {
+		perror("Shared memory attach failure");
+		shmctl(shmid, IPC_RMID, NULL);
+		exit(2);
+	}
+	printf("shmaddr: %p\n", shmaddr);
+
+	dprintf("Starting the writes:\n");
+	for (i = 0; i < LENGTH; i++) {
+		shmaddr[i] = (char)(i);
+		if (!(i % (1024 * 1024)))
+			dprintf(".");
+	}
+	dprintf("\n");
+
+	dprintf("Starting the Check...");
+	for (i = 0; i < LENGTH; i++)
+		if (shmaddr[i] != (char)i) {
+			printf("\nIndex %lu mismatched\n", i);
+			exit(3);
+		}
+	dprintf("Done.\n");
+
+	if (shmdt((const void *)shmaddr) != 0) {
+		perror("Detach failure");
+		shmctl(shmid, IPC_RMID, NULL);
+		exit(4);
+	}
+
+	shmctl(shmid, IPC_RMID, NULL);
+
+	return 0;
+}
diff --git a/tools/testing/selftests/vm/map_hugetlb.c b/tools/testing/selftests/vm/map_hugetlb.c
new file mode 100644
index 000000000..9b777fa95
--- /dev/null
+++ b/tools/testing/selftests/vm/map_hugetlb.c
@@ -0,0 +1,84 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Example of using hugepage memory in a user application using the mmap
+ * system call with MAP_HUGETLB flag.  Before running this program make
+ * sure the administrator has allocated enough default sized huge pages
+ * to cover the 256 MB allocation.
+ *
+ * For ia64 architecture, Linux kernel reserves Region number 4 for hugepages.
+ * That means the addresses starting with 0x800000... will need to be
+ * specified.  Specifying a fixed address is not required on ppc64, i386
+ * or x86_64.
+ */
+#include <stdlib.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <fcntl.h>
+
+#define LENGTH (256UL*1024*1024)
+#define PROTECTION (PROT_READ | PROT_WRITE)
+
+#ifndef MAP_HUGETLB
+#define MAP_HUGETLB 0x40000 /* arch specific */
+#endif
+
+/* Only ia64 requires this */
+#ifdef __ia64__
+#define ADDR (void *)(0x8000000000000000UL)
+#define FLAGS (MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB | MAP_FIXED)
+#else
+#define ADDR (void *)(0x0UL)
+#define FLAGS (MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB)
+#endif
+
+static void check_bytes(char *addr)
+{
+	printf("First hex is %x\n", *((unsigned int *)addr));
+}
+
+static void write_bytes(char *addr)
+{
+	unsigned long i;
+
+	for (i = 0; i < LENGTH; i++)
+		*(addr + i) = (char)i;
+}
+
+static int read_bytes(char *addr)
+{
+	unsigned long i;
+
+	check_bytes(addr);
+	for (i = 0; i < LENGTH; i++)
+		if (*(addr + i) != (char)i) {
+			printf("Mismatch at %lu\n", i);
+			return 1;
+		}
+	return 0;
+}
+
+int main(void)
+{
+	void *addr;
+	int ret;
+
+	addr = mmap(ADDR, LENGTH, PROTECTION, FLAGS, -1, 0);
+	if (addr == MAP_FAILED) {
+		perror("mmap");
+		exit(1);
+	}
+
+	printf("Returned address is %p\n", addr);
+	check_bytes(addr);
+	write_bytes(addr);
+	ret = read_bytes(addr);
+
+	/* munmap() length of MAP_HUGETLB memory must be hugepage aligned */
+	if (munmap(addr, LENGTH)) {
+		perror("munmap");
+		exit(1);
+	}
+
+	return ret;
+}
diff --git a/tools/testing/selftests/vm/map_populate.c b/tools/testing/selftests/vm/map_populate.c
new file mode 100644
index 000000000..6b8aeaa0b
--- /dev/null
+++ b/tools/testing/selftests/vm/map_populate.c
@@ -0,0 +1,113 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2018 Dmitry Safonov, Arista Networks
+ *
+ * MAP_POPULATE | MAP_PRIVATE should COW VMA pages.
+ */
+
+#define _GNU_SOURCE
+#include <errno.h>
+#include <fcntl.h>
+#include <sys/mman.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#ifndef MMAP_SZ
+#define MMAP_SZ		4096
+#endif
+
+#define BUG_ON(condition, description)					\
+	do {								\
+		if (condition) {					\
+			fprintf(stderr, "[FAIL]\t%s:%d\t%s:%s\n", __func__, \
+				__LINE__, (description), strerror(errno)); \
+			exit(1);					\
+		}							\
+	} while (0)
+
+static int parent_f(int sock, unsigned long *smap, int child)
+{
+	int status, ret;
+
+	ret = read(sock, &status, sizeof(int));
+	BUG_ON(ret <= 0, "read(sock)");
+
+	*smap = 0x22222BAD;
+	ret = msync(smap, MMAP_SZ, MS_SYNC);
+	BUG_ON(ret, "msync()");
+
+	ret = write(sock, &status, sizeof(int));
+	BUG_ON(ret <= 0, "write(sock)");
+
+	waitpid(child, &status, 0);
+	BUG_ON(!WIFEXITED(status), "child in unexpected state");
+
+	return WEXITSTATUS(status);
+}
+
+static int child_f(int sock, unsigned long *smap, int fd)
+{
+	int ret, buf = 0;
+
+	smap = mmap(0, MMAP_SZ, PROT_READ | PROT_WRITE,
+			MAP_PRIVATE | MAP_POPULATE, fd, 0);
+	BUG_ON(smap == MAP_FAILED, "mmap()");
+
+	BUG_ON(*smap != 0xdeadbabe, "MAP_PRIVATE | MAP_POPULATE changed file");
+
+	ret = write(sock, &buf, sizeof(int));
+	BUG_ON(ret <= 0, "write(sock)");
+
+	ret = read(sock, &buf, sizeof(int));
+	BUG_ON(ret <= 0, "read(sock)");
+
+	BUG_ON(*smap == 0x22222BAD, "MAP_POPULATE didn't COW private page");
+	BUG_ON(*smap != 0xdeadbabe, "mapping was corrupted");
+
+	return 0;
+}
+
+int main(int argc, char **argv)
+{
+	int sock[2], child, ret;
+	FILE *ftmp;
+	unsigned long *smap;
+
+	ftmp = tmpfile();
+	BUG_ON(ftmp == 0, "tmpfile()");
+
+	ret = ftruncate(fileno(ftmp), MMAP_SZ);
+	BUG_ON(ret, "ftruncate()");
+
+	smap = mmap(0, MMAP_SZ, PROT_READ | PROT_WRITE,
+			MAP_SHARED, fileno(ftmp), 0);
+	BUG_ON(smap == MAP_FAILED, "mmap()");
+
+	*smap = 0xdeadbabe;
+	/* Probably unnecessary, but let it be. */
+	ret = msync(smap, MMAP_SZ, MS_SYNC);
+	BUG_ON(ret, "msync()");
+
+	ret = socketpair(PF_LOCAL, SOCK_SEQPACKET, 0, sock);
+	BUG_ON(ret, "socketpair()");
+
+	child = fork();
+	BUG_ON(child == -1, "fork()");
+
+	if (child) {
+		ret = close(sock[0]);
+		BUG_ON(ret, "close()");
+
+		return parent_f(sock[1], smap, child);
+	}
+
+	ret = close(sock[1]);
+	BUG_ON(ret, "close()");
+
+	return child_f(sock[0], smap, fileno(ftmp));
+}
diff --git a/tools/testing/selftests/vm/mlock-random-test.c b/tools/testing/selftests/vm/mlock-random-test.c
new file mode 100644
index 000000000..ff4d72eb7
--- /dev/null
+++ b/tools/testing/selftests/vm/mlock-random-test.c
@@ -0,0 +1,294 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * It tests the mlock/mlock2() when they are invoked
+ * on randomly memory region.
+ */
+#include <unistd.h>
+#include <sys/resource.h>
+#include <sys/capability.h>
+#include <sys/mman.h>
+#include <fcntl.h>
+#include <string.h>
+#include <sys/ipc.h>
+#include <sys/shm.h>
+#include <time.h>
+#include "mlock2.h"
+
+#define CHUNK_UNIT (128 * 1024)
+#define MLOCK_RLIMIT_SIZE (CHUNK_UNIT * 2)
+#define MLOCK_WITHIN_LIMIT_SIZE CHUNK_UNIT
+#define MLOCK_OUTOF_LIMIT_SIZE (CHUNK_UNIT * 3)
+
+#define TEST_LOOP 100
+#define PAGE_ALIGN(size, ps) (((size) + ((ps) - 1)) & ~((ps) - 1))
+
+int set_cap_limits(rlim_t max)
+{
+	struct rlimit new;
+	cap_t cap = cap_init();
+
+	new.rlim_cur = max;
+	new.rlim_max = max;
+	if (setrlimit(RLIMIT_MEMLOCK, &new)) {
+		perror("setrlimit() returns error\n");
+		return -1;
+	}
+
+	/* drop capabilities including CAP_IPC_LOCK */
+	if (cap_set_proc(cap)) {
+		perror("cap_set_proc() returns error\n");
+		return -2;
+	}
+
+	return 0;
+}
+
+int get_proc_locked_vm_size(void)
+{
+	FILE *f;
+	int ret = -1;
+	char line[1024] = {0};
+	unsigned long lock_size = 0;
+
+	f = fopen("/proc/self/status", "r");
+	if (!f) {
+		perror("fopen");
+		return -1;
+	}
+
+	while (fgets(line, 1024, f)) {
+		if (strstr(line, "VmLck")) {
+			ret = sscanf(line, "VmLck:\t%8lu kB", &lock_size);
+			if (ret <= 0) {
+				printf("sscanf() on VmLck error: %s: %d\n",
+						line, ret);
+				fclose(f);
+				return -1;
+			}
+			fclose(f);
+			return (int)(lock_size << 10);
+		}
+	}
+
+	perror("cann't parse VmLck in /proc/self/status\n");
+	fclose(f);
+	return -1;
+}
+
+/*
+ * Get the MMUPageSize of the memory region including input
+ * address from proc file.
+ *
+ * return value: on error case, 0 will be returned.
+ * Otherwise the page size(in bytes) is returned.
+ */
+int get_proc_page_size(unsigned long addr)
+{
+	FILE *smaps;
+	char *line;
+	unsigned long mmupage_size = 0;
+	size_t size;
+
+	smaps = seek_to_smaps_entry(addr);
+	if (!smaps) {
+		printf("Unable to parse /proc/self/smaps\n");
+		return 0;
+	}
+
+	while (getline(&line, &size, smaps) > 0) {
+		if (!strstr(line, "MMUPageSize")) {
+			free(line);
+			line = NULL;
+			size = 0;
+			continue;
+		}
+
+		/* found the MMUPageSize of this section */
+		if (sscanf(line, "MMUPageSize:    %8lu kB",
+					&mmupage_size) < 1) {
+			printf("Unable to parse smaps entry for Size:%s\n",
+					line);
+			break;
+		}
+
+	}
+	free(line);
+	if (smaps)
+		fclose(smaps);
+	return mmupage_size << 10;
+}
+
+/*
+ * Test mlock/mlock2() on provided memory chunk.
+ * It expects the mlock/mlock2() to be successful (within rlimit)
+ *
+ * With allocated memory chunk [p, p + alloc_size), this
+ * test will choose start/len randomly to perform mlock/mlock2
+ * [start, start +  len] memory range. The range is within range
+ * of the allocated chunk.
+ *
+ * The memory region size alloc_size is within the rlimit.
+ * So we always expect a success of mlock/mlock2.
+ *
+ * VmLck is assumed to be 0 before this test.
+ *
+ *    return value: 0 - success
+ *    else: failure
+ */
+int test_mlock_within_limit(char *p, int alloc_size)
+{
+	int i;
+	int ret = 0;
+	int locked_vm_size = 0;
+	struct rlimit cur;
+	int page_size = 0;
+
+	getrlimit(RLIMIT_MEMLOCK, &cur);
+	if (cur.rlim_cur < alloc_size) {
+		printf("alloc_size[%d] < %u rlimit,lead to mlock failure\n",
+				alloc_size, (unsigned int)cur.rlim_cur);
+		return -1;
+	}
+
+	srand(time(NULL));
+	for (i = 0; i < TEST_LOOP; i++) {
+		/*
+		 * - choose mlock/mlock2 randomly
+		 * - choose lock_size randomly but lock_size < alloc_size
+		 * - choose start_offset randomly but p+start_offset+lock_size
+		 *   < p+alloc_size
+		 */
+		int is_mlock = !!(rand() % 2);
+		int lock_size = rand() % alloc_size;
+		int start_offset = rand() % (alloc_size - lock_size);
+
+		if (is_mlock)
+			ret = mlock(p + start_offset, lock_size);
+		else
+			ret = mlock2_(p + start_offset, lock_size,
+				       MLOCK_ONFAULT);
+
+		if (ret) {
+			printf("%s() failure at |%p(%d)| mlock:|%p(%d)|\n",
+					is_mlock ? "mlock" : "mlock2",
+					p, alloc_size,
+					p + start_offset, lock_size);
+			return ret;
+		}
+	}
+
+	/*
+	 * Check VmLck left by the tests.
+	 */
+	locked_vm_size = get_proc_locked_vm_size();
+	page_size = get_proc_page_size((unsigned long)p);
+	if (page_size == 0) {
+		printf("cannot get proc MMUPageSize\n");
+		return -1;
+	}
+
+	if (locked_vm_size > PAGE_ALIGN(alloc_size, page_size) + page_size) {
+		printf("test_mlock_within_limit() left VmLck:%d on %d chunk\n",
+				locked_vm_size, alloc_size);
+		return -1;
+	}
+
+	return 0;
+}
+
+
+/*
+ * We expect the mlock/mlock2() to be fail (outof limitation)
+ *
+ * With allocated memory chunk [p, p + alloc_size), this
+ * test will randomly choose start/len and perform mlock/mlock2
+ * on [start, start+len] range.
+ *
+ * The memory region size alloc_size is above the rlimit.
+ * And the len to be locked is higher than rlimit.
+ * So we always expect a failure of mlock/mlock2.
+ * No locked page number should be increased as a side effect.
+ *
+ *    return value: 0 - success
+ *    else: failure
+ */
+int test_mlock_outof_limit(char *p, int alloc_size)
+{
+	int i;
+	int ret = 0;
+	int locked_vm_size = 0, old_locked_vm_size = 0;
+	struct rlimit cur;
+
+	getrlimit(RLIMIT_MEMLOCK, &cur);
+	if (cur.rlim_cur >= alloc_size) {
+		printf("alloc_size[%d] >%u rlimit, violates test condition\n",
+				alloc_size, (unsigned int)cur.rlim_cur);
+		return -1;
+	}
+
+	old_locked_vm_size = get_proc_locked_vm_size();
+	srand(time(NULL));
+	for (i = 0; i < TEST_LOOP; i++) {
+		int is_mlock = !!(rand() % 2);
+		int lock_size = (rand() % (alloc_size - cur.rlim_cur))
+			+ cur.rlim_cur;
+		int start_offset = rand() % (alloc_size - lock_size);
+
+		if (is_mlock)
+			ret = mlock(p + start_offset, lock_size);
+		else
+			ret = mlock2_(p + start_offset, lock_size,
+					MLOCK_ONFAULT);
+		if (ret == 0) {
+			printf("%s() succeeds? on %p(%d) mlock%p(%d)\n",
+					is_mlock ? "mlock" : "mlock2",
+					p, alloc_size,
+					p + start_offset, lock_size);
+			return -1;
+		}
+	}
+
+	locked_vm_size = get_proc_locked_vm_size();
+	if (locked_vm_size != old_locked_vm_size) {
+		printf("tests leads to new mlocked page: old[%d], new[%d]\n",
+				old_locked_vm_size,
+				locked_vm_size);
+		return -1;
+	}
+
+	return 0;
+}
+
+int main(int argc, char **argv)
+{
+	char *p = NULL;
+	int ret = 0;
+
+	if (set_cap_limits(MLOCK_RLIMIT_SIZE))
+		return -1;
+
+	p = malloc(MLOCK_WITHIN_LIMIT_SIZE);
+	if (p == NULL) {
+		perror("malloc() failure\n");
+		return -1;
+	}
+	ret = test_mlock_within_limit(p, MLOCK_WITHIN_LIMIT_SIZE);
+	if (ret)
+		return ret;
+	munlock(p, MLOCK_WITHIN_LIMIT_SIZE);
+	free(p);
+
+
+	p = malloc(MLOCK_OUTOF_LIMIT_SIZE);
+	if (p == NULL) {
+		perror("malloc() failure\n");
+		return -1;
+	}
+	ret = test_mlock_outof_limit(p, MLOCK_OUTOF_LIMIT_SIZE);
+	if (ret)
+		return ret;
+	munlock(p, MLOCK_OUTOF_LIMIT_SIZE);
+	free(p);
+
+	return 0;
+}
diff --git a/tools/testing/selftests/vm/mlock2-tests.c b/tools/testing/selftests/vm/mlock2-tests.c
new file mode 100644
index 000000000..11b2301f3
--- /dev/null
+++ b/tools/testing/selftests/vm/mlock2-tests.c
@@ -0,0 +1,520 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <sys/mman.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <string.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+#include <stdbool.h>
+#include "mlock2.h"
+
+#include "../kselftest.h"
+
+struct vm_boundaries {
+	unsigned long start;
+	unsigned long end;
+};
+
+static int get_vm_area(unsigned long addr, struct vm_boundaries *area)
+{
+	FILE *file;
+	int ret = 1;
+	char line[1024] = {0};
+	char *end_addr;
+	char *stop;
+	unsigned long start;
+	unsigned long end;
+
+	if (!area)
+		return ret;
+
+	file = fopen("/proc/self/maps", "r");
+	if (!file) {
+		perror("fopen");
+		return ret;
+	}
+
+	memset(area, 0, sizeof(struct vm_boundaries));
+
+	while(fgets(line, 1024, file)) {
+		end_addr = strchr(line, '-');
+		if (!end_addr) {
+			printf("cannot parse /proc/self/maps\n");
+			goto out;
+		}
+		*end_addr = '\0';
+		end_addr++;
+		stop = strchr(end_addr, ' ');
+		if (!stop) {
+			printf("cannot parse /proc/self/maps\n");
+			goto out;
+		}
+		stop = '\0';
+
+		sscanf(line, "%lx", &start);
+		sscanf(end_addr, "%lx", &end);
+
+		if (start <= addr && end > addr) {
+			area->start = start;
+			area->end = end;
+			ret = 0;
+			goto out;
+		}
+	}
+out:
+	fclose(file);
+	return ret;
+}
+
+#define VMFLAGS "VmFlags:"
+
+static bool is_vmflag_set(unsigned long addr, const char *vmflag)
+{
+	char *line = NULL;
+	char *flags;
+	size_t size = 0;
+	bool ret = false;
+	FILE *smaps;
+
+	smaps = seek_to_smaps_entry(addr);
+	if (!smaps) {
+		printf("Unable to parse /proc/self/smaps\n");
+		goto out;
+	}
+
+	while (getline(&line, &size, smaps) > 0) {
+		if (!strstr(line, VMFLAGS)) {
+			free(line);
+			line = NULL;
+			size = 0;
+			continue;
+		}
+
+		flags = line + strlen(VMFLAGS);
+		ret = (strstr(flags, vmflag) != NULL);
+		goto out;
+	}
+
+out:
+	free(line);
+	fclose(smaps);
+	return ret;
+}
+
+#define SIZE "Size:"
+#define RSS  "Rss:"
+#define LOCKED "lo"
+
+static unsigned long get_value_for_name(unsigned long addr, const char *name)
+{
+	char *line = NULL;
+	size_t size = 0;
+	char *value_ptr;
+	FILE *smaps = NULL;
+	unsigned long value = -1UL;
+
+	smaps = seek_to_smaps_entry(addr);
+	if (!smaps) {
+		printf("Unable to parse /proc/self/smaps\n");
+		goto out;
+	}
+
+	while (getline(&line, &size, smaps) > 0) {
+		if (!strstr(line, name)) {
+			free(line);
+			line = NULL;
+			size = 0;
+			continue;
+		}
+
+		value_ptr = line + strlen(name);
+		if (sscanf(value_ptr, "%lu kB", &value) < 1) {
+			printf("Unable to parse smaps entry for Size\n");
+			goto out;
+		}
+		break;
+	}
+
+out:
+	if (smaps)
+		fclose(smaps);
+	free(line);
+	return value;
+}
+
+static bool is_vma_lock_on_fault(unsigned long addr)
+{
+	bool locked;
+	unsigned long vma_size, vma_rss;
+
+	locked = is_vmflag_set(addr, LOCKED);
+	if (!locked)
+		return false;
+
+	vma_size = get_value_for_name(addr, SIZE);
+	vma_rss = get_value_for_name(addr, RSS);
+
+	/* only one page is faulted in */
+	return (vma_rss < vma_size);
+}
+
+#define PRESENT_BIT     0x8000000000000000ULL
+#define PFN_MASK        0x007FFFFFFFFFFFFFULL
+#define UNEVICTABLE_BIT (1UL << 18)
+
+static int lock_check(unsigned long addr)
+{
+	bool locked;
+	unsigned long vma_size, vma_rss;
+
+	locked = is_vmflag_set(addr, LOCKED);
+	if (!locked)
+		return false;
+
+	vma_size = get_value_for_name(addr, SIZE);
+	vma_rss = get_value_for_name(addr, RSS);
+
+	return (vma_rss == vma_size);
+}
+
+static int unlock_lock_check(char *map)
+{
+	if (is_vmflag_set((unsigned long)map, LOCKED)) {
+		printf("VMA flag %s is present on page 1 after unlock\n", LOCKED);
+		return 1;
+	}
+
+	return 0;
+}
+
+static int test_mlock_lock()
+{
+	char *map;
+	int ret = 1;
+	unsigned long page_size = getpagesize();
+
+	map = mmap(NULL, 2 * page_size, PROT_READ | PROT_WRITE,
+		   MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+	if (map == MAP_FAILED) {
+		perror("test_mlock_locked mmap");
+		goto out;
+	}
+
+	if (mlock2_(map, 2 * page_size, 0)) {
+		if (errno == ENOSYS) {
+			printf("Cannot call new mlock family, skipping test\n");
+			_exit(KSFT_SKIP);
+		}
+		perror("mlock2(0)");
+		goto unmap;
+	}
+
+	if (!lock_check((unsigned long)map))
+		goto unmap;
+
+	/* Now unlock and recheck attributes */
+	if (munlock(map, 2 * page_size)) {
+		perror("munlock()");
+		goto unmap;
+	}
+
+	ret = unlock_lock_check(map);
+
+unmap:
+	munmap(map, 2 * page_size);
+out:
+	return ret;
+}
+
+static int onfault_check(char *map)
+{
+	*map = 'a';
+	if (!is_vma_lock_on_fault((unsigned long)map)) {
+		printf("VMA is not marked for lock on fault\n");
+		return 1;
+	}
+
+	return 0;
+}
+
+static int unlock_onfault_check(char *map)
+{
+	unsigned long page_size = getpagesize();
+
+	if (is_vma_lock_on_fault((unsigned long)map) ||
+	    is_vma_lock_on_fault((unsigned long)map + page_size)) {
+		printf("VMA is still lock on fault after unlock\n");
+		return 1;
+	}
+
+	return 0;
+}
+
+static int test_mlock_onfault()
+{
+	char *map;
+	int ret = 1;
+	unsigned long page_size = getpagesize();
+
+	map = mmap(NULL, 2 * page_size, PROT_READ | PROT_WRITE,
+		   MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+	if (map == MAP_FAILED) {
+		perror("test_mlock_locked mmap");
+		goto out;
+	}
+
+	if (mlock2_(map, 2 * page_size, MLOCK_ONFAULT)) {
+		if (errno == ENOSYS) {
+			printf("Cannot call new mlock family, skipping test\n");
+			_exit(KSFT_SKIP);
+		}
+		perror("mlock2(MLOCK_ONFAULT)");
+		goto unmap;
+	}
+
+	if (onfault_check(map))
+		goto unmap;
+
+	/* Now unlock and recheck attributes */
+	if (munlock(map, 2 * page_size)) {
+		if (errno == ENOSYS) {
+			printf("Cannot call new mlock family, skipping test\n");
+			_exit(KSFT_SKIP);
+		}
+		perror("munlock()");
+		goto unmap;
+	}
+
+	ret = unlock_onfault_check(map);
+unmap:
+	munmap(map, 2 * page_size);
+out:
+	return ret;
+}
+
+static int test_lock_onfault_of_present()
+{
+	char *map;
+	int ret = 1;
+	unsigned long page_size = getpagesize();
+
+	map = mmap(NULL, 2 * page_size, PROT_READ | PROT_WRITE,
+		   MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+	if (map == MAP_FAILED) {
+		perror("test_mlock_locked mmap");
+		goto out;
+	}
+
+	*map = 'a';
+
+	if (mlock2_(map, 2 * page_size, MLOCK_ONFAULT)) {
+		if (errno == ENOSYS) {
+			printf("Cannot call new mlock family, skipping test\n");
+			_exit(KSFT_SKIP);
+		}
+		perror("mlock2(MLOCK_ONFAULT)");
+		goto unmap;
+	}
+
+	if (!is_vma_lock_on_fault((unsigned long)map) ||
+	    !is_vma_lock_on_fault((unsigned long)map + page_size)) {
+		printf("VMA with present pages is not marked lock on fault\n");
+		goto unmap;
+	}
+	ret = 0;
+unmap:
+	munmap(map, 2 * page_size);
+out:
+	return ret;
+}
+
+static int test_munlockall()
+{
+	char *map;
+	int ret = 1;
+	unsigned long page_size = getpagesize();
+
+	map = mmap(NULL, 2 * page_size, PROT_READ | PROT_WRITE,
+		   MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+
+	if (map == MAP_FAILED) {
+		perror("test_munlockall mmap");
+		goto out;
+	}
+
+	if (mlockall(MCL_CURRENT)) {
+		perror("mlockall(MCL_CURRENT)");
+		goto out;
+	}
+
+	if (!lock_check((unsigned long)map))
+		goto unmap;
+
+	if (munlockall()) {
+		perror("munlockall()");
+		goto unmap;
+	}
+
+	if (unlock_lock_check(map))
+		goto unmap;
+
+	munmap(map, 2 * page_size);
+
+	map = mmap(NULL, 2 * page_size, PROT_READ | PROT_WRITE,
+		   MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+
+	if (map == MAP_FAILED) {
+		perror("test_munlockall second mmap");
+		goto out;
+	}
+
+	if (mlockall(MCL_CURRENT | MCL_ONFAULT)) {
+		perror("mlockall(MCL_CURRENT | MCL_ONFAULT)");
+		goto unmap;
+	}
+
+	if (onfault_check(map))
+		goto unmap;
+
+	if (munlockall()) {
+		perror("munlockall()");
+		goto unmap;
+	}
+
+	if (unlock_onfault_check(map))
+		goto unmap;
+
+	if (mlockall(MCL_CURRENT | MCL_FUTURE)) {
+		perror("mlockall(MCL_CURRENT | MCL_FUTURE)");
+		goto out;
+	}
+
+	if (!lock_check((unsigned long)map))
+		goto unmap;
+
+	if (munlockall()) {
+		perror("munlockall()");
+		goto unmap;
+	}
+
+	ret = unlock_lock_check(map);
+
+unmap:
+	munmap(map, 2 * page_size);
+out:
+	munlockall();
+	return ret;
+}
+
+static int test_vma_management(bool call_mlock)
+{
+	int ret = 1;
+	void *map;
+	unsigned long page_size = getpagesize();
+	struct vm_boundaries page1;
+	struct vm_boundaries page2;
+	struct vm_boundaries page3;
+
+	map = mmap(NULL, 3 * page_size, PROT_READ | PROT_WRITE,
+		   MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+	if (map == MAP_FAILED) {
+		perror("mmap()");
+		return ret;
+	}
+
+	if (call_mlock && mlock2_(map, 3 * page_size, MLOCK_ONFAULT)) {
+		if (errno == ENOSYS) {
+			printf("Cannot call new mlock family, skipping test\n");
+			_exit(KSFT_SKIP);
+		}
+		perror("mlock(ONFAULT)\n");
+		goto out;
+	}
+
+	if (get_vm_area((unsigned long)map, &page1) ||
+	    get_vm_area((unsigned long)map + page_size, &page2) ||
+	    get_vm_area((unsigned long)map + page_size * 2, &page3)) {
+		printf("couldn't find mapping in /proc/self/maps\n");
+		goto out;
+	}
+
+	/*
+	 * Before we unlock a portion, we need to that all three pages are in
+	 * the same VMA.  If they are not we abort this test (Note that this is
+	 * not a failure)
+	 */
+	if (page1.start != page2.start || page2.start != page3.start) {
+		printf("VMAs are not merged to start, aborting test\n");
+		ret = 0;
+		goto out;
+	}
+
+	if (munlock(map + page_size, page_size)) {
+		perror("munlock()");
+		goto out;
+	}
+
+	if (get_vm_area((unsigned long)map, &page1) ||
+	    get_vm_area((unsigned long)map + page_size, &page2) ||
+	    get_vm_area((unsigned long)map + page_size * 2, &page3)) {
+		printf("couldn't find mapping in /proc/self/maps\n");
+		goto out;
+	}
+
+	/* All three VMAs should be different */
+	if (page1.start == page2.start || page2.start == page3.start) {
+		printf("failed to split VMA for munlock\n");
+		goto out;
+	}
+
+	/* Now unlock the first and third page and check the VMAs again */
+	if (munlock(map, page_size * 3)) {
+		perror("munlock()");
+		goto out;
+	}
+
+	if (get_vm_area((unsigned long)map, &page1) ||
+	    get_vm_area((unsigned long)map + page_size, &page2) ||
+	    get_vm_area((unsigned long)map + page_size * 2, &page3)) {
+		printf("couldn't find mapping in /proc/self/maps\n");
+		goto out;
+	}
+
+	/* Now all three VMAs should be the same */
+	if (page1.start != page2.start || page2.start != page3.start) {
+		printf("failed to merge VMAs after munlock\n");
+		goto out;
+	}
+
+	ret = 0;
+out:
+	munmap(map, 3 * page_size);
+	return ret;
+}
+
+static int test_mlockall(int (test_function)(bool call_mlock))
+{
+	int ret = 1;
+
+	if (mlockall(MCL_CURRENT | MCL_ONFAULT | MCL_FUTURE)) {
+		perror("mlockall");
+		return ret;
+	}
+
+	ret = test_function(false);
+	munlockall();
+	return ret;
+}
+
+int main(int argc, char **argv)
+{
+	int ret = 0;
+	ret += test_mlock_lock();
+	ret += test_mlock_onfault();
+	ret += test_munlockall();
+	ret += test_lock_onfault_of_present();
+	ret += test_vma_management(true);
+	ret += test_mlockall(test_vma_management);
+	return ret;
+}
diff --git a/tools/testing/selftests/vm/mlock2.h b/tools/testing/selftests/vm/mlock2.h
new file mode 100644
index 000000000..2a6e76c22
--- /dev/null
+++ b/tools/testing/selftests/vm/mlock2.h
@@ -0,0 +1,63 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <syscall.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#ifndef MLOCK_ONFAULT
+#define MLOCK_ONFAULT 1
+#endif
+
+#ifndef MCL_ONFAULT
+#define MCL_ONFAULT (MCL_FUTURE << 1)
+#endif
+
+static int mlock2_(void *start, size_t len, int flags)
+{
+#ifdef __NR_mlock2
+	return syscall(__NR_mlock2, start, len, flags);
+#else
+	errno = ENOSYS;
+	return -1;
+#endif
+}
+
+static FILE *seek_to_smaps_entry(unsigned long addr)
+{
+	FILE *file;
+	char *line = NULL;
+	size_t size = 0;
+	unsigned long start, end;
+	char perms[5];
+	unsigned long offset;
+	char dev[32];
+	unsigned long inode;
+	char path[BUFSIZ];
+
+	file = fopen("/proc/self/smaps", "r");
+	if (!file) {
+		perror("fopen smaps");
+		_exit(1);
+	}
+
+	while (getline(&line, &size, file) > 0) {
+		if (sscanf(line, "%lx-%lx %s %lx %s %lu %s\n",
+			   &start, &end, perms, &offset, dev, &inode, path) < 6)
+			goto next;
+
+		if (start <= addr && addr < end)
+			goto out;
+
+next:
+		free(line);
+		line = NULL;
+		size = 0;
+	}
+
+	fclose(file);
+	file = NULL;
+
+out:
+	free(line);
+	return file;
+}
diff --git a/tools/testing/selftests/vm/on-fault-limit.c b/tools/testing/selftests/vm/on-fault-limit.c
new file mode 100644
index 000000000..634d87dfb
--- /dev/null
+++ b/tools/testing/selftests/vm/on-fault-limit.c
@@ -0,0 +1,48 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <sys/mman.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <string.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+
+#ifndef MCL_ONFAULT
+#define MCL_ONFAULT (MCL_FUTURE << 1)
+#endif
+
+static int test_limit(void)
+{
+	int ret = 1;
+	struct rlimit lims;
+	void *map;
+
+	if (getrlimit(RLIMIT_MEMLOCK, &lims)) {
+		perror("getrlimit");
+		return ret;
+	}
+
+	if (mlockall(MCL_ONFAULT | MCL_FUTURE)) {
+		perror("mlockall");
+		return ret;
+	}
+
+	map = mmap(NULL, 2 * lims.rlim_max, PROT_READ | PROT_WRITE,
+		   MAP_PRIVATE | MAP_ANONYMOUS | MAP_POPULATE, -1, 0);
+	if (map != MAP_FAILED)
+		printf("mmap should have failed, but didn't\n");
+	else {
+		ret = 0;
+		munmap(map, 2 * lims.rlim_max);
+	}
+
+	munlockall();
+	return ret;
+}
+
+int main(int argc, char **argv)
+{
+	int ret = 0;
+
+	ret += test_limit();
+	return ret;
+}
diff --git a/tools/testing/selftests/vm/run_vmtests b/tools/testing/selftests/vm/run_vmtests
new file mode 100755
index 000000000..584a91ae4
--- /dev/null
+++ b/tools/testing/selftests/vm/run_vmtests
@@ -0,0 +1,214 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#please run as root
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+mnt=./huge
+exitcode=0
+
+#get huge pagesize and freepages from /proc/meminfo
+while read name size unit; do
+	if [ "$name" = "HugePages_Free:" ]; then
+		freepgs=$size
+	fi
+	if [ "$name" = "Hugepagesize:" ]; then
+		hpgsize_KB=$size
+	fi
+done < /proc/meminfo
+
+# Simple hugetlbfs tests have a hardcoded minimum requirement of
+# huge pages totaling 256MB (262144KB) in size.  The userfaultfd
+# hugetlb test requires a minimum of 2 * nr_cpus huge pages.  Take
+# both of these requirements into account and attempt to increase
+# number of huge pages available.
+nr_cpus=$(nproc)
+hpgsize_MB=$((hpgsize_KB / 1024))
+half_ufd_size_MB=$((((nr_cpus * hpgsize_MB + 127) / 128) * 128))
+needmem_KB=$((half_ufd_size_MB * 2 * 1024))
+
+#set proper nr_hugepages
+if [ -n "$freepgs" ] && [ -n "$hpgsize_KB" ]; then
+	nr_hugepgs=`cat /proc/sys/vm/nr_hugepages`
+	needpgs=$((needmem_KB / hpgsize_KB))
+	tries=2
+	while [ $tries -gt 0 ] && [ $freepgs -lt $needpgs ]; do
+		lackpgs=$(( $needpgs - $freepgs ))
+		echo 3 > /proc/sys/vm/drop_caches
+		echo $(( $lackpgs + $nr_hugepgs )) > /proc/sys/vm/nr_hugepages
+		if [ $? -ne 0 ]; then
+			echo "Please run this test as root"
+			exit $ksft_skip
+		fi
+		while read name size unit; do
+			if [ "$name" = "HugePages_Free:" ]; then
+				freepgs=$size
+			fi
+		done < /proc/meminfo
+		tries=$((tries - 1))
+	done
+	if [ $freepgs -lt $needpgs ]; then
+		printf "Not enough huge pages available (%d < %d)\n" \
+		       $freepgs $needpgs
+		exit 1
+	fi
+else
+	echo "no hugetlbfs support in kernel?"
+	exit 1
+fi
+
+mkdir $mnt
+mount -t hugetlbfs none $mnt
+
+echo "---------------------"
+echo "running hugepage-mmap"
+echo "---------------------"
+./hugepage-mmap
+if [ $? -ne 0 ]; then
+	echo "[FAIL]"
+	exitcode=1
+else
+	echo "[PASS]"
+fi
+
+shmmax=`cat /proc/sys/kernel/shmmax`
+shmall=`cat /proc/sys/kernel/shmall`
+echo 268435456 > /proc/sys/kernel/shmmax
+echo 4194304 > /proc/sys/kernel/shmall
+echo "--------------------"
+echo "running hugepage-shm"
+echo "--------------------"
+./hugepage-shm
+if [ $? -ne 0 ]; then
+	echo "[FAIL]"
+	exitcode=1
+else
+	echo "[PASS]"
+fi
+echo $shmmax > /proc/sys/kernel/shmmax
+echo $shmall > /proc/sys/kernel/shmall
+
+echo "-------------------"
+echo "running map_hugetlb"
+echo "-------------------"
+./map_hugetlb
+if [ $? -ne 0 ]; then
+	echo "[FAIL]"
+	exitcode=1
+else
+	echo "[PASS]"
+fi
+
+echo "NOTE: The above hugetlb tests provide minimal coverage.  Use"
+echo "      https://github.com/libhugetlbfs/libhugetlbfs.git for"
+echo "      hugetlb regression testing."
+
+echo "-------------------"
+echo "running userfaultfd"
+echo "-------------------"
+./userfaultfd anon 128 32
+if [ $? -ne 0 ]; then
+	echo "[FAIL]"
+	exitcode=1
+else
+	echo "[PASS]"
+fi
+
+echo "---------------------------"
+echo "running userfaultfd_hugetlb"
+echo "---------------------------"
+# Test requires source and destination huge pages.  Size of source
+# (half_ufd_size_MB) is passed as argument to test.
+./userfaultfd hugetlb $half_ufd_size_MB 32 $mnt/ufd_test_file
+if [ $? -ne 0 ]; then
+	echo "[FAIL]"
+	exitcode=1
+else
+	echo "[PASS]"
+fi
+rm -f $mnt/ufd_test_file
+
+echo "-------------------------"
+echo "running userfaultfd_shmem"
+echo "-------------------------"
+./userfaultfd shmem 128 32
+if [ $? -ne 0 ]; then
+	echo "[FAIL]"
+	exitcode=1
+else
+	echo "[PASS]"
+fi
+
+#cleanup
+umount $mnt
+rm -rf $mnt
+echo $nr_hugepgs > /proc/sys/vm/nr_hugepages
+
+echo "-----------------------"
+echo "running compaction_test"
+echo "-----------------------"
+./compaction_test
+if [ $? -ne 0 ]; then
+	echo "[FAIL]"
+	exitcode=1
+else
+	echo "[PASS]"
+fi
+
+echo "----------------------"
+echo "running on-fault-limit"
+echo "----------------------"
+sudo -u nobody ./on-fault-limit
+if [ $? -ne 0 ]; then
+	echo "[FAIL]"
+	exitcode=1
+else
+	echo "[PASS]"
+fi
+
+echo "--------------------"
+echo "running map_populate"
+echo "--------------------"
+./map_populate
+if [ $? -ne 0 ]; then
+	echo "[FAIL]"
+	exitcode=1
+else
+	echo "[PASS]"
+fi
+
+echo "--------------------"
+echo "running mlock2-tests"
+echo "--------------------"
+./mlock2-tests
+if [ $? -ne 0 ]; then
+	echo "[FAIL]"
+	exitcode=1
+else
+	echo "[PASS]"
+fi
+
+echo "-----------------------------"
+echo "running virtual_address_range"
+echo "-----------------------------"
+./virtual_address_range
+if [ $? -ne 0 ]; then
+	echo "[FAIL]"
+	exitcode=1
+else
+	echo "[PASS]"
+fi
+
+echo "-----------------------------"
+echo "running virtual address 128TB switch test"
+echo "-----------------------------"
+./va_128TBswitch
+if [ $? -ne 0 ]; then
+    echo "[FAIL]"
+    exitcode=1
+else
+    echo "[PASS]"
+fi
+
+exit $exitcode
diff --git a/tools/testing/selftests/vm/thuge-gen.c b/tools/testing/selftests/vm/thuge-gen.c
new file mode 100644
index 000000000..361ef7192
--- /dev/null
+++ b/tools/testing/selftests/vm/thuge-gen.c
@@ -0,0 +1,257 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Test selecting other page sizes for mmap/shmget.
+
+   Before running this huge pages for each huge page size must have been
+   reserved.
+   For large pages beyond MAX_ORDER (like 1GB on x86) boot options must be used.
+   Also shmmax must be increased.
+   And you need to run as root to work around some weird permissions in shm.
+   And nothing using huge pages should run in parallel.
+   When the program aborts you may need to clean up the shm segments with
+   ipcrm -m by hand, like this
+   sudo ipcs | awk '$1 == "0x00000000" {print $2}' | xargs -n1 sudo ipcrm -m
+   (warning this will remove all if someone else uses them) */
+
+#define _GNU_SOURCE 1
+#include <sys/mman.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <sys/ipc.h>
+#include <sys/shm.h>
+#include <sys/stat.h>
+#include <glob.h>
+#include <assert.h>
+#include <unistd.h>
+#include <stdarg.h>
+#include <string.h>
+
+#define err(x) perror(x), exit(1)
+
+#define MAP_HUGE_2MB    (21 << MAP_HUGE_SHIFT)
+#define MAP_HUGE_1GB    (30 << MAP_HUGE_SHIFT)
+#define MAP_HUGE_SHIFT  26
+#define MAP_HUGE_MASK   0x3f
+#if !defined(MAP_HUGETLB)
+#define MAP_HUGETLB	0x40000
+#endif
+
+#define SHM_HUGETLB     04000   /* segment will use huge TLB pages */
+#define SHM_HUGE_SHIFT  26
+#define SHM_HUGE_MASK   0x3f
+#define SHM_HUGE_2MB    (21 << SHM_HUGE_SHIFT)
+#define SHM_HUGE_1GB    (30 << SHM_HUGE_SHIFT)
+
+#define NUM_PAGESIZES   5
+
+#define NUM_PAGES 4
+
+#define Dprintf(fmt...) // printf(fmt)
+
+unsigned long page_sizes[NUM_PAGESIZES];
+int num_page_sizes;
+
+int ilog2(unsigned long v)
+{
+	int l = 0;
+	while ((1UL << l) < v)
+		l++;
+	return l;
+}
+
+void find_pagesizes(void)
+{
+	glob_t g;
+	int i;
+	glob("/sys/kernel/mm/hugepages/hugepages-*kB", 0, NULL, &g);
+	assert(g.gl_pathc <= NUM_PAGESIZES);
+	for (i = 0; i < g.gl_pathc; i++) {
+		sscanf(g.gl_pathv[i], "/sys/kernel/mm/hugepages/hugepages-%lukB",
+				&page_sizes[i]);
+		page_sizes[i] <<= 10;
+		printf("Found %luMB\n", page_sizes[i] >> 20);
+	}
+	num_page_sizes = g.gl_pathc;
+	globfree(&g);
+}
+
+unsigned long default_huge_page_size(void)
+{
+	unsigned long hps = 0;
+	char *line = NULL;
+	size_t linelen = 0;
+	FILE *f = fopen("/proc/meminfo", "r");
+	if (!f)
+		return 0;
+	while (getline(&line, &linelen, f) > 0) {
+		if (sscanf(line, "Hugepagesize:       %lu kB", &hps) == 1) {
+			hps <<= 10;
+			break;
+		}
+	}
+	free(line);
+	return hps;
+}
+
+void show(unsigned long ps)
+{
+	char buf[100];
+	if (ps == getpagesize())
+		return;
+	printf("%luMB: ", ps >> 20);
+	fflush(stdout);
+	snprintf(buf, sizeof buf,
+		"cat /sys/kernel/mm/hugepages/hugepages-%lukB/free_hugepages",
+		ps >> 10);
+	system(buf);
+}
+
+unsigned long read_sysfs(int warn, char *fmt, ...)
+{
+	char *line = NULL;
+	size_t linelen = 0;
+	char buf[100];
+	FILE *f;
+	va_list ap;
+	unsigned long val = 0;
+
+	va_start(ap, fmt);
+	vsnprintf(buf, sizeof buf, fmt, ap);
+	va_end(ap);
+
+	f = fopen(buf, "r");
+	if (!f) {
+		if (warn)
+			printf("missing %s\n", buf);
+		return 0;
+	}
+	if (getline(&line, &linelen, f) > 0) {
+		sscanf(line, "%lu", &val);
+	}
+	fclose(f);
+	free(line);
+	return val;
+}
+
+unsigned long read_free(unsigned long ps)
+{
+	return read_sysfs(ps != getpagesize(),
+			"/sys/kernel/mm/hugepages/hugepages-%lukB/free_hugepages",
+			ps >> 10);
+}
+
+void test_mmap(unsigned long size, unsigned flags)
+{
+	char *map;
+	unsigned long before, after;
+	int err;
+
+	before = read_free(size);
+	map = mmap(NULL, size*NUM_PAGES, PROT_READ|PROT_WRITE,
+			MAP_PRIVATE|MAP_ANONYMOUS|MAP_HUGETLB|flags, -1, 0);
+
+	if (map == (char *)-1) err("mmap");
+	memset(map, 0xff, size*NUM_PAGES);
+	after = read_free(size);
+	Dprintf("before %lu after %lu diff %ld size %lu\n",
+		before, after, before - after, size);
+	assert(size == getpagesize() || (before - after) == NUM_PAGES);
+	show(size);
+	err = munmap(map, size);
+	assert(!err);
+}
+
+void test_shmget(unsigned long size, unsigned flags)
+{
+	int id;
+	unsigned long before, after;
+	int err;
+
+	before = read_free(size);
+	id = shmget(IPC_PRIVATE, size * NUM_PAGES, IPC_CREAT|0600|flags);
+	if (id < 0) err("shmget");
+
+	struct shm_info i;
+	if (shmctl(id, SHM_INFO, (void *)&i) < 0) err("shmctl");
+	Dprintf("alloc %lu res %lu\n", i.shm_tot, i.shm_rss);
+
+
+	Dprintf("id %d\n", id);
+	char *map = shmat(id, NULL, 0600);
+	if (map == (char*)-1) err("shmat");
+
+	shmctl(id, IPC_RMID, NULL);
+
+	memset(map, 0xff, size*NUM_PAGES);
+	after = read_free(size);
+
+	Dprintf("before %lu after %lu diff %ld size %lu\n",
+		before, after, before - after, size);
+	assert(size == getpagesize() || (before - after) == NUM_PAGES);
+	show(size);
+	err = shmdt(map);
+	assert(!err);
+}
+
+void sanity_checks(void)
+{
+	int i;
+	unsigned long largest = getpagesize();
+
+	for (i = 0; i < num_page_sizes; i++) {
+		if (page_sizes[i] > largest)
+			largest = page_sizes[i];
+
+		if (read_free(page_sizes[i]) < NUM_PAGES) {
+			printf("Not enough huge pages for page size %lu MB, need %u\n",
+				page_sizes[i] >> 20,
+				NUM_PAGES);
+			exit(0);
+		}
+	}
+
+	if (read_sysfs(0, "/proc/sys/kernel/shmmax") < NUM_PAGES * largest) {
+		printf("Please do echo %lu > /proc/sys/kernel/shmmax", largest * NUM_PAGES);
+		exit(0);
+	}
+
+#if defined(__x86_64__)
+	if (largest != 1U<<30) {
+		printf("No GB pages available on x86-64\n"
+		       "Please boot with hugepagesz=1G hugepages=%d\n", NUM_PAGES);
+		exit(0);
+	}
+#endif
+}
+
+int main(void)
+{
+	int i;
+	unsigned default_hps = default_huge_page_size();
+
+	find_pagesizes();
+
+	sanity_checks();
+
+	for (i = 0; i < num_page_sizes; i++) {
+		unsigned long ps = page_sizes[i];
+		int arg = ilog2(ps) << MAP_HUGE_SHIFT;
+		printf("Testing %luMB mmap with shift %x\n", ps >> 20, arg);
+		test_mmap(ps, MAP_HUGETLB | arg);
+	}
+	printf("Testing default huge mmap\n");
+	test_mmap(default_hps, SHM_HUGETLB);
+
+	puts("Testing non-huge shmget");
+	test_shmget(getpagesize(), 0);
+
+	for (i = 0; i < num_page_sizes; i++) {
+		unsigned long ps = page_sizes[i];
+		int arg = ilog2(ps) << SHM_HUGE_SHIFT;
+		printf("Testing %luMB shmget with shift %x\n", ps >> 20, arg);
+		test_shmget(ps, SHM_HUGETLB | arg);
+	}
+	puts("default huge shmget");
+	test_shmget(default_hps, SHM_HUGETLB);
+
+	return 0;
+}
diff --git a/tools/testing/selftests/vm/transhuge-stress.c b/tools/testing/selftests/vm/transhuge-stress.c
new file mode 100644
index 000000000..fd7f1b4a9
--- /dev/null
+++ b/tools/testing/selftests/vm/transhuge-stress.c
@@ -0,0 +1,144 @@
+/*
+ * Stress test for transparent huge pages, memory compaction and migration.
+ *
+ * Authors: Konstantin Khlebnikov <koct9i@gmail.com>
+ *
+ * This is free and unencumbered software released into the public domain.
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <err.h>
+#include <time.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <string.h>
+#include <sys/mman.h>
+
+#define PAGE_SHIFT 12
+#define HPAGE_SHIFT 21
+
+#define PAGE_SIZE (1 << PAGE_SHIFT)
+#define HPAGE_SIZE (1 << HPAGE_SHIFT)
+
+#define PAGEMAP_PRESENT(ent)	(((ent) & (1ull << 63)) != 0)
+#define PAGEMAP_PFN(ent)	((ent) & ((1ull << 55) - 1))
+
+int pagemap_fd;
+
+int64_t allocate_transhuge(void *ptr)
+{
+	uint64_t ent[2];
+
+	/* drop pmd */
+	if (mmap(ptr, HPAGE_SIZE, PROT_READ | PROT_WRITE,
+				MAP_FIXED | MAP_ANONYMOUS |
+				MAP_NORESERVE | MAP_PRIVATE, -1, 0) != ptr)
+		errx(2, "mmap transhuge");
+
+	if (madvise(ptr, HPAGE_SIZE, MADV_HUGEPAGE))
+		err(2, "MADV_HUGEPAGE");
+
+	/* allocate transparent huge page */
+	*(volatile void **)ptr = ptr;
+
+	if (pread(pagemap_fd, ent, sizeof(ent),
+			(uintptr_t)ptr >> (PAGE_SHIFT - 3)) != sizeof(ent))
+		err(2, "read pagemap");
+
+	if (PAGEMAP_PRESENT(ent[0]) && PAGEMAP_PRESENT(ent[1]) &&
+	    PAGEMAP_PFN(ent[0]) + 1 == PAGEMAP_PFN(ent[1]) &&
+	    !(PAGEMAP_PFN(ent[0]) & ((1 << (HPAGE_SHIFT - PAGE_SHIFT)) - 1)))
+		return PAGEMAP_PFN(ent[0]);
+
+	return -1;
+}
+
+int main(int argc, char **argv)
+{
+	size_t ram, len;
+	void *ptr, *p;
+	struct timespec a, b;
+	double s;
+	uint8_t *map;
+	size_t map_len;
+
+	ram = sysconf(_SC_PHYS_PAGES);
+	if (ram > SIZE_MAX / sysconf(_SC_PAGESIZE) / 4)
+		ram = SIZE_MAX / 4;
+	else
+		ram *= sysconf(_SC_PAGESIZE);
+
+	if (argc == 1)
+		len = ram;
+	else if (!strcmp(argv[1], "-h"))
+		errx(1, "usage: %s [size in MiB]", argv[0]);
+	else
+		len = atoll(argv[1]) << 20;
+
+	warnx("allocate %zd transhuge pages, using %zd MiB virtual memory"
+	      " and %zd MiB of ram", len >> HPAGE_SHIFT, len >> 20,
+	      len >> (20 + HPAGE_SHIFT - PAGE_SHIFT - 1));
+
+	pagemap_fd = open("/proc/self/pagemap", O_RDONLY);
+	if (pagemap_fd < 0)
+		err(2, "open pagemap");
+
+	len -= len % HPAGE_SIZE;
+	ptr = mmap(NULL, len + HPAGE_SIZE, PROT_READ | PROT_WRITE,
+			MAP_ANONYMOUS | MAP_NORESERVE | MAP_PRIVATE, -1, 0);
+	if (ptr == MAP_FAILED)
+		err(2, "initial mmap");
+	ptr += HPAGE_SIZE - (uintptr_t)ptr % HPAGE_SIZE;
+
+	if (madvise(ptr, len, MADV_HUGEPAGE))
+		err(2, "MADV_HUGEPAGE");
+
+	map_len = ram >> (HPAGE_SHIFT - 1);
+	map = malloc(map_len);
+	if (!map)
+		errx(2, "map malloc");
+
+	while (1) {
+		int nr_succeed = 0, nr_failed = 0, nr_pages = 0;
+
+		memset(map, 0, map_len);
+
+		clock_gettime(CLOCK_MONOTONIC, &a);
+		for (p = ptr; p < ptr + len; p += HPAGE_SIZE) {
+			int64_t pfn;
+
+			pfn = allocate_transhuge(p);
+
+			if (pfn < 0) {
+				nr_failed++;
+			} else {
+				size_t idx = pfn >> (HPAGE_SHIFT - PAGE_SHIFT);
+
+				nr_succeed++;
+				if (idx >= map_len) {
+					map = realloc(map, idx + 1);
+					if (!map)
+						errx(2, "map realloc");
+					memset(map + map_len, 0, idx + 1 - map_len);
+					map_len = idx + 1;
+				}
+				if (!map[idx])
+					nr_pages++;
+				map[idx] = 1;
+			}
+
+			/* split transhuge page, keep last page */
+			if (madvise(p, HPAGE_SIZE - PAGE_SIZE, MADV_DONTNEED))
+				err(2, "MADV_DONTNEED");
+		}
+		clock_gettime(CLOCK_MONOTONIC, &b);
+		s = b.tv_sec - a.tv_sec + (b.tv_nsec - a.tv_nsec) / 1000000000.;
+
+		warnx("%.3f s/loop, %.3f ms/page, %10.3f MiB/s\t"
+		      "%4d succeed, %4d failed, %4d different pages",
+		      s, s * 1000 / (len >> HPAGE_SHIFT), len / s / (1 << 20),
+		      nr_succeed, nr_failed, nr_pages);
+	}
+}
diff --git a/tools/testing/selftests/vm/userfaultfd.c b/tools/testing/selftests/vm/userfaultfd.c
new file mode 100644
index 000000000..b2c7043c0
--- /dev/null
+++ b/tools/testing/selftests/vm/userfaultfd.c
@@ -0,0 +1,1333 @@
+/*
+ * Stress userfaultfd syscall.
+ *
+ *  Copyright (C) 2015  Red Hat, Inc.
+ *
+ *  This work is licensed under the terms of the GNU GPL, version 2. See
+ *  the COPYING file in the top-level directory.
+ *
+ * This test allocates two virtual areas and bounces the physical
+ * memory across the two virtual areas (from area_src to area_dst)
+ * using userfaultfd.
+ *
+ * There are three threads running per CPU:
+ *
+ * 1) one per-CPU thread takes a per-page pthread_mutex in a random
+ *    page of the area_dst (while the physical page may still be in
+ *    area_src), and increments a per-page counter in the same page,
+ *    and checks its value against a verification region.
+ *
+ * 2) another per-CPU thread handles the userfaults generated by
+ *    thread 1 above. userfaultfd blocking reads or poll() modes are
+ *    exercised interleaved.
+ *
+ * 3) one last per-CPU thread transfers the memory in the background
+ *    at maximum bandwidth (if not already transferred by thread
+ *    2). Each cpu thread takes cares of transferring a portion of the
+ *    area.
+ *
+ * When all threads of type 3 completed the transfer, one bounce is
+ * complete. area_src and area_dst are then swapped. All threads are
+ * respawned and so the bounce is immediately restarted in the
+ * opposite direction.
+ *
+ * per-CPU threads 1 by triggering userfaults inside
+ * pthread_mutex_lock will also verify the atomicity of the memory
+ * transfer (UFFDIO_COPY).
+ *
+ * The program takes two parameters: the amounts of physical memory in
+ * megabytes (MiB) of the area and the number of bounces to execute.
+ *
+ * # 100MiB 99999 bounces
+ * ./userfaultfd 100 99999
+ *
+ * # 1GiB 99 bounces
+ * ./userfaultfd 1000 99
+ *
+ * # 10MiB-~6GiB 999 bounces, continue forever unless an error triggers
+ * while ./userfaultfd $[RANDOM % 6000 + 10] 999; do true; done
+ */
+
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <errno.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <time.h>
+#include <signal.h>
+#include <poll.h>
+#include <string.h>
+#include <linux/mman.h>
+#include <sys/mman.h>
+#include <sys/syscall.h>
+#include <sys/ioctl.h>
+#include <sys/wait.h>
+#include <pthread.h>
+#include <linux/userfaultfd.h>
+#include <setjmp.h>
+#include <stdbool.h>
+
+#include "../kselftest.h"
+
+#ifdef __NR_userfaultfd
+
+static unsigned long nr_cpus, nr_pages, nr_pages_per_cpu, page_size;
+
+#define BOUNCE_RANDOM		(1<<0)
+#define BOUNCE_RACINGFAULTS	(1<<1)
+#define BOUNCE_VERIFY		(1<<2)
+#define BOUNCE_POLL		(1<<3)
+static int bounces;
+
+#define TEST_ANON	1
+#define TEST_HUGETLB	2
+#define TEST_SHMEM	3
+static int test_type;
+
+/* exercise the test_uffdio_*_eexist every ALARM_INTERVAL_SECS */
+#define ALARM_INTERVAL_SECS 10
+static volatile bool test_uffdio_copy_eexist = true;
+static volatile bool test_uffdio_zeropage_eexist = true;
+
+static bool map_shared;
+static int huge_fd;
+static char *huge_fd_off0;
+static unsigned long long *count_verify;
+static int uffd, uffd_flags, finished, *pipefd;
+static char *area_src, *area_src_alias, *area_dst, *area_dst_alias;
+static char *zeropage;
+pthread_attr_t attr;
+
+/* pthread_mutex_t starts at page offset 0 */
+#define area_mutex(___area, ___nr)					\
+	((pthread_mutex_t *) ((___area) + (___nr)*page_size))
+/*
+ * count is placed in the page after pthread_mutex_t naturally aligned
+ * to avoid non alignment faults on non-x86 archs.
+ */
+#define area_count(___area, ___nr)					\
+	((volatile unsigned long long *) ((unsigned long)		\
+				 ((___area) + (___nr)*page_size +	\
+				  sizeof(pthread_mutex_t) +		\
+				  sizeof(unsigned long long) - 1) &	\
+				 ~(unsigned long)(sizeof(unsigned long long) \
+						  -  1)))
+
+static int anon_release_pages(char *rel_area)
+{
+	int ret = 0;
+
+	if (madvise(rel_area, nr_pages * page_size, MADV_DONTNEED)) {
+		perror("madvise");
+		ret = 1;
+	}
+
+	return ret;
+}
+
+static void anon_allocate_area(void **alloc_area)
+{
+	*alloc_area = mmap(NULL, nr_pages * page_size, PROT_READ | PROT_WRITE,
+			   MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+	if (*alloc_area == MAP_FAILED) {
+		fprintf(stderr, "mmap of anonymous memory failed");
+		*alloc_area = NULL;
+	}
+}
+
+static void noop_alias_mapping(__u64 *start, size_t len, unsigned long offset)
+{
+}
+
+/* HugeTLB memory */
+static int hugetlb_release_pages(char *rel_area)
+{
+	int ret = 0;
+
+	if (fallocate(huge_fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
+				rel_area == huge_fd_off0 ? 0 :
+				nr_pages * page_size,
+				nr_pages * page_size)) {
+		perror("fallocate");
+		ret = 1;
+	}
+
+	return ret;
+}
+
+
+static void hugetlb_allocate_area(void **alloc_area)
+{
+	void *area_alias = NULL;
+	char **alloc_area_alias;
+	*alloc_area = mmap(NULL, nr_pages * page_size, PROT_READ | PROT_WRITE,
+			   (map_shared ? MAP_SHARED : MAP_PRIVATE) |
+			   MAP_HUGETLB,
+			   huge_fd, *alloc_area == area_src ? 0 :
+			   nr_pages * page_size);
+	if (*alloc_area == MAP_FAILED) {
+		fprintf(stderr, "mmap of hugetlbfs file failed\n");
+		*alloc_area = NULL;
+	}
+
+	if (map_shared) {
+		area_alias = mmap(NULL, nr_pages * page_size, PROT_READ | PROT_WRITE,
+				  MAP_SHARED | MAP_HUGETLB,
+				  huge_fd, *alloc_area == area_src ? 0 :
+				  nr_pages * page_size);
+		if (area_alias == MAP_FAILED) {
+			if (munmap(*alloc_area, nr_pages * page_size) < 0)
+				perror("hugetlb munmap"), exit(1);
+			*alloc_area = NULL;
+			return;
+		}
+	}
+	if (*alloc_area == area_src) {
+		huge_fd_off0 = *alloc_area;
+		alloc_area_alias = &area_src_alias;
+	} else {
+		alloc_area_alias = &area_dst_alias;
+	}
+	if (area_alias)
+		*alloc_area_alias = area_alias;
+}
+
+static void hugetlb_alias_mapping(__u64 *start, size_t len, unsigned long offset)
+{
+	if (!map_shared)
+		return;
+	/*
+	 * We can't zap just the pagetable with hugetlbfs because
+	 * MADV_DONTEED won't work. So exercise -EEXIST on a alias
+	 * mapping where the pagetables are not established initially,
+	 * this way we'll exercise the -EEXEC at the fs level.
+	 */
+	*start = (unsigned long) area_dst_alias + offset;
+}
+
+/* Shared memory */
+static int shmem_release_pages(char *rel_area)
+{
+	int ret = 0;
+
+	if (madvise(rel_area, nr_pages * page_size, MADV_REMOVE)) {
+		perror("madvise");
+		ret = 1;
+	}
+
+	return ret;
+}
+
+static void shmem_allocate_area(void **alloc_area)
+{
+	*alloc_area = mmap(NULL, nr_pages * page_size, PROT_READ | PROT_WRITE,
+			   MAP_ANONYMOUS | MAP_SHARED, -1, 0);
+	if (*alloc_area == MAP_FAILED) {
+		fprintf(stderr, "shared memory mmap failed\n");
+		*alloc_area = NULL;
+	}
+}
+
+struct uffd_test_ops {
+	unsigned long expected_ioctls;
+	void (*allocate_area)(void **alloc_area);
+	int (*release_pages)(char *rel_area);
+	void (*alias_mapping)(__u64 *start, size_t len, unsigned long offset);
+};
+
+#define ANON_EXPECTED_IOCTLS		((1 << _UFFDIO_WAKE) | \
+					 (1 << _UFFDIO_COPY) | \
+					 (1 << _UFFDIO_ZEROPAGE))
+
+static struct uffd_test_ops anon_uffd_test_ops = {
+	.expected_ioctls = ANON_EXPECTED_IOCTLS,
+	.allocate_area	= anon_allocate_area,
+	.release_pages	= anon_release_pages,
+	.alias_mapping = noop_alias_mapping,
+};
+
+static struct uffd_test_ops shmem_uffd_test_ops = {
+	.expected_ioctls = ANON_EXPECTED_IOCTLS,
+	.allocate_area	= shmem_allocate_area,
+	.release_pages	= shmem_release_pages,
+	.alias_mapping = noop_alias_mapping,
+};
+
+static struct uffd_test_ops hugetlb_uffd_test_ops = {
+	.expected_ioctls = UFFD_API_RANGE_IOCTLS_BASIC,
+	.allocate_area	= hugetlb_allocate_area,
+	.release_pages	= hugetlb_release_pages,
+	.alias_mapping = hugetlb_alias_mapping,
+};
+
+static struct uffd_test_ops *uffd_test_ops;
+
+static int my_bcmp(char *str1, char *str2, size_t n)
+{
+	unsigned long i;
+	for (i = 0; i < n; i++)
+		if (str1[i] != str2[i])
+			return 1;
+	return 0;
+}
+
+static void *locking_thread(void *arg)
+{
+	unsigned long cpu = (unsigned long) arg;
+	struct random_data rand;
+	unsigned long page_nr = *(&(page_nr)); /* uninitialized warning */
+	int32_t rand_nr;
+	unsigned long long count;
+	char randstate[64];
+	unsigned int seed;
+	time_t start;
+
+	if (bounces & BOUNCE_RANDOM) {
+		seed = (unsigned int) time(NULL) - bounces;
+		if (!(bounces & BOUNCE_RACINGFAULTS))
+			seed += cpu;
+		bzero(&rand, sizeof(rand));
+		bzero(&randstate, sizeof(randstate));
+		if (initstate_r(seed, randstate, sizeof(randstate), &rand))
+			fprintf(stderr, "srandom_r error\n"), exit(1);
+	} else {
+		page_nr = -bounces;
+		if (!(bounces & BOUNCE_RACINGFAULTS))
+			page_nr += cpu * nr_pages_per_cpu;
+	}
+
+	while (!finished) {
+		if (bounces & BOUNCE_RANDOM) {
+			if (random_r(&rand, &rand_nr))
+				fprintf(stderr, "random_r 1 error\n"), exit(1);
+			page_nr = rand_nr;
+			if (sizeof(page_nr) > sizeof(rand_nr)) {
+				if (random_r(&rand, &rand_nr))
+					fprintf(stderr, "random_r 2 error\n"), exit(1);
+				page_nr |= (((unsigned long) rand_nr) << 16) <<
+					   16;
+			}
+		} else
+			page_nr += 1;
+		page_nr %= nr_pages;
+
+		start = time(NULL);
+		if (bounces & BOUNCE_VERIFY) {
+			count = *area_count(area_dst, page_nr);
+			if (!count)
+				fprintf(stderr,
+					"page_nr %lu wrong count %Lu %Lu\n",
+					page_nr, count,
+					count_verify[page_nr]), exit(1);
+
+
+			/*
+			 * We can't use bcmp (or memcmp) because that
+			 * returns 0 erroneously if the memory is
+			 * changing under it (even if the end of the
+			 * page is never changing and always
+			 * different).
+			 */
+#if 1
+			if (!my_bcmp(area_dst + page_nr * page_size, zeropage,
+				     page_size))
+				fprintf(stderr,
+					"my_bcmp page_nr %lu wrong count %Lu %Lu\n",
+					page_nr, count,
+					count_verify[page_nr]), exit(1);
+#else
+			unsigned long loops;
+
+			loops = 0;
+			/* uncomment the below line to test with mutex */
+			/* pthread_mutex_lock(area_mutex(area_dst, page_nr)); */
+			while (!bcmp(area_dst + page_nr * page_size, zeropage,
+				     page_size)) {
+				loops += 1;
+				if (loops > 10)
+					break;
+			}
+			/* uncomment below line to test with mutex */
+			/* pthread_mutex_unlock(area_mutex(area_dst, page_nr)); */
+			if (loops) {
+				fprintf(stderr,
+					"page_nr %lu all zero thread %lu %p %lu\n",
+					page_nr, cpu, area_dst + page_nr * page_size,
+					loops);
+				if (loops > 10)
+					exit(1);
+			}
+#endif
+		}
+
+		pthread_mutex_lock(area_mutex(area_dst, page_nr));
+		count = *area_count(area_dst, page_nr);
+		if (count != count_verify[page_nr]) {
+			fprintf(stderr,
+				"page_nr %lu memory corruption %Lu %Lu\n",
+				page_nr, count,
+				count_verify[page_nr]), exit(1);
+		}
+		count++;
+		*area_count(area_dst, page_nr) = count_verify[page_nr] = count;
+		pthread_mutex_unlock(area_mutex(area_dst, page_nr));
+
+		if (time(NULL) - start > 1)
+			fprintf(stderr,
+				"userfault too slow %ld "
+				"possible false positive with overcommit\n",
+				time(NULL) - start);
+	}
+
+	return NULL;
+}
+
+static void retry_copy_page(int ufd, struct uffdio_copy *uffdio_copy,
+			    unsigned long offset)
+{
+	uffd_test_ops->alias_mapping(&uffdio_copy->dst,
+				     uffdio_copy->len,
+				     offset);
+	if (ioctl(ufd, UFFDIO_COPY, uffdio_copy)) {
+		/* real retval in ufdio_copy.copy */
+		if (uffdio_copy->copy != -EEXIST)
+			fprintf(stderr, "UFFDIO_COPY retry error %Ld\n",
+				uffdio_copy->copy), exit(1);
+	} else {
+		fprintf(stderr,	"UFFDIO_COPY retry unexpected %Ld\n",
+			uffdio_copy->copy), exit(1);
+	}
+}
+
+static int __copy_page(int ufd, unsigned long offset, bool retry)
+{
+	struct uffdio_copy uffdio_copy;
+
+	if (offset >= nr_pages * page_size)
+		fprintf(stderr, "unexpected offset %lu\n",
+			offset), exit(1);
+	uffdio_copy.dst = (unsigned long) area_dst + offset;
+	uffdio_copy.src = (unsigned long) area_src + offset;
+	uffdio_copy.len = page_size;
+	uffdio_copy.mode = 0;
+	uffdio_copy.copy = 0;
+	if (ioctl(ufd, UFFDIO_COPY, &uffdio_copy)) {
+		/* real retval in ufdio_copy.copy */
+		if (uffdio_copy.copy != -EEXIST)
+			fprintf(stderr, "UFFDIO_COPY error %Ld\n",
+				uffdio_copy.copy), exit(1);
+	} else if (uffdio_copy.copy != page_size) {
+		fprintf(stderr, "UFFDIO_COPY unexpected copy %Ld\n",
+			uffdio_copy.copy), exit(1);
+	} else {
+		if (test_uffdio_copy_eexist && retry) {
+			test_uffdio_copy_eexist = false;
+			retry_copy_page(ufd, &uffdio_copy, offset);
+		}
+		return 1;
+	}
+	return 0;
+}
+
+static int copy_page_retry(int ufd, unsigned long offset)
+{
+	return __copy_page(ufd, offset, true);
+}
+
+static int copy_page(int ufd, unsigned long offset)
+{
+	return __copy_page(ufd, offset, false);
+}
+
+static void *uffd_poll_thread(void *arg)
+{
+	unsigned long cpu = (unsigned long) arg;
+	struct pollfd pollfd[2];
+	struct uffd_msg msg;
+	struct uffdio_register uffd_reg;
+	int ret;
+	unsigned long offset;
+	char tmp_chr;
+	unsigned long userfaults = 0;
+
+	pollfd[0].fd = uffd;
+	pollfd[0].events = POLLIN;
+	pollfd[1].fd = pipefd[cpu*2];
+	pollfd[1].events = POLLIN;
+
+	for (;;) {
+		ret = poll(pollfd, 2, -1);
+		if (!ret)
+			fprintf(stderr, "poll error %d\n", ret), exit(1);
+		if (ret < 0)
+			perror("poll"), exit(1);
+		if (pollfd[1].revents & POLLIN) {
+			if (read(pollfd[1].fd, &tmp_chr, 1) != 1)
+				fprintf(stderr, "read pipefd error\n"),
+					exit(1);
+			break;
+		}
+		if (!(pollfd[0].revents & POLLIN))
+			fprintf(stderr, "pollfd[0].revents %d\n",
+				pollfd[0].revents), exit(1);
+		ret = read(uffd, &msg, sizeof(msg));
+		if (ret < 0) {
+			if (errno == EAGAIN)
+				continue;
+			perror("nonblocking read error"), exit(1);
+		}
+		switch (msg.event) {
+		default:
+			fprintf(stderr, "unexpected msg event %u\n",
+				msg.event), exit(1);
+			break;
+		case UFFD_EVENT_PAGEFAULT:
+			if (msg.arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_WRITE)
+				fprintf(stderr, "unexpected write fault\n"), exit(1);
+			offset = (char *)(unsigned long)msg.arg.pagefault.address -
+				area_dst;
+			offset &= ~(page_size-1);
+			if (copy_page(uffd, offset))
+				userfaults++;
+			break;
+		case UFFD_EVENT_FORK:
+			close(uffd);
+			uffd = msg.arg.fork.ufd;
+			pollfd[0].fd = uffd;
+			break;
+		case UFFD_EVENT_REMOVE:
+			uffd_reg.range.start = msg.arg.remove.start;
+			uffd_reg.range.len = msg.arg.remove.end -
+				msg.arg.remove.start;
+			if (ioctl(uffd, UFFDIO_UNREGISTER, &uffd_reg.range))
+				fprintf(stderr, "remove failure\n"), exit(1);
+			break;
+		case UFFD_EVENT_REMAP:
+			area_dst = (char *)(unsigned long)msg.arg.remap.to;
+			break;
+		}
+	}
+	return (void *)userfaults;
+}
+
+pthread_mutex_t uffd_read_mutex = PTHREAD_MUTEX_INITIALIZER;
+
+static void *uffd_read_thread(void *arg)
+{
+	unsigned long *this_cpu_userfaults;
+	struct uffd_msg msg;
+	unsigned long offset;
+	int ret;
+
+	this_cpu_userfaults = (unsigned long *) arg;
+	*this_cpu_userfaults = 0;
+
+	pthread_mutex_unlock(&uffd_read_mutex);
+	/* from here cancellation is ok */
+
+	for (;;) {
+		ret = read(uffd, &msg, sizeof(msg));
+		if (ret != sizeof(msg)) {
+			if (ret < 0)
+				perror("blocking read error"), exit(1);
+			else
+				fprintf(stderr, "short read\n"), exit(1);
+		}
+		if (msg.event != UFFD_EVENT_PAGEFAULT)
+			fprintf(stderr, "unexpected msg event %u\n",
+				msg.event), exit(1);
+		if (bounces & BOUNCE_VERIFY &&
+		    msg.arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_WRITE)
+			fprintf(stderr, "unexpected write fault\n"), exit(1);
+		offset = (char *)(unsigned long)msg.arg.pagefault.address -
+			 area_dst;
+		offset &= ~(page_size-1);
+		if (copy_page(uffd, offset))
+			(*this_cpu_userfaults)++;
+	}
+	return (void *)NULL;
+}
+
+static void *background_thread(void *arg)
+{
+	unsigned long cpu = (unsigned long) arg;
+	unsigned long page_nr;
+
+	for (page_nr = cpu * nr_pages_per_cpu;
+	     page_nr < (cpu+1) * nr_pages_per_cpu;
+	     page_nr++)
+		copy_page_retry(uffd, page_nr * page_size);
+
+	return NULL;
+}
+
+static int stress(unsigned long *userfaults)
+{
+	unsigned long cpu;
+	pthread_t locking_threads[nr_cpus];
+	pthread_t uffd_threads[nr_cpus];
+	pthread_t background_threads[nr_cpus];
+	void **_userfaults = (void **) userfaults;
+
+	finished = 0;
+	for (cpu = 0; cpu < nr_cpus; cpu++) {
+		if (pthread_create(&locking_threads[cpu], &attr,
+				   locking_thread, (void *)cpu))
+			return 1;
+		if (bounces & BOUNCE_POLL) {
+			if (pthread_create(&uffd_threads[cpu], &attr,
+					   uffd_poll_thread, (void *)cpu))
+				return 1;
+		} else {
+			if (pthread_create(&uffd_threads[cpu], &attr,
+					   uffd_read_thread,
+					   &_userfaults[cpu]))
+				return 1;
+			pthread_mutex_lock(&uffd_read_mutex);
+		}
+		if (pthread_create(&background_threads[cpu], &attr,
+				   background_thread, (void *)cpu))
+			return 1;
+	}
+	for (cpu = 0; cpu < nr_cpus; cpu++)
+		if (pthread_join(background_threads[cpu], NULL))
+			return 1;
+
+	/*
+	 * Be strict and immediately zap area_src, the whole area has
+	 * been transferred already by the background treads. The
+	 * area_src could then be faulted in in a racy way by still
+	 * running uffdio_threads reading zeropages after we zapped
+	 * area_src (but they're guaranteed to get -EEXIST from
+	 * UFFDIO_COPY without writing zero pages into area_dst
+	 * because the background threads already completed).
+	 */
+	if (uffd_test_ops->release_pages(area_src))
+		return 1;
+
+	for (cpu = 0; cpu < nr_cpus; cpu++) {
+		char c;
+		if (bounces & BOUNCE_POLL) {
+			if (write(pipefd[cpu*2+1], &c, 1) != 1) {
+				fprintf(stderr, "pipefd write error\n");
+				return 1;
+			}
+			if (pthread_join(uffd_threads[cpu], &_userfaults[cpu]))
+				return 1;
+		} else {
+			if (pthread_cancel(uffd_threads[cpu]))
+				return 1;
+			if (pthread_join(uffd_threads[cpu], NULL))
+				return 1;
+		}
+	}
+
+	finished = 1;
+	for (cpu = 0; cpu < nr_cpus; cpu++)
+		if (pthread_join(locking_threads[cpu], NULL))
+			return 1;
+
+	return 0;
+}
+
+static int userfaultfd_open(int features)
+{
+	struct uffdio_api uffdio_api;
+
+	uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
+	if (uffd < 0) {
+		fprintf(stderr,
+			"userfaultfd syscall not available in this kernel\n");
+		return 1;
+	}
+	uffd_flags = fcntl(uffd, F_GETFD, NULL);
+
+	uffdio_api.api = UFFD_API;
+	uffdio_api.features = features;
+	if (ioctl(uffd, UFFDIO_API, &uffdio_api)) {
+		fprintf(stderr, "UFFDIO_API\n");
+		return 1;
+	}
+	if (uffdio_api.api != UFFD_API) {
+		fprintf(stderr, "UFFDIO_API error %Lu\n", uffdio_api.api);
+		return 1;
+	}
+
+	return 0;
+}
+
+sigjmp_buf jbuf, *sigbuf;
+
+static void sighndl(int sig, siginfo_t *siginfo, void *ptr)
+{
+	if (sig == SIGBUS) {
+		if (sigbuf)
+			siglongjmp(*sigbuf, 1);
+		abort();
+	}
+}
+
+/*
+ * For non-cooperative userfaultfd test we fork() a process that will
+ * generate pagefaults, will mremap the area monitored by the
+ * userfaultfd and at last this process will release the monitored
+ * area.
+ * For the anonymous and shared memory the area is divided into two
+ * parts, the first part is accessed before mremap, and the second
+ * part is accessed after mremap. Since hugetlbfs does not support
+ * mremap, the entire monitored area is accessed in a single pass for
+ * HUGETLB_TEST.
+ * The release of the pages currently generates event for shmem and
+ * anonymous memory (UFFD_EVENT_REMOVE), hence it is not checked
+ * for hugetlb.
+ * For signal test(UFFD_FEATURE_SIGBUS), signal_test = 1, we register
+ * monitored area, generate pagefaults and test that signal is delivered.
+ * Use UFFDIO_COPY to allocate missing page and retry. For signal_test = 2
+ * test robustness use case - we release monitored area, fork a process
+ * that will generate pagefaults and verify signal is generated.
+ * This also tests UFFD_FEATURE_EVENT_FORK event along with the signal
+ * feature. Using monitor thread, verify no userfault events are generated.
+ */
+static int faulting_process(int signal_test)
+{
+	unsigned long nr;
+	unsigned long long count;
+	unsigned long split_nr_pages;
+	unsigned long lastnr;
+	struct sigaction act;
+	unsigned long signalled = 0;
+
+	if (test_type != TEST_HUGETLB)
+		split_nr_pages = (nr_pages + 1) / 2;
+	else
+		split_nr_pages = nr_pages;
+
+	if (signal_test) {
+		sigbuf = &jbuf;
+		memset(&act, 0, sizeof(act));
+		act.sa_sigaction = sighndl;
+		act.sa_flags = SA_SIGINFO;
+		if (sigaction(SIGBUS, &act, 0)) {
+			perror("sigaction");
+			return 1;
+		}
+		lastnr = (unsigned long)-1;
+	}
+
+	for (nr = 0; nr < split_nr_pages; nr++) {
+		if (signal_test) {
+			if (sigsetjmp(*sigbuf, 1) != 0) {
+				if (nr == lastnr) {
+					fprintf(stderr, "Signal repeated\n");
+					return 1;
+				}
+
+				lastnr = nr;
+				if (signal_test == 1) {
+					if (copy_page(uffd, nr * page_size))
+						signalled++;
+				} else {
+					signalled++;
+					continue;
+				}
+			}
+		}
+
+		count = *area_count(area_dst, nr);
+		if (count != count_verify[nr]) {
+			fprintf(stderr,
+				"nr %lu memory corruption %Lu %Lu\n",
+				nr, count,
+				count_verify[nr]), exit(1);
+		}
+	}
+
+	if (signal_test)
+		return signalled != split_nr_pages;
+
+	if (test_type == TEST_HUGETLB)
+		return 0;
+
+	area_dst = mremap(area_dst, nr_pages * page_size,  nr_pages * page_size,
+			  MREMAP_MAYMOVE | MREMAP_FIXED, area_src);
+	if (area_dst == MAP_FAILED)
+		perror("mremap"), exit(1);
+
+	for (; nr < nr_pages; nr++) {
+		count = *area_count(area_dst, nr);
+		if (count != count_verify[nr]) {
+			fprintf(stderr,
+				"nr %lu memory corruption %Lu %Lu\n",
+				nr, count,
+				count_verify[nr]), exit(1);
+		}
+	}
+
+	if (uffd_test_ops->release_pages(area_dst))
+		return 1;
+
+	for (nr = 0; nr < nr_pages; nr++) {
+		if (my_bcmp(area_dst + nr * page_size, zeropage, page_size))
+			fprintf(stderr, "nr %lu is not zero\n", nr), exit(1);
+	}
+
+	return 0;
+}
+
+static void retry_uffdio_zeropage(int ufd,
+				  struct uffdio_zeropage *uffdio_zeropage,
+				  unsigned long offset)
+{
+	uffd_test_ops->alias_mapping(&uffdio_zeropage->range.start,
+				     uffdio_zeropage->range.len,
+				     offset);
+	if (ioctl(ufd, UFFDIO_ZEROPAGE, uffdio_zeropage)) {
+		if (uffdio_zeropage->zeropage != -EEXIST)
+			fprintf(stderr, "UFFDIO_ZEROPAGE retry error %Ld\n",
+				uffdio_zeropage->zeropage), exit(1);
+	} else {
+		fprintf(stderr, "UFFDIO_ZEROPAGE retry unexpected %Ld\n",
+			uffdio_zeropage->zeropage), exit(1);
+	}
+}
+
+static int __uffdio_zeropage(int ufd, unsigned long offset, bool retry)
+{
+	struct uffdio_zeropage uffdio_zeropage;
+	int ret;
+	unsigned long has_zeropage;
+
+	has_zeropage = uffd_test_ops->expected_ioctls & (1 << _UFFDIO_ZEROPAGE);
+
+	if (offset >= nr_pages * page_size)
+		fprintf(stderr, "unexpected offset %lu\n",
+			offset), exit(1);
+	uffdio_zeropage.range.start = (unsigned long) area_dst + offset;
+	uffdio_zeropage.range.len = page_size;
+	uffdio_zeropage.mode = 0;
+	ret = ioctl(ufd, UFFDIO_ZEROPAGE, &uffdio_zeropage);
+	if (ret) {
+		/* real retval in ufdio_zeropage.zeropage */
+		if (has_zeropage) {
+			if (uffdio_zeropage.zeropage == -EEXIST)
+				fprintf(stderr, "UFFDIO_ZEROPAGE -EEXIST\n"),
+					exit(1);
+			else
+				fprintf(stderr, "UFFDIO_ZEROPAGE error %Ld\n",
+					uffdio_zeropage.zeropage), exit(1);
+		} else {
+			if (uffdio_zeropage.zeropage != -EINVAL)
+				fprintf(stderr,
+					"UFFDIO_ZEROPAGE not -EINVAL %Ld\n",
+					uffdio_zeropage.zeropage), exit(1);
+		}
+	} else if (has_zeropage) {
+		if (uffdio_zeropage.zeropage != page_size) {
+			fprintf(stderr, "UFFDIO_ZEROPAGE unexpected %Ld\n",
+				uffdio_zeropage.zeropage), exit(1);
+		} else {
+			if (test_uffdio_zeropage_eexist && retry) {
+				test_uffdio_zeropage_eexist = false;
+				retry_uffdio_zeropage(ufd, &uffdio_zeropage,
+						      offset);
+			}
+			return 1;
+		}
+	} else {
+		fprintf(stderr,
+			"UFFDIO_ZEROPAGE succeeded %Ld\n",
+			uffdio_zeropage.zeropage), exit(1);
+	}
+
+	return 0;
+}
+
+static int uffdio_zeropage(int ufd, unsigned long offset)
+{
+	return __uffdio_zeropage(ufd, offset, false);
+}
+
+/* exercise UFFDIO_ZEROPAGE */
+static int userfaultfd_zeropage_test(void)
+{
+	struct uffdio_register uffdio_register;
+	unsigned long expected_ioctls;
+
+	printf("testing UFFDIO_ZEROPAGE: ");
+	fflush(stdout);
+
+	if (uffd_test_ops->release_pages(area_dst))
+		return 1;
+
+	if (userfaultfd_open(0) < 0)
+		return 1;
+	uffdio_register.range.start = (unsigned long) area_dst;
+	uffdio_register.range.len = nr_pages * page_size;
+	uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING;
+	if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register))
+		fprintf(stderr, "register failure\n"), exit(1);
+
+	expected_ioctls = uffd_test_ops->expected_ioctls;
+	if ((uffdio_register.ioctls & expected_ioctls) !=
+	    expected_ioctls)
+		fprintf(stderr,
+			"unexpected missing ioctl for anon memory\n"),
+			exit(1);
+
+	if (uffdio_zeropage(uffd, 0)) {
+		if (my_bcmp(area_dst, zeropage, page_size))
+			fprintf(stderr, "zeropage is not zero\n"), exit(1);
+	}
+
+	close(uffd);
+	printf("done.\n");
+	return 0;
+}
+
+static int userfaultfd_events_test(void)
+{
+	struct uffdio_register uffdio_register;
+	unsigned long expected_ioctls;
+	unsigned long userfaults;
+	pthread_t uffd_mon;
+	int err, features;
+	pid_t pid;
+	char c;
+
+	printf("testing events (fork, remap, remove): ");
+	fflush(stdout);
+
+	if (uffd_test_ops->release_pages(area_dst))
+		return 1;
+
+	features = UFFD_FEATURE_EVENT_FORK | UFFD_FEATURE_EVENT_REMAP |
+		UFFD_FEATURE_EVENT_REMOVE;
+	if (userfaultfd_open(features) < 0)
+		return 1;
+	fcntl(uffd, F_SETFL, uffd_flags | O_NONBLOCK);
+
+	uffdio_register.range.start = (unsigned long) area_dst;
+	uffdio_register.range.len = nr_pages * page_size;
+	uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING;
+	if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register))
+		fprintf(stderr, "register failure\n"), exit(1);
+
+	expected_ioctls = uffd_test_ops->expected_ioctls;
+	if ((uffdio_register.ioctls & expected_ioctls) !=
+	    expected_ioctls)
+		fprintf(stderr,
+			"unexpected missing ioctl for anon memory\n"),
+			exit(1);
+
+	if (pthread_create(&uffd_mon, &attr, uffd_poll_thread, NULL))
+		perror("uffd_poll_thread create"), exit(1);
+
+	pid = fork();
+	if (pid < 0)
+		perror("fork"), exit(1);
+
+	if (!pid)
+		return faulting_process(0);
+
+	waitpid(pid, &err, 0);
+	if (err)
+		fprintf(stderr, "faulting process failed\n"), exit(1);
+
+	if (write(pipefd[1], &c, sizeof(c)) != sizeof(c))
+		perror("pipe write"), exit(1);
+	if (pthread_join(uffd_mon, (void **)&userfaults))
+		return 1;
+
+	close(uffd);
+	printf("userfaults: %ld\n", userfaults);
+
+	return userfaults != nr_pages;
+}
+
+static int userfaultfd_sig_test(void)
+{
+	struct uffdio_register uffdio_register;
+	unsigned long expected_ioctls;
+	unsigned long userfaults;
+	pthread_t uffd_mon;
+	int err, features;
+	pid_t pid;
+	char c;
+
+	printf("testing signal delivery: ");
+	fflush(stdout);
+
+	if (uffd_test_ops->release_pages(area_dst))
+		return 1;
+
+	features = UFFD_FEATURE_EVENT_FORK|UFFD_FEATURE_SIGBUS;
+	if (userfaultfd_open(features) < 0)
+		return 1;
+	fcntl(uffd, F_SETFL, uffd_flags | O_NONBLOCK);
+
+	uffdio_register.range.start = (unsigned long) area_dst;
+	uffdio_register.range.len = nr_pages * page_size;
+	uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING;
+	if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register))
+		fprintf(stderr, "register failure\n"), exit(1);
+
+	expected_ioctls = uffd_test_ops->expected_ioctls;
+	if ((uffdio_register.ioctls & expected_ioctls) !=
+	    expected_ioctls)
+		fprintf(stderr,
+			"unexpected missing ioctl for anon memory\n"),
+			exit(1);
+
+	if (faulting_process(1))
+		fprintf(stderr, "faulting process failed\n"), exit(1);
+
+	if (uffd_test_ops->release_pages(area_dst))
+		return 1;
+
+	if (pthread_create(&uffd_mon, &attr, uffd_poll_thread, NULL))
+		perror("uffd_poll_thread create"), exit(1);
+
+	pid = fork();
+	if (pid < 0)
+		perror("fork"), exit(1);
+
+	if (!pid)
+		exit(faulting_process(2));
+
+	waitpid(pid, &err, 0);
+	if (err)
+		fprintf(stderr, "faulting process failed\n"), exit(1);
+
+	if (write(pipefd[1], &c, sizeof(c)) != sizeof(c))
+		perror("pipe write"), exit(1);
+	if (pthread_join(uffd_mon, (void **)&userfaults))
+		return 1;
+
+	printf("done.\n");
+	if (userfaults)
+		fprintf(stderr, "Signal test failed, userfaults: %ld\n",
+			userfaults);
+	close(uffd);
+	return userfaults != 0;
+}
+static int userfaultfd_stress(void)
+{
+	void *area;
+	char *tmp_area;
+	unsigned long nr;
+	struct uffdio_register uffdio_register;
+	unsigned long cpu;
+	int err;
+	unsigned long userfaults[nr_cpus];
+
+	uffd_test_ops->allocate_area((void **)&area_src);
+	if (!area_src)
+		return 1;
+	uffd_test_ops->allocate_area((void **)&area_dst);
+	if (!area_dst)
+		return 1;
+
+	if (userfaultfd_open(0) < 0)
+		return 1;
+
+	count_verify = malloc(nr_pages * sizeof(unsigned long long));
+	if (!count_verify) {
+		perror("count_verify");
+		return 1;
+	}
+
+	for (nr = 0; nr < nr_pages; nr++) {
+		*area_mutex(area_src, nr) = (pthread_mutex_t)
+			PTHREAD_MUTEX_INITIALIZER;
+		count_verify[nr] = *area_count(area_src, nr) = 1;
+		/*
+		 * In the transition between 255 to 256, powerpc will
+		 * read out of order in my_bcmp and see both bytes as
+		 * zero, so leave a placeholder below always non-zero
+		 * after the count, to avoid my_bcmp to trigger false
+		 * positives.
+		 */
+		*(area_count(area_src, nr) + 1) = 1;
+	}
+
+	pipefd = malloc(sizeof(int) * nr_cpus * 2);
+	if (!pipefd) {
+		perror("pipefd");
+		return 1;
+	}
+	for (cpu = 0; cpu < nr_cpus; cpu++) {
+		if (pipe2(&pipefd[cpu*2], O_CLOEXEC | O_NONBLOCK)) {
+			perror("pipe");
+			return 1;
+		}
+	}
+
+	if (posix_memalign(&area, page_size, page_size)) {
+		fprintf(stderr, "out of memory\n");
+		return 1;
+	}
+	zeropage = area;
+	bzero(zeropage, page_size);
+
+	pthread_mutex_lock(&uffd_read_mutex);
+
+	pthread_attr_init(&attr);
+	pthread_attr_setstacksize(&attr, 16*1024*1024);
+
+	err = 0;
+	while (bounces--) {
+		unsigned long expected_ioctls;
+
+		printf("bounces: %d, mode:", bounces);
+		if (bounces & BOUNCE_RANDOM)
+			printf(" rnd");
+		if (bounces & BOUNCE_RACINGFAULTS)
+			printf(" racing");
+		if (bounces & BOUNCE_VERIFY)
+			printf(" ver");
+		if (bounces & BOUNCE_POLL)
+			printf(" poll");
+		printf(", ");
+		fflush(stdout);
+
+		if (bounces & BOUNCE_POLL)
+			fcntl(uffd, F_SETFL, uffd_flags | O_NONBLOCK);
+		else
+			fcntl(uffd, F_SETFL, uffd_flags & ~O_NONBLOCK);
+
+		/* register */
+		uffdio_register.range.start = (unsigned long) area_dst;
+		uffdio_register.range.len = nr_pages * page_size;
+		uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING;
+		if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register)) {
+			fprintf(stderr, "register failure\n");
+			return 1;
+		}
+		expected_ioctls = uffd_test_ops->expected_ioctls;
+		if ((uffdio_register.ioctls & expected_ioctls) !=
+		    expected_ioctls) {
+			fprintf(stderr,
+				"unexpected missing ioctl for anon memory\n");
+			return 1;
+		}
+
+		if (area_dst_alias) {
+			uffdio_register.range.start = (unsigned long)
+				area_dst_alias;
+			if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register)) {
+				fprintf(stderr, "register failure alias\n");
+				return 1;
+			}
+		}
+
+		/*
+		 * The madvise done previously isn't enough: some
+		 * uffd_thread could have read userfaults (one of
+		 * those already resolved by the background thread)
+		 * and it may be in the process of calling
+		 * UFFDIO_COPY. UFFDIO_COPY will read the zapped
+		 * area_src and it would map a zero page in it (of
+		 * course such a UFFDIO_COPY is perfectly safe as it'd
+		 * return -EEXIST). The problem comes at the next
+		 * bounce though: that racing UFFDIO_COPY would
+		 * generate zeropages in the area_src, so invalidating
+		 * the previous MADV_DONTNEED. Without this additional
+		 * MADV_DONTNEED those zeropages leftovers in the
+		 * area_src would lead to -EEXIST failure during the
+		 * next bounce, effectively leaving a zeropage in the
+		 * area_dst.
+		 *
+		 * Try to comment this out madvise to see the memory
+		 * corruption being caught pretty quick.
+		 *
+		 * khugepaged is also inhibited to collapse THP after
+		 * MADV_DONTNEED only after the UFFDIO_REGISTER, so it's
+		 * required to MADV_DONTNEED here.
+		 */
+		if (uffd_test_ops->release_pages(area_dst))
+			return 1;
+
+		/* bounce pass */
+		if (stress(userfaults))
+			return 1;
+
+		/* unregister */
+		if (ioctl(uffd, UFFDIO_UNREGISTER, &uffdio_register.range)) {
+			fprintf(stderr, "unregister failure\n");
+			return 1;
+		}
+		if (area_dst_alias) {
+			uffdio_register.range.start = (unsigned long) area_dst;
+			if (ioctl(uffd, UFFDIO_UNREGISTER,
+				  &uffdio_register.range)) {
+				fprintf(stderr, "unregister failure alias\n");
+				return 1;
+			}
+		}
+
+		/* verification */
+		if (bounces & BOUNCE_VERIFY) {
+			for (nr = 0; nr < nr_pages; nr++) {
+				if (*area_count(area_dst, nr) != count_verify[nr]) {
+					fprintf(stderr,
+						"error area_count %Lu %Lu %lu\n",
+						*area_count(area_src, nr),
+						count_verify[nr],
+						nr);
+					err = 1;
+					bounces = 0;
+				}
+			}
+		}
+
+		/* prepare next bounce */
+		tmp_area = area_src;
+		area_src = area_dst;
+		area_dst = tmp_area;
+
+		tmp_area = area_src_alias;
+		area_src_alias = area_dst_alias;
+		area_dst_alias = tmp_area;
+
+		printf("userfaults:");
+		for (cpu = 0; cpu < nr_cpus; cpu++)
+			printf(" %lu", userfaults[cpu]);
+		printf("\n");
+	}
+
+	if (err)
+		return err;
+
+	close(uffd);
+	return userfaultfd_zeropage_test() || userfaultfd_sig_test()
+		|| userfaultfd_events_test();
+}
+
+/*
+ * Copied from mlock2-tests.c
+ */
+unsigned long default_huge_page_size(void)
+{
+	unsigned long hps = 0;
+	char *line = NULL;
+	size_t linelen = 0;
+	FILE *f = fopen("/proc/meminfo", "r");
+
+	if (!f)
+		return 0;
+	while (getline(&line, &linelen, f) > 0) {
+		if (sscanf(line, "Hugepagesize:       %lu kB", &hps) == 1) {
+			hps <<= 10;
+			break;
+		}
+	}
+
+	free(line);
+	fclose(f);
+	return hps;
+}
+
+static void set_test_type(const char *type)
+{
+	if (!strcmp(type, "anon")) {
+		test_type = TEST_ANON;
+		uffd_test_ops = &anon_uffd_test_ops;
+	} else if (!strcmp(type, "hugetlb")) {
+		test_type = TEST_HUGETLB;
+		uffd_test_ops = &hugetlb_uffd_test_ops;
+	} else if (!strcmp(type, "hugetlb_shared")) {
+		map_shared = true;
+		test_type = TEST_HUGETLB;
+		uffd_test_ops = &hugetlb_uffd_test_ops;
+	} else if (!strcmp(type, "shmem")) {
+		map_shared = true;
+		test_type = TEST_SHMEM;
+		uffd_test_ops = &shmem_uffd_test_ops;
+	} else {
+		fprintf(stderr, "Unknown test type: %s\n", type), exit(1);
+	}
+
+	if (test_type == TEST_HUGETLB)
+		page_size = default_huge_page_size();
+	else
+		page_size = sysconf(_SC_PAGE_SIZE);
+
+	if (!page_size)
+		fprintf(stderr, "Unable to determine page size\n"),
+				exit(2);
+	if ((unsigned long) area_count(NULL, 0) + sizeof(unsigned long long) * 2
+	    > page_size)
+		fprintf(stderr, "Impossible to run this test\n"), exit(2);
+}
+
+static void sigalrm(int sig)
+{
+	if (sig != SIGALRM)
+		abort();
+	test_uffdio_copy_eexist = true;
+	test_uffdio_zeropage_eexist = true;
+	alarm(ALARM_INTERVAL_SECS);
+}
+
+int main(int argc, char **argv)
+{
+	if (argc < 4)
+		fprintf(stderr, "Usage: <test type> <MiB> <bounces> [hugetlbfs_file]\n"),
+				exit(1);
+
+	if (signal(SIGALRM, sigalrm) == SIG_ERR)
+		fprintf(stderr, "failed to arm SIGALRM"), exit(1);
+	alarm(ALARM_INTERVAL_SECS);
+
+	set_test_type(argv[1]);
+
+	nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
+	nr_pages_per_cpu = atol(argv[2]) * 1024*1024 / page_size /
+		nr_cpus;
+	if (!nr_pages_per_cpu) {
+		fprintf(stderr, "invalid MiB\n");
+		fprintf(stderr, "Usage: <MiB> <bounces>\n"), exit(1);
+	}
+
+	bounces = atoi(argv[3]);
+	if (bounces <= 0) {
+		fprintf(stderr, "invalid bounces\n");
+		fprintf(stderr, "Usage: <MiB> <bounces>\n"), exit(1);
+	}
+	nr_pages = nr_pages_per_cpu * nr_cpus;
+
+	if (test_type == TEST_HUGETLB) {
+		if (argc < 5)
+			fprintf(stderr, "Usage: hugetlb <MiB> <bounces> <hugetlbfs_file>\n"),
+				exit(1);
+		huge_fd = open(argv[4], O_CREAT | O_RDWR, 0755);
+		if (huge_fd < 0) {
+			fprintf(stderr, "Open of %s failed", argv[3]);
+			perror("open");
+			exit(1);
+		}
+		if (ftruncate(huge_fd, 0)) {
+			fprintf(stderr, "ftruncate %s to size 0 failed", argv[3]);
+			perror("ftruncate");
+			exit(1);
+		}
+	}
+	printf("nr_pages: %lu, nr_pages_per_cpu: %lu\n",
+	       nr_pages, nr_pages_per_cpu);
+	return userfaultfd_stress();
+}
+
+#else /* __NR_userfaultfd */
+
+#warning "missing __NR_userfaultfd definition"
+
+int main(void)
+{
+	printf("skip: Skipping userfaultfd test (missing __NR_userfaultfd)\n");
+	return KSFT_SKIP;
+}
+
+#endif /* __NR_userfaultfd */
diff --git a/tools/testing/selftests/vm/va_128TBswitch.c b/tools/testing/selftests/vm/va_128TBswitch.c
new file mode 100644
index 000000000..e7fe734c3
--- /dev/null
+++ b/tools/testing/selftests/vm/va_128TBswitch.c
@@ -0,0 +1,297 @@
+/*
+ *
+ * Authors: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
+ * Authors: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ */
+
+#include <stdio.h>
+#include <sys/mman.h>
+#include <string.h>
+
+#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
+
+#ifdef __powerpc64__
+#define PAGE_SIZE	(64 << 10)
+/*
+ * This will work with 16M and 2M hugepage size
+ */
+#define HUGETLB_SIZE	(16 << 20)
+#else
+#define PAGE_SIZE	(4 << 10)
+#define HUGETLB_SIZE	(2 << 20)
+#endif
+
+/*
+ * >= 128TB is the hint addr value we used to select
+ * large address space.
+ */
+#define ADDR_SWITCH_HINT (1UL << 47)
+#define LOW_ADDR	((void *) (1UL << 30))
+#define HIGH_ADDR	((void *) (1UL << 48))
+
+struct testcase {
+	void *addr;
+	unsigned long size;
+	unsigned long flags;
+	const char *msg;
+	unsigned int low_addr_required:1;
+	unsigned int keep_mapped:1;
+};
+
+static struct testcase testcases[] = {
+	{
+		/*
+		 * If stack is moved, we could possibly allocate
+		 * this at the requested address.
+		 */
+		.addr = ((void *)(ADDR_SWITCH_HINT - PAGE_SIZE)),
+		.size = PAGE_SIZE,
+		.flags = MAP_PRIVATE | MAP_ANONYMOUS,
+		.msg = "mmap(ADDR_SWITCH_HINT - PAGE_SIZE, PAGE_SIZE)",
+		.low_addr_required = 1,
+	},
+	{
+		/*
+		 * We should never allocate at the requested address or above it
+		 * The len cross the 128TB boundary. Without MAP_FIXED
+		 * we will always search in the lower address space.
+		 */
+		.addr = ((void *)(ADDR_SWITCH_HINT - PAGE_SIZE)),
+		.size = 2 * PAGE_SIZE,
+		.flags = MAP_PRIVATE | MAP_ANONYMOUS,
+		.msg = "mmap(ADDR_SWITCH_HINT - PAGE_SIZE, (2 * PAGE_SIZE))",
+		.low_addr_required = 1,
+	},
+	{
+		/*
+		 * Exact mapping at 128TB, the area is free we should get that
+		 * even without MAP_FIXED.
+		 */
+		.addr = ((void *)(ADDR_SWITCH_HINT)),
+		.size = PAGE_SIZE,
+		.flags = MAP_PRIVATE | MAP_ANONYMOUS,
+		.msg = "mmap(ADDR_SWITCH_HINT, PAGE_SIZE)",
+		.keep_mapped = 1,
+	},
+	{
+		.addr = (void *)(ADDR_SWITCH_HINT),
+		.size = 2 * PAGE_SIZE,
+		.flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED,
+		.msg = "mmap(ADDR_SWITCH_HINT, 2 * PAGE_SIZE, MAP_FIXED)",
+	},
+	{
+		.addr = NULL,
+		.size = 2 * PAGE_SIZE,
+		.flags = MAP_PRIVATE | MAP_ANONYMOUS,
+		.msg = "mmap(NULL)",
+		.low_addr_required = 1,
+	},
+	{
+		.addr = LOW_ADDR,
+		.size = 2 * PAGE_SIZE,
+		.flags = MAP_PRIVATE | MAP_ANONYMOUS,
+		.msg = "mmap(LOW_ADDR)",
+		.low_addr_required = 1,
+	},
+	{
+		.addr = HIGH_ADDR,
+		.size = 2 * PAGE_SIZE,
+		.flags = MAP_PRIVATE | MAP_ANONYMOUS,
+		.msg = "mmap(HIGH_ADDR)",
+		.keep_mapped = 1,
+	},
+	{
+		.addr = HIGH_ADDR,
+		.size = 2 * PAGE_SIZE,
+		.flags = MAP_PRIVATE | MAP_ANONYMOUS,
+		.msg = "mmap(HIGH_ADDR) again",
+		.keep_mapped = 1,
+	},
+	{
+		.addr = HIGH_ADDR,
+		.size = 2 * PAGE_SIZE,
+		.flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED,
+		.msg = "mmap(HIGH_ADDR, MAP_FIXED)",
+	},
+	{
+		.addr = (void *) -1,
+		.size = 2 * PAGE_SIZE,
+		.flags = MAP_PRIVATE | MAP_ANONYMOUS,
+		.msg = "mmap(-1)",
+		.keep_mapped = 1,
+	},
+	{
+		.addr = (void *) -1,
+		.size = 2 * PAGE_SIZE,
+		.flags = MAP_PRIVATE | MAP_ANONYMOUS,
+		.msg = "mmap(-1) again",
+	},
+	{
+		.addr = ((void *)(ADDR_SWITCH_HINT - PAGE_SIZE)),
+		.size = PAGE_SIZE,
+		.flags = MAP_PRIVATE | MAP_ANONYMOUS,
+		.msg = "mmap(ADDR_SWITCH_HINT - PAGE_SIZE, PAGE_SIZE)",
+		.low_addr_required = 1,
+	},
+	{
+		.addr = (void *)(ADDR_SWITCH_HINT - PAGE_SIZE),
+		.size = 2 * PAGE_SIZE,
+		.flags = MAP_PRIVATE | MAP_ANONYMOUS,
+		.msg = "mmap(ADDR_SWITCH_HINT - PAGE_SIZE, 2 * PAGE_SIZE)",
+		.low_addr_required = 1,
+		.keep_mapped = 1,
+	},
+	{
+		.addr = (void *)(ADDR_SWITCH_HINT - PAGE_SIZE / 2),
+		.size = 2 * PAGE_SIZE,
+		.flags = MAP_PRIVATE | MAP_ANONYMOUS,
+		.msg = "mmap(ADDR_SWITCH_HINT - PAGE_SIZE/2 , 2 * PAGE_SIZE)",
+		.low_addr_required = 1,
+		.keep_mapped = 1,
+	},
+	{
+		.addr = ((void *)(ADDR_SWITCH_HINT)),
+		.size = PAGE_SIZE,
+		.flags = MAP_PRIVATE | MAP_ANONYMOUS,
+		.msg = "mmap(ADDR_SWITCH_HINT, PAGE_SIZE)",
+	},
+	{
+		.addr = (void *)(ADDR_SWITCH_HINT),
+		.size = 2 * PAGE_SIZE,
+		.flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED,
+		.msg = "mmap(ADDR_SWITCH_HINT, 2 * PAGE_SIZE, MAP_FIXED)",
+	},
+};
+
+static struct testcase hugetlb_testcases[] = {
+	{
+		.addr = NULL,
+		.size = HUGETLB_SIZE,
+		.flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS,
+		.msg = "mmap(NULL, MAP_HUGETLB)",
+		.low_addr_required = 1,
+	},
+	{
+		.addr = LOW_ADDR,
+		.size = HUGETLB_SIZE,
+		.flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS,
+		.msg = "mmap(LOW_ADDR, MAP_HUGETLB)",
+		.low_addr_required = 1,
+	},
+	{
+		.addr = HIGH_ADDR,
+		.size = HUGETLB_SIZE,
+		.flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS,
+		.msg = "mmap(HIGH_ADDR, MAP_HUGETLB)",
+		.keep_mapped = 1,
+	},
+	{
+		.addr = HIGH_ADDR,
+		.size = HUGETLB_SIZE,
+		.flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS,
+		.msg = "mmap(HIGH_ADDR, MAP_HUGETLB) again",
+		.keep_mapped = 1,
+	},
+	{
+		.addr = HIGH_ADDR,
+		.size = HUGETLB_SIZE,
+		.flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED,
+		.msg = "mmap(HIGH_ADDR, MAP_FIXED | MAP_HUGETLB)",
+	},
+	{
+		.addr = (void *) -1,
+		.size = HUGETLB_SIZE,
+		.flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS,
+		.msg = "mmap(-1, MAP_HUGETLB)",
+		.keep_mapped = 1,
+	},
+	{
+		.addr = (void *) -1,
+		.size = HUGETLB_SIZE,
+		.flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS,
+		.msg = "mmap(-1, MAP_HUGETLB) again",
+	},
+	{
+		.addr = (void *)(ADDR_SWITCH_HINT - PAGE_SIZE),
+		.size = 2 * HUGETLB_SIZE,
+		.flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS,
+		.msg = "mmap(ADDR_SWITCH_HINT - PAGE_SIZE, 2*HUGETLB_SIZE, MAP_HUGETLB)",
+		.low_addr_required = 1,
+		.keep_mapped = 1,
+	},
+	{
+		.addr = (void *)(ADDR_SWITCH_HINT),
+		.size = 2 * HUGETLB_SIZE,
+		.flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED,
+		.msg = "mmap(ADDR_SWITCH_HINT , 2*HUGETLB_SIZE, MAP_FIXED | MAP_HUGETLB)",
+	},
+};
+
+static int run_test(struct testcase *test, int count)
+{
+	void *p;
+	int i, ret = 0;
+
+	for (i = 0; i < count; i++) {
+		struct testcase *t = test + i;
+
+		p = mmap(t->addr, t->size, PROT_READ | PROT_WRITE, t->flags, -1, 0);
+
+		printf("%s: %p - ", t->msg, p);
+
+		if (p == MAP_FAILED) {
+			printf("FAILED\n");
+			ret = 1;
+			continue;
+		}
+
+		if (t->low_addr_required && p >= (void *)(ADDR_SWITCH_HINT)) {
+			printf("FAILED\n");
+			ret = 1;
+		} else {
+			/*
+			 * Do a dereference of the address returned so that we catch
+			 * bugs in page fault handling
+			 */
+			memset(p, 0, t->size);
+			printf("OK\n");
+		}
+		if (!t->keep_mapped)
+			munmap(p, t->size);
+	}
+
+	return ret;
+}
+
+static int supported_arch(void)
+{
+#if defined(__powerpc64__)
+	return 1;
+#elif defined(__x86_64__)
+	return 1;
+#else
+	return 0;
+#endif
+}
+
+int main(int argc, char **argv)
+{
+	int ret;
+
+	if (!supported_arch())
+		return 0;
+
+	ret = run_test(testcases, ARRAY_SIZE(testcases));
+	if (argc == 2 && !strcmp(argv[1], "--run-hugetlb"))
+		ret = run_test(hugetlb_testcases, ARRAY_SIZE(hugetlb_testcases));
+	return ret;
+}
diff --git a/tools/testing/selftests/vm/virtual_address_range.c b/tools/testing/selftests/vm/virtual_address_range.c
new file mode 100644
index 000000000..1830d66a6
--- /dev/null
+++ b/tools/testing/selftests/vm/virtual_address_range.c
@@ -0,0 +1,139 @@
+/*
+ * Copyright 2017, Anshuman Khandual, IBM Corp.
+ * Licensed under GPLv2.
+ *
+ * Works on architectures which support 128TB virtual
+ * address range and beyond.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <errno.h>
+#include <sys/mman.h>
+#include <sys/time.h>
+
+/*
+ * Maximum address range mapped with a single mmap()
+ * call is little bit more than 16GB. Hence 16GB is
+ * chosen as the single chunk size for address space
+ * mapping.
+ */
+#define MAP_CHUNK_SIZE   17179869184UL /* 16GB */
+
+/*
+ * Address space till 128TB is mapped without any hint
+ * and is enabled by default. Address space beyond 128TB
+ * till 512TB is obtained by passing hint address as the
+ * first argument into mmap() system call.
+ *
+ * The process heap address space is divided into two
+ * different areas one below 128TB and one above 128TB
+ * till it reaches 512TB. One with size 128TB and the
+ * other being 384TB.
+ *
+ * On Arm64 the address space is 256TB and no high mappings
+ * are supported so far.
+ */
+
+#define NR_CHUNKS_128TB   8192UL /* Number of 16GB chunks for 128TB */
+#define NR_CHUNKS_256TB   (NR_CHUNKS_128TB * 2UL)
+#define NR_CHUNKS_384TB   (NR_CHUNKS_128TB * 3UL)
+
+#define ADDR_MARK_128TB  (1UL << 47) /* First address beyond 128TB */
+#define ADDR_MARK_256TB  (1UL << 48) /* First address beyond 256TB */
+
+#ifdef __aarch64__
+#define HIGH_ADDR_MARK  ADDR_MARK_256TB
+#define HIGH_ADDR_SHIFT 49
+#define NR_CHUNKS_LOW   NR_CHUNKS_256TB
+#define NR_CHUNKS_HIGH  0
+#else
+#define HIGH_ADDR_MARK  ADDR_MARK_128TB
+#define HIGH_ADDR_SHIFT 48
+#define NR_CHUNKS_LOW   NR_CHUNKS_128TB
+#define NR_CHUNKS_HIGH  NR_CHUNKS_384TB
+#endif
+
+static char *hind_addr(void)
+{
+	int bits = HIGH_ADDR_SHIFT + rand() % (63 - HIGH_ADDR_SHIFT);
+
+	return (char *) (1UL << bits);
+}
+
+static int validate_addr(char *ptr, int high_addr)
+{
+	unsigned long addr = (unsigned long) ptr;
+
+	if (high_addr) {
+		if (addr < HIGH_ADDR_MARK) {
+			printf("Bad address %lx\n", addr);
+			return 1;
+		}
+		return 0;
+	}
+
+	if (addr > HIGH_ADDR_MARK) {
+		printf("Bad address %lx\n", addr);
+		return 1;
+	}
+	return 0;
+}
+
+static int validate_lower_address_hint(void)
+{
+	char *ptr;
+
+	ptr = mmap((void *) (1UL << 45), MAP_CHUNK_SIZE, PROT_READ |
+			PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+
+	if (ptr == MAP_FAILED)
+		return 0;
+
+	return 1;
+}
+
+int main(int argc, char *argv[])
+{
+	char *ptr[NR_CHUNKS_LOW];
+	char *hptr[NR_CHUNKS_HIGH];
+	char *hint;
+	unsigned long i, lchunks, hchunks;
+
+	for (i = 0; i < NR_CHUNKS_LOW; i++) {
+		ptr[i] = mmap(NULL, MAP_CHUNK_SIZE, PROT_READ | PROT_WRITE,
+					MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+
+		if (ptr[i] == MAP_FAILED) {
+			if (validate_lower_address_hint())
+				return 1;
+			break;
+		}
+
+		if (validate_addr(ptr[i], 0))
+			return 1;
+	}
+	lchunks = i;
+
+	for (i = 0; i < NR_CHUNKS_HIGH; i++) {
+		hint = hind_addr();
+		hptr[i] = mmap(hint, MAP_CHUNK_SIZE, PROT_READ | PROT_WRITE,
+					MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+
+		if (hptr[i] == MAP_FAILED)
+			break;
+
+		if (validate_addr(hptr[i], 1))
+			return 1;
+	}
+	hchunks = i;
+
+	for (i = 0; i < lchunks; i++)
+		munmap(ptr[i], MAP_CHUNK_SIZE);
+
+	for (i = 0; i < hchunks; i++)
+		munmap(hptr[i], MAP_CHUNK_SIZE);
+
+	return 0;
+}
diff --git a/tools/testing/selftests/watchdog/.gitignore b/tools/testing/selftests/watchdog/.gitignore
new file mode 100644
index 000000000..5aac51575
--- /dev/null
+++ b/tools/testing/selftests/watchdog/.gitignore
@@ -0,0 +1 @@
+watchdog-test
diff --git a/tools/testing/selftests/watchdog/Makefile b/tools/testing/selftests/watchdog/Makefile
new file mode 100644
index 000000000..6b5598b55
--- /dev/null
+++ b/tools/testing/selftests/watchdog/Makefile
@@ -0,0 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
+TEST_GEN_PROGS := watchdog-test
+
+include ../lib.mk
diff --git a/tools/testing/selftests/watchdog/watchdog-test.c b/tools/testing/selftests/watchdog/watchdog-test.c
new file mode 100644
index 000000000..f1c6e025c
--- /dev/null
+++ b/tools/testing/selftests/watchdog/watchdog-test.c
@@ -0,0 +1,167 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Watchdog Driver Test Program
+ */
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <getopt.h>
+#include <sys/ioctl.h>
+#include <linux/types.h>
+#include <linux/watchdog.h>
+
+#define DEFAULT_PING_RATE	1
+
+int fd;
+const char v = 'V';
+static const char sopts[] = "bdehp:t:";
+static const struct option lopts[] = {
+	{"bootstatus",          no_argument, NULL, 'b'},
+	{"disable",             no_argument, NULL, 'd'},
+	{"enable",              no_argument, NULL, 'e'},
+	{"help",                no_argument, NULL, 'h'},
+	{"pingrate",      required_argument, NULL, 'p'},
+	{"timeout",       required_argument, NULL, 't'},
+	{NULL,                  no_argument, NULL, 0x0}
+};
+
+/*
+ * This function simply sends an IOCTL to the driver, which in turn ticks
+ * the PC Watchdog card to reset its internal timer so it doesn't trigger
+ * a computer reset.
+ */
+static void keep_alive(void)
+{
+	int dummy;
+	int ret;
+
+	ret = ioctl(fd, WDIOC_KEEPALIVE, &dummy);
+	if (!ret)
+		printf(".");
+}
+
+/*
+ * The main program.  Run the program with "-d" to disable the card,
+ * or "-e" to enable the card.
+ */
+
+static void term(int sig)
+{
+	int ret = write(fd, &v, 1);
+
+	close(fd);
+	if (ret < 0)
+		printf("\nStopping watchdog ticks failed (%d)...\n", errno);
+	else
+		printf("\nStopping watchdog ticks...\n");
+	exit(0);
+}
+
+static void usage(char *progname)
+{
+	printf("Usage: %s [options]\n", progname);
+	printf(" -b, --bootstatus    Get last boot status (Watchdog/POR)\n");
+	printf(" -d, --disable       Turn off the watchdog timer\n");
+	printf(" -e, --enable        Turn on the watchdog timer\n");
+	printf(" -h, --help          Print the help message\n");
+	printf(" -p, --pingrate=P    Set ping rate to P seconds (default %d)\n", DEFAULT_PING_RATE);
+	printf(" -t, --timeout=T     Set timeout to T seconds\n");
+	printf("\n");
+	printf("Parameters are parsed left-to-right in real-time.\n");
+	printf("Example: %s -d -t 10 -p 5 -e\n", progname);
+}
+
+int main(int argc, char *argv[])
+{
+	int flags;
+	unsigned int ping_rate = DEFAULT_PING_RATE;
+	int ret;
+	int c;
+	int oneshot = 0;
+
+	setbuf(stdout, NULL);
+
+	fd = open("/dev/watchdog", O_WRONLY);
+
+	if (fd == -1) {
+		if (errno == ENOENT)
+			printf("Watchdog device not enabled.\n");
+		else if (errno == EACCES)
+			printf("Run watchdog as root.\n");
+		else
+			printf("Watchdog device open failed %s\n",
+				strerror(errno));
+		exit(-1);
+	}
+
+	while ((c = getopt_long(argc, argv, sopts, lopts, NULL)) != -1) {
+		switch (c) {
+		case 'b':
+			flags = 0;
+			oneshot = 1;
+			ret = ioctl(fd, WDIOC_GETBOOTSTATUS, &flags);
+			if (!ret)
+				printf("Last boot is caused by: %s.\n", (flags != 0) ?
+					"Watchdog" : "Power-On-Reset");
+			else
+				printf("WDIOC_GETBOOTSTATUS error '%s'\n", strerror(errno));
+			break;
+		case 'd':
+			flags = WDIOS_DISABLECARD;
+			ret = ioctl(fd, WDIOC_SETOPTIONS, &flags);
+			if (!ret)
+				printf("Watchdog card disabled.\n");
+			else
+				printf("WDIOS_DISABLECARD error '%s'\n", strerror(errno));
+			break;
+		case 'e':
+			flags = WDIOS_ENABLECARD;
+			ret = ioctl(fd, WDIOC_SETOPTIONS, &flags);
+			if (!ret)
+				printf("Watchdog card enabled.\n");
+			else
+				printf("WDIOS_ENABLECARD error '%s'\n", strerror(errno));
+			break;
+		case 'p':
+			ping_rate = strtoul(optarg, NULL, 0);
+			if (!ping_rate)
+				ping_rate = DEFAULT_PING_RATE;
+			printf("Watchdog ping rate set to %u seconds.\n", ping_rate);
+			break;
+		case 't':
+			flags = strtoul(optarg, NULL, 0);
+			ret = ioctl(fd, WDIOC_SETTIMEOUT, &flags);
+			if (!ret)
+				printf("Watchdog timeout set to %u seconds.\n", flags);
+			else
+				printf("WDIOC_SETTIMEOUT error '%s'\n", strerror(errno));
+			break;
+		default:
+			usage(argv[0]);
+			goto end;
+		}
+	}
+
+	if (oneshot)
+		goto end;
+
+	printf("Watchdog Ticking Away!\n");
+
+	signal(SIGINT, term);
+
+	while (1) {
+		keep_alive();
+		sleep(ping_rate);
+	}
+end:
+	ret = write(fd, &v, 1);
+	if (ret < 0)
+		printf("Stopping watchdog ticks failed (%d)...\n", errno);
+	close(fd);
+	return 0;
+}
diff --git a/tools/testing/selftests/x86/.gitignore b/tools/testing/selftests/x86/.gitignore
new file mode 100644
index 000000000..7757f73ff
--- /dev/null
+++ b/tools/testing/selftests/x86/.gitignore
@@ -0,0 +1,15 @@
+*_32
+*_64
+single_step_syscall
+sysret_ss_attrs
+syscall_nt
+ptrace_syscall
+test_mremap_vdso
+check_initial_reg_state
+sigreturn
+ldt_gdt
+iopl
+mpx-mini-test
+ioperm
+protection_keys
+test_vdso
diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile
new file mode 100644
index 000000000..186520198
--- /dev/null
+++ b/tools/testing/selftests/x86/Makefile
@@ -0,0 +1,105 @@
+# SPDX-License-Identifier: GPL-2.0
+all:
+
+include ../lib.mk
+
+.PHONY: all all_32 all_64 warn_32bit_failure clean
+
+UNAME_M := $(shell uname -m)
+CAN_BUILD_I386 := $(shell ./check_cc.sh $(CC) trivial_32bit_program.c -m32)
+CAN_BUILD_X86_64 := $(shell ./check_cc.sh $(CC) trivial_64bit_program.c)
+CAN_BUILD_WITH_NOPIE := $(shell ./check_cc.sh $(CC) trivial_program.c -no-pie)
+
+TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt test_mremap_vdso \
+			check_initial_reg_state sigreturn iopl mpx-mini-test ioperm \
+			protection_keys test_vdso test_vsyscall mov_ss_trap
+TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault test_syscall_vdso unwind_vdso \
+			test_FCMOV test_FCOMI test_FISTTP \
+			vdso_restorer
+TARGETS_C_64BIT_ONLY := fsgsbase sysret_rip
+# Some selftests require 32bit support enabled also on 64bit systems
+TARGETS_C_32BIT_NEEDED := ldt_gdt ptrace_syscall
+
+TARGETS_C_32BIT_ALL := $(TARGETS_C_BOTHBITS) $(TARGETS_C_32BIT_ONLY) $(TARGETS_C_32BIT_NEEDED)
+TARGETS_C_64BIT_ALL := $(TARGETS_C_BOTHBITS) $(TARGETS_C_64BIT_ONLY)
+ifeq ($(CAN_BUILD_I386)$(CAN_BUILD_X86_64),11)
+TARGETS_C_64BIT_ALL += $(TARGETS_C_32BIT_NEEDED)
+endif
+
+BINARIES_32 := $(TARGETS_C_32BIT_ALL:%=%_32)
+BINARIES_64 := $(TARGETS_C_64BIT_ALL:%=%_64)
+
+BINARIES_32 := $(patsubst %,$(OUTPUT)/%,$(BINARIES_32))
+BINARIES_64 := $(patsubst %,$(OUTPUT)/%,$(BINARIES_64))
+
+CFLAGS := -O2 -g -std=gnu99 -pthread -Wall
+
+# call32_from_64 in thunks.S uses absolute addresses.
+ifeq ($(CAN_BUILD_WITH_NOPIE),1)
+CFLAGS += -no-pie
+endif
+
+define gen-target-rule-32
+$(1) $(1)_32: $(OUTPUT)/$(1)_32
+.PHONY: $(1) $(1)_32
+endef
+
+define gen-target-rule-64
+$(1) $(1)_64: $(OUTPUT)/$(1)_64
+.PHONY: $(1) $(1)_64
+endef
+
+ifeq ($(CAN_BUILD_I386),1)
+all: all_32
+TEST_PROGS += $(BINARIES_32)
+EXTRA_CFLAGS += -DCAN_BUILD_32
+$(foreach t,$(TARGETS_C_32BIT_ALL),$(eval $(call gen-target-rule-32,$(t))))
+endif
+
+ifeq ($(CAN_BUILD_X86_64),1)
+all: all_64
+TEST_PROGS += $(BINARIES_64)
+EXTRA_CFLAGS += -DCAN_BUILD_64
+$(foreach t,$(TARGETS_C_64BIT_ALL),$(eval $(call gen-target-rule-64,$(t))))
+endif
+
+all_32: $(BINARIES_32)
+
+all_64: $(BINARIES_64)
+
+EXTRA_CLEAN := $(BINARIES_32) $(BINARIES_64)
+
+$(BINARIES_32): $(OUTPUT)/%_32: %.c
+	$(CC) -m32 -o $@ $(CFLAGS) $(EXTRA_CFLAGS) $^ -lrt -ldl -lm
+
+$(BINARIES_64): $(OUTPUT)/%_64: %.c
+	$(CC) -m64 -o $@ $(CFLAGS) $(EXTRA_CFLAGS) $^ -lrt -ldl
+
+# x86_64 users should be encouraged to install 32-bit libraries
+ifeq ($(CAN_BUILD_I386)$(CAN_BUILD_X86_64),01)
+all: warn_32bit_failure
+
+warn_32bit_failure:
+	@echo "Warning: you seem to have a broken 32-bit build" 2>&1; 	\
+	echo "environment.  This will reduce test coverage of 64-bit" 2>&1; \
+	echo "kernels.  If you are using a Debian-like distribution," 2>&1; \
+	echo "try:"; 2>&1; \
+	echo "";							\
+	echo "  apt-get install gcc-multilib libc6-i386 libc6-dev-i386"; \
+	echo "";							\
+	echo "If you are using a Fedora-like distribution, try:";	\
+	echo "";							\
+	echo "  yum install glibc-devel.*i686";				\
+	exit 0;
+endif
+
+# Some tests have additional dependencies.
+$(OUTPUT)/sysret_ss_attrs_64: thunks.S
+$(OUTPUT)/ptrace_syscall_32: raw_syscall_helper_32.S
+$(OUTPUT)/test_syscall_vdso_32: thunks_32.S
+
+# check_initial_reg_state is special: it needs a custom entry, and it
+# needs to be static so that its interpreter doesn't destroy its initial
+# state.
+$(OUTPUT)/check_initial_reg_state_32: CFLAGS += -Wl,-ereal_start -static
+$(OUTPUT)/check_initial_reg_state_64: CFLAGS += -Wl,-ereal_start -static
diff --git a/tools/testing/selftests/x86/check_cc.sh b/tools/testing/selftests/x86/check_cc.sh
new file mode 100755
index 000000000..356689c56
--- /dev/null
+++ b/tools/testing/selftests/x86/check_cc.sh
@@ -0,0 +1,16 @@
+#!/bin/sh
+# check_cc.sh - Helper to test userspace compilation support
+# Copyright (c) 2015 Andrew Lutomirski
+# GPL v2
+
+CC="$1"
+TESTPROG="$2"
+shift 2
+
+if [ -n "$CC" ] && $CC -o /dev/null "$TESTPROG" -O0 "$@" 2>/dev/null; then
+    echo 1
+else
+    echo 0
+fi
+
+exit 0
diff --git a/tools/testing/selftests/x86/check_initial_reg_state.c b/tools/testing/selftests/x86/check_initial_reg_state.c
new file mode 100644
index 000000000..6aaed9b85
--- /dev/null
+++ b/tools/testing/selftests/x86/check_initial_reg_state.c
@@ -0,0 +1,109 @@
+/*
+ * check_initial_reg_state.c - check that execve sets the correct state
+ * Copyright (c) 2014-2016 Andrew Lutomirski
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+
+#define _GNU_SOURCE
+
+#include <stdio.h>
+
+unsigned long ax, bx, cx, dx, si, di, bp, sp, flags;
+unsigned long r8, r9, r10, r11, r12, r13, r14, r15;
+
+asm (
+	".pushsection .text\n\t"
+	".type real_start, @function\n\t"
+	".global real_start\n\t"
+	"real_start:\n\t"
+#ifdef __x86_64__
+	"mov %rax, ax\n\t"
+	"mov %rbx, bx\n\t"
+	"mov %rcx, cx\n\t"
+	"mov %rdx, dx\n\t"
+	"mov %rsi, si\n\t"
+	"mov %rdi, di\n\t"
+	"mov %rbp, bp\n\t"
+	"mov %rsp, sp\n\t"
+	"mov %r8, r8\n\t"
+	"mov %r9, r9\n\t"
+	"mov %r10, r10\n\t"
+	"mov %r11, r11\n\t"
+	"mov %r12, r12\n\t"
+	"mov %r13, r13\n\t"
+	"mov %r14, r14\n\t"
+	"mov %r15, r15\n\t"
+	"pushfq\n\t"
+	"popq flags\n\t"
+#else
+	"mov %eax, ax\n\t"
+	"mov %ebx, bx\n\t"
+	"mov %ecx, cx\n\t"
+	"mov %edx, dx\n\t"
+	"mov %esi, si\n\t"
+	"mov %edi, di\n\t"
+	"mov %ebp, bp\n\t"
+	"mov %esp, sp\n\t"
+	"pushfl\n\t"
+	"popl flags\n\t"
+#endif
+	"jmp _start\n\t"
+	".size real_start, . - real_start\n\t"
+	".popsection");
+
+int main()
+{
+	int nerrs = 0;
+
+	if (sp == 0) {
+		printf("[FAIL]\tTest was built incorrectly\n");
+		return 1;
+	}
+
+	if (ax || bx || cx || dx || si || di || bp
+#ifdef __x86_64__
+	    || r8 || r9 || r10 || r11 || r12 || r13 || r14 || r15
+#endif
+		) {
+		printf("[FAIL]\tAll GPRs except SP should be 0\n");
+#define SHOW(x) printf("\t" #x " = 0x%lx\n", x);
+		SHOW(ax);
+		SHOW(bx);
+		SHOW(cx);
+		SHOW(dx);
+		SHOW(si);
+		SHOW(di);
+		SHOW(bp);
+		SHOW(sp);
+#ifdef __x86_64__
+		SHOW(r8);
+		SHOW(r9);
+		SHOW(r10);
+		SHOW(r11);
+		SHOW(r12);
+		SHOW(r13);
+		SHOW(r14);
+		SHOW(r15);
+#endif
+		nerrs++;
+	} else {
+		printf("[OK]\tAll GPRs except SP are 0\n");
+	}
+
+	if (flags != 0x202) {
+		printf("[FAIL]\tFLAGS is 0x%lx, but it should be 0x202\n", flags);
+		nerrs++;
+	} else {
+		printf("[OK]\tFLAGS is 0x202\n");
+	}
+
+	return nerrs ? 1 : 0;
+}
diff --git a/tools/testing/selftests/x86/entry_from_vm86.c b/tools/testing/selftests/x86/entry_from_vm86.c
new file mode 100644
index 000000000..ade443a88
--- /dev/null
+++ b/tools/testing/selftests/x86/entry_from_vm86.c
@@ -0,0 +1,349 @@
+/*
+ * entry_from_vm86.c - tests kernel entries from vm86 mode
+ * Copyright (c) 2014-2015 Andrew Lutomirski
+ *
+ * This exercises a few paths that need to special-case vm86 mode.
+ *
+ * GPL v2.
+ */
+
+#define _GNU_SOURCE
+
+#include <assert.h>
+#include <stdlib.h>
+#include <sys/syscall.h>
+#include <sys/signal.h>
+#include <sys/ucontext.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <string.h>
+#include <inttypes.h>
+#include <sys/mman.h>
+#include <err.h>
+#include <stddef.h>
+#include <stdbool.h>
+#include <errno.h>
+#include <sys/vm86.h>
+
+static unsigned long load_addr = 0x10000;
+static int nerrs = 0;
+
+static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
+		       int flags)
+{
+	struct sigaction sa;
+	memset(&sa, 0, sizeof(sa));
+	sa.sa_sigaction = handler;
+	sa.sa_flags = SA_SIGINFO | flags;
+	sigemptyset(&sa.sa_mask);
+	if (sigaction(sig, &sa, 0))
+		err(1, "sigaction");
+}
+
+static void clearhandler(int sig)
+{
+	struct sigaction sa;
+	memset(&sa, 0, sizeof(sa));
+	sa.sa_handler = SIG_DFL;
+	sigemptyset(&sa.sa_mask);
+	if (sigaction(sig, &sa, 0))
+		err(1, "sigaction");
+}
+
+static sig_atomic_t got_signal;
+
+static void sighandler(int sig, siginfo_t *info, void *ctx_void)
+{
+	ucontext_t *ctx = (ucontext_t*)ctx_void;
+
+	if (ctx->uc_mcontext.gregs[REG_EFL] & X86_EFLAGS_VM ||
+	    (ctx->uc_mcontext.gregs[REG_CS] & 3) != 3) {
+		printf("[FAIL]\tSignal frame should not reflect vm86 mode\n");
+		nerrs++;
+	}
+
+	const char *signame;
+	if (sig == SIGSEGV)
+		signame = "SIGSEGV";
+	else if (sig == SIGILL)
+		signame = "SIGILL";
+	else
+		signame = "unexpected signal";
+
+	printf("[INFO]\t%s: FLAGS = 0x%lx, CS = 0x%hx\n", signame,
+	       (unsigned long)ctx->uc_mcontext.gregs[REG_EFL],
+	       (unsigned short)ctx->uc_mcontext.gregs[REG_CS]);
+
+	got_signal = 1;
+}
+
+asm (
+	".pushsection .rodata\n\t"
+	".type vmcode_bound, @object\n\t"
+	"vmcode:\n\t"
+	"vmcode_bound:\n\t"
+	".code16\n\t"
+	"bound %ax, (2048)\n\t"
+	"int3\n\t"
+	"vmcode_sysenter:\n\t"
+	"sysenter\n\t"
+	"vmcode_syscall:\n\t"
+	"syscall\n\t"
+	"vmcode_sti:\n\t"
+	"sti\n\t"
+	"vmcode_int3:\n\t"
+	"int3\n\t"
+	"vmcode_int80:\n\t"
+	"int $0x80\n\t"
+	"vmcode_popf_hlt:\n\t"
+	"push %ax\n\t"
+	"popf\n\t"
+	"hlt\n\t"
+	"vmcode_umip:\n\t"
+	/* addressing via displacements */
+	"smsw (2052)\n\t"
+	"sidt (2054)\n\t"
+	"sgdt (2060)\n\t"
+	/* addressing via registers */
+	"mov $2066, %bx\n\t"
+	"smsw (%bx)\n\t"
+	"mov $2068, %bx\n\t"
+	"sidt (%bx)\n\t"
+	"mov $2074, %bx\n\t"
+	"sgdt (%bx)\n\t"
+	/* register operands, only for smsw */
+	"smsw %ax\n\t"
+	"mov %ax, (2080)\n\t"
+	"int3\n\t"
+	"vmcode_umip_str:\n\t"
+	"str %eax\n\t"
+	"vmcode_umip_sldt:\n\t"
+	"sldt %eax\n\t"
+	"int3\n\t"
+	".size vmcode, . - vmcode\n\t"
+	"end_vmcode:\n\t"
+	".code32\n\t"
+	".popsection"
+	);
+
+extern unsigned char vmcode[], end_vmcode[];
+extern unsigned char vmcode_bound[], vmcode_sysenter[], vmcode_syscall[],
+	vmcode_sti[], vmcode_int3[], vmcode_int80[], vmcode_popf_hlt[],
+	vmcode_umip[], vmcode_umip_str[], vmcode_umip_sldt[];
+
+/* Returns false if the test was skipped. */
+static bool do_test(struct vm86plus_struct *v86, unsigned long eip,
+		    unsigned int rettype, unsigned int retarg,
+		    const char *text)
+{
+	long ret;
+
+	printf("[RUN]\t%s from vm86 mode\n", text);
+	v86->regs.eip = eip;
+	ret = vm86(VM86_ENTER, v86);
+
+	if (ret == -1 && (errno == ENOSYS || errno == EPERM)) {
+		printf("[SKIP]\tvm86 %s\n",
+		       errno == ENOSYS ? "not supported" : "not allowed");
+		return false;
+	}
+
+	if (VM86_TYPE(ret) == VM86_INTx) {
+		char trapname[32];
+		int trapno = VM86_ARG(ret);
+		if (trapno == 13)
+			strcpy(trapname, "GP");
+		else if (trapno == 5)
+			strcpy(trapname, "BR");
+		else if (trapno == 14)
+			strcpy(trapname, "PF");
+		else
+			sprintf(trapname, "%d", trapno);
+
+		printf("[INFO]\tExited vm86 mode due to #%s\n", trapname);
+	} else if (VM86_TYPE(ret) == VM86_UNKNOWN) {
+		printf("[INFO]\tExited vm86 mode due to unhandled GP fault\n");
+	} else if (VM86_TYPE(ret) == VM86_TRAP) {
+		printf("[INFO]\tExited vm86 mode due to a trap (arg=%ld)\n",
+		       VM86_ARG(ret));
+	} else if (VM86_TYPE(ret) == VM86_SIGNAL) {
+		printf("[INFO]\tExited vm86 mode due to a signal\n");
+	} else if (VM86_TYPE(ret) == VM86_STI) {
+		printf("[INFO]\tExited vm86 mode due to STI\n");
+	} else {
+		printf("[INFO]\tExited vm86 mode due to type %ld, arg %ld\n",
+		       VM86_TYPE(ret), VM86_ARG(ret));
+	}
+
+	if (rettype == -1 ||
+	    (VM86_TYPE(ret) == rettype && VM86_ARG(ret) == retarg)) {
+		printf("[OK]\tReturned correctly\n");
+	} else {
+		printf("[FAIL]\tIncorrect return reason (started at eip = 0x%lx, ended at eip = 0x%lx)\n", eip, v86->regs.eip);
+		nerrs++;
+	}
+
+	return true;
+}
+
+void do_umip_tests(struct vm86plus_struct *vm86, unsigned char *test_mem)
+{
+	struct table_desc {
+		unsigned short limit;
+		unsigned long base;
+	} __attribute__((packed));
+
+	/* Initialize variables with arbitrary values */
+	struct table_desc gdt1 = { .base = 0x3c3c3c3c, .limit = 0x9999 };
+	struct table_desc gdt2 = { .base = 0x1a1a1a1a, .limit = 0xaeae };
+	struct table_desc idt1 = { .base = 0x7b7b7b7b, .limit = 0xf1f1 };
+	struct table_desc idt2 = { .base = 0x89898989, .limit = 0x1313 };
+	unsigned short msw1 = 0x1414, msw2 = 0x2525, msw3 = 3737;
+
+	/* UMIP -- exit with INT3 unless kernel emulation did not trap #GP */
+	do_test(vm86, vmcode_umip - vmcode, VM86_TRAP, 3, "UMIP tests");
+
+	/* Results from displacement-only addressing */
+	msw1 = *(unsigned short *)(test_mem + 2052);
+	memcpy(&idt1, test_mem + 2054, sizeof(idt1));
+	memcpy(&gdt1, test_mem + 2060, sizeof(gdt1));
+
+	/* Results from register-indirect addressing */
+	msw2 = *(unsigned short *)(test_mem + 2066);
+	memcpy(&idt2, test_mem + 2068, sizeof(idt2));
+	memcpy(&gdt2, test_mem + 2074, sizeof(gdt2));
+
+	/* Results when using register operands */
+	msw3 = *(unsigned short *)(test_mem + 2080);
+
+	printf("[INFO]\tResult from SMSW:[0x%04x]\n", msw1);
+	printf("[INFO]\tResult from SIDT: limit[0x%04x]base[0x%08lx]\n",
+	       idt1.limit, idt1.base);
+	printf("[INFO]\tResult from SGDT: limit[0x%04x]base[0x%08lx]\n",
+	       gdt1.limit, gdt1.base);
+
+	if (msw1 != msw2 || msw1 != msw3)
+		printf("[FAIL]\tAll the results of SMSW should be the same.\n");
+	else
+		printf("[PASS]\tAll the results from SMSW are identical.\n");
+
+	if (memcmp(&gdt1, &gdt2, sizeof(gdt1)))
+		printf("[FAIL]\tAll the results of SGDT should be the same.\n");
+	else
+		printf("[PASS]\tAll the results from SGDT are identical.\n");
+
+	if (memcmp(&idt1, &idt2, sizeof(idt1)))
+		printf("[FAIL]\tAll the results of SIDT should be the same.\n");
+	else
+		printf("[PASS]\tAll the results from SIDT are identical.\n");
+
+	sethandler(SIGILL, sighandler, 0);
+	do_test(vm86, vmcode_umip_str - vmcode, VM86_SIGNAL, 0,
+		"STR instruction");
+	clearhandler(SIGILL);
+
+	sethandler(SIGILL, sighandler, 0);
+	do_test(vm86, vmcode_umip_sldt - vmcode, VM86_SIGNAL, 0,
+		"SLDT instruction");
+	clearhandler(SIGILL);
+}
+
+int main(void)
+{
+	struct vm86plus_struct v86;
+	unsigned char *addr = mmap((void *)load_addr, 4096,
+				   PROT_READ | PROT_WRITE | PROT_EXEC,
+				   MAP_ANONYMOUS | MAP_PRIVATE, -1,0);
+	if (addr != (unsigned char *)load_addr)
+		err(1, "mmap");
+
+	memcpy(addr, vmcode, end_vmcode - vmcode);
+	addr[2048] = 2;
+	addr[2050] = 3;
+
+	memset(&v86, 0, sizeof(v86));
+
+	v86.regs.cs = load_addr / 16;
+	v86.regs.ss = load_addr / 16;
+	v86.regs.ds = load_addr / 16;
+	v86.regs.es = load_addr / 16;
+
+	/* Use the end of the page as our stack. */
+	v86.regs.esp = 4096;
+
+	assert((v86.regs.cs & 3) == 0);	/* Looks like RPL = 0 */
+
+	/* #BR -- should deliver SIG??? */
+	do_test(&v86, vmcode_bound - vmcode, VM86_INTx, 5, "#BR");
+
+	/*
+	 * SYSENTER -- should cause #GP or #UD depending on CPU.
+	 * Expected return type -1 means that we shouldn't validate
+	 * the vm86 return value.  This will avoid problems on non-SEP
+	 * CPUs.
+	 */
+	sethandler(SIGILL, sighandler, 0);
+	do_test(&v86, vmcode_sysenter - vmcode, -1, 0, "SYSENTER");
+	clearhandler(SIGILL);
+
+	/*
+	 * SYSCALL would be a disaster in VM86 mode.  Fortunately,
+	 * there is no kernel that both enables SYSCALL and sets
+	 * EFER.SCE, so it's #UD on all systems.  But vm86 is
+	 * buggy (or has a "feature"), so the SIGILL will actually
+	 * be delivered.
+	 */
+	sethandler(SIGILL, sighandler, 0);
+	do_test(&v86, vmcode_syscall - vmcode, VM86_SIGNAL, 0, "SYSCALL");
+	clearhandler(SIGILL);
+
+	/* STI with VIP set */
+	v86.regs.eflags |= X86_EFLAGS_VIP;
+	v86.regs.eflags &= ~X86_EFLAGS_IF;
+	do_test(&v86, vmcode_sti - vmcode, VM86_STI, 0, "STI with VIP set");
+
+	/* POPF with VIP set but IF clear: should not trap */
+	v86.regs.eflags = X86_EFLAGS_VIP;
+	v86.regs.eax = 0;
+	do_test(&v86, vmcode_popf_hlt - vmcode, VM86_UNKNOWN, 0, "POPF with VIP set and IF clear");
+
+	/* POPF with VIP set and IF set: should trap */
+	v86.regs.eflags = X86_EFLAGS_VIP;
+	v86.regs.eax = X86_EFLAGS_IF;
+	do_test(&v86, vmcode_popf_hlt - vmcode, VM86_STI, 0, "POPF with VIP and IF set");
+
+	/* POPF with VIP clear and IF set: should not trap */
+	v86.regs.eflags = 0;
+	v86.regs.eax = X86_EFLAGS_IF;
+	do_test(&v86, vmcode_popf_hlt - vmcode, VM86_UNKNOWN, 0, "POPF with VIP clear and IF set");
+
+	v86.regs.eflags = 0;
+
+	/* INT3 -- should cause #BP */
+	do_test(&v86, vmcode_int3 - vmcode, VM86_TRAP, 3, "INT3");
+
+	/* INT80 -- should exit with "INTx 0x80" */
+	v86.regs.eax = (unsigned int)-1;
+	do_test(&v86, vmcode_int80 - vmcode, VM86_INTx, 0x80, "int80");
+
+	/* UMIP -- should exit with INTx 0x80 unless UMIP was not disabled */
+	do_umip_tests(&v86, addr);
+
+	/* Execute a null pointer */
+	v86.regs.cs = 0;
+	v86.regs.ss = 0;
+	sethandler(SIGSEGV, sighandler, 0);
+	got_signal = 0;
+	if (do_test(&v86, 0, VM86_SIGNAL, 0, "Execute null pointer") &&
+	    !got_signal) {
+		printf("[FAIL]\tDid not receive SIGSEGV\n");
+		nerrs++;
+	}
+	clearhandler(SIGSEGV);
+
+	/* Make sure nothing explodes if we fork. */
+	if (fork() == 0)
+		return 0;
+
+	return (nerrs == 0 ? 0 : 1);
+}
diff --git a/tools/testing/selftests/x86/fsgsbase.c b/tools/testing/selftests/x86/fsgsbase.c
new file mode 100644
index 000000000..f249e042b
--- /dev/null
+++ b/tools/testing/selftests/x86/fsgsbase.c
@@ -0,0 +1,427 @@
+/*
+ * fsgsbase.c, an fsgsbase test
+ * Copyright (c) 2014-2016 Andy Lutomirski
+ * GPL v2
+ */
+
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <string.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+#include <err.h>
+#include <sys/user.h>
+#include <asm/prctl.h>
+#include <sys/prctl.h>
+#include <signal.h>
+#include <limits.h>
+#include <sys/ucontext.h>
+#include <sched.h>
+#include <linux/futex.h>
+#include <pthread.h>
+#include <asm/ldt.h>
+#include <sys/mman.h>
+
+#ifndef __x86_64__
+# error This test is 64-bit only
+#endif
+
+static volatile sig_atomic_t want_segv;
+static volatile unsigned long segv_addr;
+
+static int nerrs;
+
+static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
+		       int flags)
+{
+	struct sigaction sa;
+	memset(&sa, 0, sizeof(sa));
+	sa.sa_sigaction = handler;
+	sa.sa_flags = SA_SIGINFO | flags;
+	sigemptyset(&sa.sa_mask);
+	if (sigaction(sig, &sa, 0))
+		err(1, "sigaction");
+}
+
+static void clearhandler(int sig)
+{
+	struct sigaction sa;
+	memset(&sa, 0, sizeof(sa));
+	sa.sa_handler = SIG_DFL;
+	sigemptyset(&sa.sa_mask);
+	if (sigaction(sig, &sa, 0))
+		err(1, "sigaction");
+}
+
+static void sigsegv(int sig, siginfo_t *si, void *ctx_void)
+{
+	ucontext_t *ctx = (ucontext_t*)ctx_void;
+
+	if (!want_segv) {
+		clearhandler(SIGSEGV);
+		return;  /* Crash cleanly. */
+	}
+
+	want_segv = false;
+	segv_addr = (unsigned long)si->si_addr;
+
+	ctx->uc_mcontext.gregs[REG_RIP] += 4;	/* Skip the faulting mov */
+
+}
+
+enum which_base { FS, GS };
+
+static unsigned long read_base(enum which_base which)
+{
+	unsigned long offset;
+	/*
+	 * Unless we have FSGSBASE, there's no direct way to do this from
+	 * user mode.  We can get at it indirectly using signals, though.
+	 */
+
+	want_segv = true;
+
+	offset = 0;
+	if (which == FS) {
+		/* Use a constant-length instruction here. */
+		asm volatile ("mov %%fs:(%%rcx), %%rax" : : "c" (offset) : "rax");
+	} else {
+		asm volatile ("mov %%gs:(%%rcx), %%rax" : : "c" (offset) : "rax");
+	}
+	if (!want_segv)
+		return segv_addr + offset;
+
+	/*
+	 * If that didn't segfault, try the other end of the address space.
+	 * Unless we get really unlucky and run into the vsyscall page, this
+	 * is guaranteed to segfault.
+	 */
+
+	offset = (ULONG_MAX >> 1) + 1;
+	if (which == FS) {
+		asm volatile ("mov %%fs:(%%rcx), %%rax"
+			      : : "c" (offset) : "rax");
+	} else {
+		asm volatile ("mov %%gs:(%%rcx), %%rax"
+			      : : "c" (offset) : "rax");
+	}
+	if (!want_segv)
+		return segv_addr + offset;
+
+	abort();
+}
+
+static void check_gs_value(unsigned long value)
+{
+	unsigned long base;
+	unsigned short sel;
+
+	printf("[RUN]\tARCH_SET_GS to 0x%lx\n", value);
+	if (syscall(SYS_arch_prctl, ARCH_SET_GS, value) != 0)
+		err(1, "ARCH_SET_GS");
+
+	asm volatile ("mov %%gs, %0" : "=rm" (sel));
+	base = read_base(GS);
+	if (base == value) {
+		printf("[OK]\tGSBASE was set as expected (selector 0x%hx)\n",
+		       sel);
+	} else {
+		nerrs++;
+		printf("[FAIL]\tGSBASE was not as expected: got 0x%lx (selector 0x%hx)\n",
+		       base, sel);
+	}
+
+	if (syscall(SYS_arch_prctl, ARCH_GET_GS, &base) != 0)
+		err(1, "ARCH_GET_GS");
+	if (base == value) {
+		printf("[OK]\tARCH_GET_GS worked as expected (selector 0x%hx)\n",
+		       sel);
+	} else {
+		nerrs++;
+		printf("[FAIL]\tARCH_GET_GS was not as expected: got 0x%lx (selector 0x%hx)\n",
+		       base, sel);
+	}
+}
+
+static void mov_0_gs(unsigned long initial_base, bool schedule)
+{
+	unsigned long base, arch_base;
+
+	printf("[RUN]\tARCH_SET_GS to 0x%lx then mov 0 to %%gs%s\n", initial_base, schedule ? " and schedule " : "");
+	if (syscall(SYS_arch_prctl, ARCH_SET_GS, initial_base) != 0)
+		err(1, "ARCH_SET_GS");
+
+	if (schedule)
+		usleep(10);
+
+	asm volatile ("mov %0, %%gs" : : "rm" (0));
+	base = read_base(GS);
+	if (syscall(SYS_arch_prctl, ARCH_GET_GS, &arch_base) != 0)
+		err(1, "ARCH_GET_GS");
+	if (base == arch_base) {
+		printf("[OK]\tGSBASE is 0x%lx\n", base);
+	} else {
+		nerrs++;
+		printf("[FAIL]\tGSBASE changed to 0x%lx but kernel reports 0x%lx\n", base, arch_base);
+	}
+}
+
+static volatile unsigned long remote_base;
+static volatile bool remote_hard_zero;
+static volatile unsigned int ftx;
+
+/*
+ * ARCH_SET_FS/GS(0) may or may not program a selector of zero.  HARD_ZERO
+ * means to force the selector to zero to improve test coverage.
+ */
+#define HARD_ZERO 0xa1fa5f343cb85fa4
+
+static void do_remote_base()
+{
+	unsigned long to_set = remote_base;
+	bool hard_zero = false;
+	if (to_set == HARD_ZERO) {
+		to_set = 0;
+		hard_zero = true;
+	}
+
+	if (syscall(SYS_arch_prctl, ARCH_SET_GS, to_set) != 0)
+		err(1, "ARCH_SET_GS");
+
+	if (hard_zero)
+		asm volatile ("mov %0, %%gs" : : "rm" ((unsigned short)0));
+
+	unsigned short sel;
+	asm volatile ("mov %%gs, %0" : "=rm" (sel));
+	printf("\tother thread: ARCH_SET_GS(0x%lx)%s -- sel is 0x%hx\n",
+	       to_set, hard_zero ? " and clear gs" : "", sel);
+}
+
+void do_unexpected_base(void)
+{
+	/*
+	 * The goal here is to try to arrange for GS == 0, GSBASE !=
+	 * 0, and for the the kernel the think that GSBASE == 0.
+	 *
+	 * To make the test as reliable as possible, this uses
+	 * explicit descriptorss.  (This is not the only way.  This
+	 * could use ARCH_SET_GS with a low, nonzero base, but the
+	 * relevant side effect of ARCH_SET_GS could change.)
+	 */
+
+	/* Step 1: tell the kernel that we have GSBASE == 0. */
+	if (syscall(SYS_arch_prctl, ARCH_SET_GS, 0) != 0)
+		err(1, "ARCH_SET_GS");
+
+	/* Step 2: change GSBASE without telling the kernel. */
+	struct user_desc desc = {
+		.entry_number    = 0,
+		.base_addr       = 0xBAADF00D,
+		.limit           = 0xfffff,
+		.seg_32bit       = 1,
+		.contents        = 0, /* Data, grow-up */
+		.read_exec_only  = 0,
+		.limit_in_pages  = 1,
+		.seg_not_present = 0,
+		.useable         = 0
+	};
+	if (syscall(SYS_modify_ldt, 1, &desc, sizeof(desc)) == 0) {
+		printf("\tother thread: using LDT slot 0\n");
+		asm volatile ("mov %0, %%gs" : : "rm" ((unsigned short)0x7));
+	} else {
+		/* No modify_ldt for us (configured out, perhaps) */
+
+		struct user_desc *low_desc = mmap(
+			NULL, sizeof(desc),
+			PROT_READ | PROT_WRITE,
+			MAP_PRIVATE | MAP_ANONYMOUS | MAP_32BIT, -1, 0);
+		memcpy(low_desc, &desc, sizeof(desc));
+
+		low_desc->entry_number = -1;
+
+		/* 32-bit set_thread_area */
+		long ret;
+		asm volatile ("int $0x80"
+			      : "=a" (ret) : "a" (243), "b" (low_desc)
+			      : "r8", "r9", "r10", "r11");
+		memcpy(&desc, low_desc, sizeof(desc));
+		munmap(low_desc, sizeof(desc));
+
+		if (ret != 0) {
+			printf("[NOTE]\tcould not create a segment -- test won't do anything\n");
+			return;
+		}
+		printf("\tother thread: using GDT slot %d\n", desc.entry_number);
+		asm volatile ("mov %0, %%gs" : : "rm" ((unsigned short)((desc.entry_number << 3) | 0x3)));
+	}
+
+	/*
+	 * Step 3: set the selector back to zero.  On AMD chips, this will
+	 * preserve GSBASE.
+	 */
+
+	asm volatile ("mov %0, %%gs" : : "rm" ((unsigned short)0));
+}
+
+static void *threadproc(void *ctx)
+{
+	while (1) {
+		while (ftx == 0)
+			syscall(SYS_futex, &ftx, FUTEX_WAIT, 0, NULL, NULL, 0);
+		if (ftx == 3)
+			return NULL;
+
+		if (ftx == 1)
+			do_remote_base();
+		else if (ftx == 2)
+			do_unexpected_base();
+		else
+			errx(1, "helper thread got bad command");
+
+		ftx = 0;
+		syscall(SYS_futex, &ftx, FUTEX_WAKE, 0, NULL, NULL, 0);
+	}
+}
+
+static void set_gs_and_switch_to(unsigned long local,
+				 unsigned short force_sel,
+				 unsigned long remote)
+{
+	unsigned long base;
+	unsigned short sel_pre_sched, sel_post_sched;
+
+	bool hard_zero = false;
+	if (local == HARD_ZERO) {
+		hard_zero = true;
+		local = 0;
+	}
+
+	printf("[RUN]\tARCH_SET_GS(0x%lx)%s, then schedule to 0x%lx\n",
+	       local, hard_zero ? " and clear gs" : "", remote);
+	if (force_sel)
+		printf("\tBefore schedule, set selector to 0x%hx\n", force_sel);
+	if (syscall(SYS_arch_prctl, ARCH_SET_GS, local) != 0)
+		err(1, "ARCH_SET_GS");
+	if (hard_zero)
+		asm volatile ("mov %0, %%gs" : : "rm" ((unsigned short)0));
+
+	if (read_base(GS) != local) {
+		nerrs++;
+		printf("[FAIL]\tGSBASE wasn't set as expected\n");
+	}
+
+	if (force_sel) {
+		asm volatile ("mov %0, %%gs" : : "rm" (force_sel));
+		sel_pre_sched = force_sel;
+		local = read_base(GS);
+
+		/*
+		 * Signal delivery seems to mess up weird selectors.  Put it
+		 * back.
+		 */
+		asm volatile ("mov %0, %%gs" : : "rm" (force_sel));
+	} else {
+		asm volatile ("mov %%gs, %0" : "=rm" (sel_pre_sched));
+	}
+
+	remote_base = remote;
+	ftx = 1;
+	syscall(SYS_futex, &ftx, FUTEX_WAKE, 0, NULL, NULL, 0);
+	while (ftx != 0)
+		syscall(SYS_futex, &ftx, FUTEX_WAIT, 1, NULL, NULL, 0);
+
+	asm volatile ("mov %%gs, %0" : "=rm" (sel_post_sched));
+	base = read_base(GS);
+	if (base == local && sel_pre_sched == sel_post_sched) {
+		printf("[OK]\tGS/BASE remained 0x%hx/0x%lx\n",
+		       sel_pre_sched, local);
+	} else {
+		nerrs++;
+		printf("[FAIL]\tGS/BASE changed from 0x%hx/0x%lx to 0x%hx/0x%lx\n",
+		       sel_pre_sched, local, sel_post_sched, base);
+	}
+}
+
+static void test_unexpected_base(void)
+{
+	unsigned long base;
+
+	printf("[RUN]\tARCH_SET_GS(0), clear gs, then manipulate GSBASE in a different thread\n");
+	if (syscall(SYS_arch_prctl, ARCH_SET_GS, 0) != 0)
+		err(1, "ARCH_SET_GS");
+	asm volatile ("mov %0, %%gs" : : "rm" ((unsigned short)0));
+
+	ftx = 2;
+	syscall(SYS_futex, &ftx, FUTEX_WAKE, 0, NULL, NULL, 0);
+	while (ftx != 0)
+		syscall(SYS_futex, &ftx, FUTEX_WAIT, 1, NULL, NULL, 0);
+
+	base = read_base(GS);
+	if (base == 0) {
+		printf("[OK]\tGSBASE remained 0\n");
+	} else {
+		nerrs++;
+		printf("[FAIL]\tGSBASE changed to 0x%lx\n", base);
+	}
+}
+
+int main()
+{
+	pthread_t thread;
+
+	sethandler(SIGSEGV, sigsegv, 0);
+
+	check_gs_value(0);
+	check_gs_value(1);
+	check_gs_value(0x200000000);
+	check_gs_value(0);
+	check_gs_value(0x200000000);
+	check_gs_value(1);
+
+	for (int sched = 0; sched < 2; sched++) {
+		mov_0_gs(0, !!sched);
+		mov_0_gs(1, !!sched);
+		mov_0_gs(0x200000000, !!sched);
+	}
+
+	/* Set up for multithreading. */
+
+	cpu_set_t cpuset;
+	CPU_ZERO(&cpuset);
+	CPU_SET(0, &cpuset);
+	if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0)
+		err(1, "sched_setaffinity to CPU 0");	/* should never fail */
+
+	if (pthread_create(&thread, 0, threadproc, 0) != 0)
+		err(1, "pthread_create");
+
+	static unsigned long bases_with_hard_zero[] = {
+		0, HARD_ZERO, 1, 0x200000000,
+	};
+
+	for (int local = 0; local < 4; local++) {
+		for (int remote = 0; remote < 4; remote++) {
+			for (unsigned short s = 0; s < 5; s++) {
+				unsigned short sel = s;
+				if (s == 4)
+					asm ("mov %%ss, %0" : "=rm" (sel));
+				set_gs_and_switch_to(
+					bases_with_hard_zero[local],
+					sel,
+					bases_with_hard_zero[remote]);
+			}
+		}
+	}
+
+	test_unexpected_base();
+
+	ftx = 3;  /* Kill the thread. */
+	syscall(SYS_futex, &ftx, FUTEX_WAKE, 0, NULL, NULL, 0);
+
+	if (pthread_join(thread, NULL) != 0)
+		err(1, "pthread_join");
+
+	return nerrs == 0 ? 0 : 1;
+}
diff --git a/tools/testing/selftests/x86/ioperm.c b/tools/testing/selftests/x86/ioperm.c
new file mode 100644
index 000000000..01de41c1b
--- /dev/null
+++ b/tools/testing/selftests/x86/ioperm.c
@@ -0,0 +1,171 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ioperm.c - Test case for ioperm(2)
+ * Copyright (c) 2015 Andrew Lutomirski
+ */
+
+#define _GNU_SOURCE
+#include <err.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <signal.h>
+#include <setjmp.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <stdbool.h>
+#include <sched.h>
+#include <sys/io.h>
+
+static int nerrs = 0;
+
+static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
+		       int flags)
+{
+	struct sigaction sa;
+	memset(&sa, 0, sizeof(sa));
+	sa.sa_sigaction = handler;
+	sa.sa_flags = SA_SIGINFO | flags;
+	sigemptyset(&sa.sa_mask);
+	if (sigaction(sig, &sa, 0))
+		err(1, "sigaction");
+
+}
+
+static void clearhandler(int sig)
+{
+	struct sigaction sa;
+	memset(&sa, 0, sizeof(sa));
+	sa.sa_handler = SIG_DFL;
+	sigemptyset(&sa.sa_mask);
+	if (sigaction(sig, &sa, 0))
+		err(1, "sigaction");
+}
+
+static jmp_buf jmpbuf;
+
+static void sigsegv(int sig, siginfo_t *si, void *ctx_void)
+{
+	siglongjmp(jmpbuf, 1);
+}
+
+static bool try_outb(unsigned short port)
+{
+	sethandler(SIGSEGV, sigsegv, SA_RESETHAND);
+	if (sigsetjmp(jmpbuf, 1) != 0) {
+		return false;
+	} else {
+		asm volatile ("outb %%al, %w[port]"
+			      : : [port] "Nd" (port), "a" (0));
+		return true;
+	}
+	clearhandler(SIGSEGV);
+}
+
+static void expect_ok(unsigned short port)
+{
+	if (!try_outb(port)) {
+		printf("[FAIL]\toutb to 0x%02hx failed\n", port);
+		exit(1);
+	}
+
+	printf("[OK]\toutb to 0x%02hx worked\n", port);
+}
+
+static void expect_gp(unsigned short port)
+{
+	if (try_outb(port)) {
+		printf("[FAIL]\toutb to 0x%02hx worked\n", port);
+		exit(1);
+	}
+
+	printf("[OK]\toutb to 0x%02hx failed\n", port);
+}
+
+int main(void)
+{
+	cpu_set_t cpuset;
+	CPU_ZERO(&cpuset);
+	CPU_SET(0, &cpuset);
+	if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0)
+		err(1, "sched_setaffinity to CPU 0");
+
+	expect_gp(0x80);
+	expect_gp(0xed);
+
+	/*
+	 * Probe for ioperm support.  Note that clearing ioperm bits
+	 * works even as nonroot.
+	 */
+	printf("[RUN]\tenable 0x80\n");
+	if (ioperm(0x80, 1, 1) != 0) {
+		printf("[OK]\tioperm(0x80, 1, 1) failed (%d) -- try running as root\n",
+		       errno);
+		return 0;
+	}
+	expect_ok(0x80);
+	expect_gp(0xed);
+
+	printf("[RUN]\tdisable 0x80\n");
+	if (ioperm(0x80, 1, 0) != 0) {
+		printf("[FAIL]\tioperm(0x80, 1, 0) failed (%d)", errno);
+		return 1;
+	}
+	expect_gp(0x80);
+	expect_gp(0xed);
+
+	/* Make sure that fork() preserves ioperm. */
+	if (ioperm(0x80, 1, 1) != 0) {
+		printf("[FAIL]\tioperm(0x80, 1, 0) failed (%d)", errno);
+		return 1;
+	}
+
+	pid_t child = fork();
+	if (child == -1)
+		err(1, "fork");
+
+	if (child == 0) {
+		printf("[RUN]\tchild: check that we inherited permissions\n");
+		expect_ok(0x80);
+		expect_gp(0xed);
+		return 0;
+	} else {
+		int status;
+		if (waitpid(child, &status, 0) != child ||
+		    !WIFEXITED(status)) {
+			printf("[FAIL]\tChild died\n");
+			nerrs++;
+		} else if (WEXITSTATUS(status) != 0) {
+			printf("[FAIL]\tChild failed\n");
+			nerrs++;
+		} else {
+			printf("[OK]\tChild succeeded\n");
+		}
+	}
+
+	/* Test the capability checks. */
+
+	printf("\tDrop privileges\n");
+	if (setresuid(1, 1, 1) != 0) {
+		printf("[WARN]\tDropping privileges failed\n");
+		return 0;
+	}
+
+	printf("[RUN]\tdisable 0x80\n");
+	if (ioperm(0x80, 1, 0) != 0) {
+		printf("[FAIL]\tioperm(0x80, 1, 0) failed (%d)", errno);
+		return 1;
+	}
+	printf("[OK]\tit worked\n");
+
+	printf("[RUN]\tenable 0x80 again\n");
+	if (ioperm(0x80, 1, 1) == 0) {
+		printf("[FAIL]\tit succeeded but should have failed.\n");
+		return 1;
+	}
+	printf("[OK]\tit failed\n");
+	return 0;
+}
diff --git a/tools/testing/selftests/x86/iopl.c b/tools/testing/selftests/x86/iopl.c
new file mode 100644
index 000000000..6aa27f346
--- /dev/null
+++ b/tools/testing/selftests/x86/iopl.c
@@ -0,0 +1,136 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * iopl.c - Test case for a Linux on Xen 64-bit bug
+ * Copyright (c) 2015 Andrew Lutomirski
+ */
+
+#define _GNU_SOURCE
+#include <err.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <signal.h>
+#include <setjmp.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <stdbool.h>
+#include <sched.h>
+#include <sys/io.h>
+
+static int nerrs = 0;
+
+static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
+		       int flags)
+{
+	struct sigaction sa;
+	memset(&sa, 0, sizeof(sa));
+	sa.sa_sigaction = handler;
+	sa.sa_flags = SA_SIGINFO | flags;
+	sigemptyset(&sa.sa_mask);
+	if (sigaction(sig, &sa, 0))
+		err(1, "sigaction");
+
+}
+
+static jmp_buf jmpbuf;
+
+static void sigsegv(int sig, siginfo_t *si, void *ctx_void)
+{
+	siglongjmp(jmpbuf, 1);
+}
+
+int main(void)
+{
+	cpu_set_t cpuset;
+	CPU_ZERO(&cpuset);
+	CPU_SET(0, &cpuset);
+	if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0)
+		err(1, "sched_setaffinity to CPU 0");
+
+	/* Probe for iopl support.  Note that iopl(0) works even as nonroot. */
+	if (iopl(3) != 0) {
+		printf("[OK]\tiopl(3) failed (%d) -- try running as root\n",
+		       errno);
+		return 0;
+	}
+
+	/* Restore our original state prior to starting the test. */
+	if (iopl(0) != 0)
+		err(1, "iopl(0)");
+
+	pid_t child = fork();
+	if (child == -1)
+		err(1, "fork");
+
+	if (child == 0) {
+		printf("\tchild: set IOPL to 3\n");
+		if (iopl(3) != 0)
+			err(1, "iopl");
+
+		printf("[RUN]\tchild: write to 0x80\n");
+		asm volatile ("outb %%al, $0x80" : : "a" (0));
+
+		return 0;
+	} else {
+		int status;
+		if (waitpid(child, &status, 0) != child ||
+		    !WIFEXITED(status)) {
+			printf("[FAIL]\tChild died\n");
+			nerrs++;
+		} else if (WEXITSTATUS(status) != 0) {
+			printf("[FAIL]\tChild failed\n");
+			nerrs++;
+		} else {
+			printf("[OK]\tChild succeeded\n");
+		}
+	}
+
+	printf("[RUN]\tparent: write to 0x80 (should fail)\n");
+
+	sethandler(SIGSEGV, sigsegv, 0);
+	if (sigsetjmp(jmpbuf, 1) != 0) {
+		printf("[OK]\twrite was denied\n");
+	} else {
+		asm volatile ("outb %%al, $0x80" : : "a" (0));
+		printf("[FAIL]\twrite was allowed\n");
+		nerrs++;
+	}
+
+	/* Test the capability checks. */
+	printf("\tiopl(3)\n");
+	if (iopl(3) != 0)
+		err(1, "iopl(3)");
+
+	printf("\tDrop privileges\n");
+	if (setresuid(1, 1, 1) != 0) {
+		printf("[WARN]\tDropping privileges failed\n");
+		goto done;
+	}
+
+	printf("[RUN]\tiopl(3) unprivileged but with IOPL==3\n");
+	if (iopl(3) != 0) {
+		printf("[FAIL]\tiopl(3) should work if iopl is already 3 even if unprivileged\n");
+		nerrs++;
+	}
+
+	printf("[RUN]\tiopl(0) unprivileged\n");
+	if (iopl(0) != 0) {
+		printf("[FAIL]\tiopl(0) should work if iopl is already 3 even if unprivileged\n");
+		nerrs++;
+	}
+
+	printf("[RUN]\tiopl(3) unprivileged\n");
+	if (iopl(3) == 0) {
+		printf("[FAIL]\tiopl(3) should fail if when unprivileged if iopl==0\n");
+		nerrs++;
+	} else {
+		printf("[OK]\tFailed as expected\n");
+	}
+
+done:
+	return nerrs ? 1 : 0;
+}
+
diff --git a/tools/testing/selftests/x86/ldt_gdt.c b/tools/testing/selftests/x86/ldt_gdt.c
new file mode 100644
index 000000000..1aef72df2
--- /dev/null
+++ b/tools/testing/selftests/x86/ldt_gdt.c
@@ -0,0 +1,927 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ldt_gdt.c - Test cases for LDT and GDT access
+ * Copyright (c) 2015 Andrew Lutomirski
+ */
+
+#define _GNU_SOURCE
+#include <err.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <signal.h>
+#include <setjmp.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <asm/ldt.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <stdbool.h>
+#include <pthread.h>
+#include <sched.h>
+#include <linux/futex.h>
+#include <sys/mman.h>
+#include <asm/prctl.h>
+#include <sys/prctl.h>
+
+#define AR_ACCESSED		(1<<8)
+
+#define AR_TYPE_RODATA		(0 * (1<<9))
+#define AR_TYPE_RWDATA		(1 * (1<<9))
+#define AR_TYPE_RODATA_EXPDOWN	(2 * (1<<9))
+#define AR_TYPE_RWDATA_EXPDOWN	(3 * (1<<9))
+#define AR_TYPE_XOCODE		(4 * (1<<9))
+#define AR_TYPE_XRCODE		(5 * (1<<9))
+#define AR_TYPE_XOCODE_CONF	(6 * (1<<9))
+#define AR_TYPE_XRCODE_CONF	(7 * (1<<9))
+
+#define AR_DPL3			(3 * (1<<13))
+
+#define AR_S			(1 << 12)
+#define AR_P			(1 << 15)
+#define AR_AVL			(1 << 20)
+#define AR_L			(1 << 21)
+#define AR_DB			(1 << 22)
+#define AR_G			(1 << 23)
+
+#ifdef __x86_64__
+# define INT80_CLOBBERS "r8", "r9", "r10", "r11"
+#else
+# define INT80_CLOBBERS
+#endif
+
+static int nerrs;
+
+/* Points to an array of 1024 ints, each holding its own index. */
+static const unsigned int *counter_page;
+static struct user_desc *low_user_desc;
+static struct user_desc *low_user_desc_clear;  /* Use to delete GDT entry */
+static int gdt_entry_num;
+
+static void check_invalid_segment(uint16_t index, int ldt)
+{
+	uint32_t has_limit = 0, has_ar = 0, limit, ar;
+	uint32_t selector = (index << 3) | (ldt << 2) | 3;
+
+	asm ("lsl %[selector], %[limit]\n\t"
+	     "jnz 1f\n\t"
+	     "movl $1, %[has_limit]\n\t"
+	     "1:"
+	     : [limit] "=r" (limit), [has_limit] "+rm" (has_limit)
+	     : [selector] "r" (selector));
+	asm ("larl %[selector], %[ar]\n\t"
+	     "jnz 1f\n\t"
+	     "movl $1, %[has_ar]\n\t"
+	     "1:"
+	     : [ar] "=r" (ar), [has_ar] "+rm" (has_ar)
+	     : [selector] "r" (selector));
+
+	if (has_limit || has_ar) {
+		printf("[FAIL]\t%s entry %hu is valid but should be invalid\n",
+		       (ldt ? "LDT" : "GDT"), index);
+		nerrs++;
+	} else {
+		printf("[OK]\t%s entry %hu is invalid\n",
+		       (ldt ? "LDT" : "GDT"), index);
+	}
+}
+
+static void check_valid_segment(uint16_t index, int ldt,
+				uint32_t expected_ar, uint32_t expected_limit,
+				bool verbose)
+{
+	uint32_t has_limit = 0, has_ar = 0, limit, ar;
+	uint32_t selector = (index << 3) | (ldt << 2) | 3;
+
+	asm ("lsl %[selector], %[limit]\n\t"
+	     "jnz 1f\n\t"
+	     "movl $1, %[has_limit]\n\t"
+	     "1:"
+	     : [limit] "=r" (limit), [has_limit] "+rm" (has_limit)
+	     : [selector] "r" (selector));
+	asm ("larl %[selector], %[ar]\n\t"
+	     "jnz 1f\n\t"
+	     "movl $1, %[has_ar]\n\t"
+	     "1:"
+	     : [ar] "=r" (ar), [has_ar] "+rm" (has_ar)
+	     : [selector] "r" (selector));
+
+	if (!has_limit || !has_ar) {
+		printf("[FAIL]\t%s entry %hu is invalid but should be valid\n",
+		       (ldt ? "LDT" : "GDT"), index);
+		nerrs++;
+		return;
+	}
+
+	/* The SDM says "bits 19:16 are undefined".  Thanks. */
+	ar &= ~0xF0000;
+
+	/*
+	 * NB: Different Linux versions do different things with the
+	 * accessed bit in set_thread_area().
+	 */
+	if (ar != expected_ar && ar != (expected_ar | AR_ACCESSED)) {
+		printf("[FAIL]\t%s entry %hu has AR 0x%08X but expected 0x%08X\n",
+		       (ldt ? "LDT" : "GDT"), index, ar, expected_ar);
+		nerrs++;
+	} else if (limit != expected_limit) {
+		printf("[FAIL]\t%s entry %hu has limit 0x%08X but expected 0x%08X\n",
+		       (ldt ? "LDT" : "GDT"), index, limit, expected_limit);
+		nerrs++;
+	} else if (verbose) {
+		printf("[OK]\t%s entry %hu has AR 0x%08X and limit 0x%08X\n",
+		       (ldt ? "LDT" : "GDT"), index, ar, limit);
+	}
+}
+
+static bool install_valid_mode(const struct user_desc *d, uint32_t ar,
+			       bool oldmode, bool ldt)
+{
+	struct user_desc desc = *d;
+	int ret;
+
+	if (!ldt) {
+#ifndef __i386__
+		/* No point testing set_thread_area in a 64-bit build */
+		return false;
+#endif
+		if (!gdt_entry_num)
+			return false;
+		desc.entry_number = gdt_entry_num;
+
+		ret = syscall(SYS_set_thread_area, &desc);
+	} else {
+		ret = syscall(SYS_modify_ldt, oldmode ? 1 : 0x11,
+			      &desc, sizeof(desc));
+
+		if (ret < -1)
+			errno = -ret;
+
+		if (ret != 0 && errno == ENOSYS) {
+			printf("[OK]\tmodify_ldt returned -ENOSYS\n");
+			return false;
+		}
+	}
+
+	if (ret == 0) {
+		uint32_t limit = desc.limit;
+		if (desc.limit_in_pages)
+			limit = (limit << 12) + 4095;
+		check_valid_segment(desc.entry_number, ldt, ar, limit, true);
+		return true;
+	} else {
+		if (desc.seg_32bit) {
+			printf("[FAIL]\tUnexpected %s failure %d\n",
+			       ldt ? "modify_ldt" : "set_thread_area",
+			       errno);
+			nerrs++;
+			return false;
+		} else {
+			printf("[OK]\t%s rejected 16 bit segment\n",
+			       ldt ? "modify_ldt" : "set_thread_area");
+			return false;
+		}
+	}
+}
+
+static bool install_valid(const struct user_desc *desc, uint32_t ar)
+{
+	bool ret = install_valid_mode(desc, ar, false, true);
+
+	if (desc->contents <= 1 && desc->seg_32bit &&
+	    !desc->seg_not_present) {
+		/* Should work in the GDT, too. */
+		install_valid_mode(desc, ar, false, false);
+	}
+
+	return ret;
+}
+
+static void install_invalid(const struct user_desc *desc, bool oldmode)
+{
+	int ret = syscall(SYS_modify_ldt, oldmode ? 1 : 0x11,
+			  desc, sizeof(*desc));
+	if (ret < -1)
+		errno = -ret;
+	if (ret == 0) {
+		check_invalid_segment(desc->entry_number, 1);
+	} else if (errno == ENOSYS) {
+		printf("[OK]\tmodify_ldt returned -ENOSYS\n");
+	} else {
+		if (desc->seg_32bit) {
+			printf("[FAIL]\tUnexpected modify_ldt failure %d\n",
+			       errno);
+			nerrs++;
+		} else {
+			printf("[OK]\tmodify_ldt rejected 16 bit segment\n");
+		}
+	}
+}
+
+static int safe_modify_ldt(int func, struct user_desc *ptr,
+			   unsigned long bytecount)
+{
+	int ret = syscall(SYS_modify_ldt, 0x11, ptr, bytecount);
+	if (ret < -1)
+		errno = -ret;
+	return ret;
+}
+
+static void fail_install(struct user_desc *desc)
+{
+	if (safe_modify_ldt(0x11, desc, sizeof(*desc)) == 0) {
+		printf("[FAIL]\tmodify_ldt accepted a bad descriptor\n");
+		nerrs++;
+	} else if (errno == ENOSYS) {
+		printf("[OK]\tmodify_ldt returned -ENOSYS\n");
+	} else {
+		printf("[OK]\tmodify_ldt failure %d\n", errno);
+	}
+}
+
+static void do_simple_tests(void)
+{
+	struct user_desc desc = {
+		.entry_number    = 0,
+		.base_addr       = 0,
+		.limit           = 10,
+		.seg_32bit       = 1,
+		.contents        = 2, /* Code, not conforming */
+		.read_exec_only  = 0,
+		.limit_in_pages  = 0,
+		.seg_not_present = 0,
+		.useable         = 0
+	};
+	install_valid(&desc, AR_DPL3 | AR_TYPE_XRCODE | AR_S | AR_P | AR_DB);
+
+	desc.limit_in_pages = 1;
+	install_valid(&desc, AR_DPL3 | AR_TYPE_XRCODE |
+		      AR_S | AR_P | AR_DB | AR_G);
+
+	check_invalid_segment(1, 1);
+
+	desc.entry_number = 2;
+	install_valid(&desc, AR_DPL3 | AR_TYPE_XRCODE |
+		      AR_S | AR_P | AR_DB | AR_G);
+
+	check_invalid_segment(1, 1);
+
+	desc.base_addr = 0xf0000000;
+	install_valid(&desc, AR_DPL3 | AR_TYPE_XRCODE |
+		      AR_S | AR_P | AR_DB | AR_G);
+
+	desc.useable = 1;
+	install_valid(&desc, AR_DPL3 | AR_TYPE_XRCODE |
+		      AR_S | AR_P | AR_DB | AR_G | AR_AVL);
+
+	desc.seg_not_present = 1;
+	install_valid(&desc, AR_DPL3 | AR_TYPE_XRCODE |
+		      AR_S | AR_DB | AR_G | AR_AVL);
+
+	desc.seg_32bit = 0;
+	install_valid(&desc, AR_DPL3 | AR_TYPE_XRCODE |
+		      AR_S | AR_G | AR_AVL);
+
+	desc.seg_32bit = 1;
+	desc.contents = 0;
+	install_valid(&desc, AR_DPL3 | AR_TYPE_RWDATA |
+		      AR_S | AR_DB | AR_G | AR_AVL);
+
+	desc.read_exec_only = 1;
+	install_valid(&desc, AR_DPL3 | AR_TYPE_RODATA |
+		      AR_S | AR_DB | AR_G | AR_AVL);
+
+	desc.contents = 1;
+	install_valid(&desc, AR_DPL3 | AR_TYPE_RODATA_EXPDOWN |
+		      AR_S | AR_DB | AR_G | AR_AVL);
+
+	desc.read_exec_only = 0;
+	desc.limit_in_pages = 0;
+	install_valid(&desc, AR_DPL3 | AR_TYPE_RWDATA_EXPDOWN |
+		      AR_S | AR_DB | AR_AVL);
+
+	desc.contents = 3;
+	install_valid(&desc, AR_DPL3 | AR_TYPE_XRCODE_CONF |
+		      AR_S | AR_DB | AR_AVL);
+
+	desc.read_exec_only = 1;
+	install_valid(&desc, AR_DPL3 | AR_TYPE_XOCODE_CONF |
+		      AR_S | AR_DB | AR_AVL);
+
+	desc.read_exec_only = 0;
+	desc.contents = 2;
+	install_valid(&desc, AR_DPL3 | AR_TYPE_XRCODE |
+		      AR_S | AR_DB | AR_AVL);
+
+	desc.read_exec_only = 1;
+
+#ifdef __x86_64__
+	desc.lm = 1;
+	install_valid(&desc, AR_DPL3 | AR_TYPE_XOCODE |
+		      AR_S | AR_DB | AR_AVL);
+	desc.lm = 0;
+#endif
+
+	bool entry1_okay = install_valid(&desc, AR_DPL3 | AR_TYPE_XOCODE |
+					 AR_S | AR_DB | AR_AVL);
+
+	if (entry1_okay) {
+		printf("[RUN]\tTest fork\n");
+		pid_t child = fork();
+		if (child == 0) {
+			nerrs = 0;
+			check_valid_segment(desc.entry_number, 1,
+					    AR_DPL3 | AR_TYPE_XOCODE |
+					    AR_S | AR_DB | AR_AVL, desc.limit,
+					    true);
+			check_invalid_segment(1, 1);
+			exit(nerrs ? 1 : 0);
+		} else {
+			int status;
+			if (waitpid(child, &status, 0) != child ||
+			    !WIFEXITED(status)) {
+				printf("[FAIL]\tChild died\n");
+				nerrs++;
+			} else if (WEXITSTATUS(status) != 0) {
+				printf("[FAIL]\tChild failed\n");
+				nerrs++;
+			} else {
+				printf("[OK]\tChild succeeded\n");
+			}
+		}
+
+		printf("[RUN]\tTest size\n");
+		int i;
+		for (i = 0; i < 8192; i++) {
+			desc.entry_number = i;
+			desc.limit = i;
+			if (safe_modify_ldt(0x11, &desc, sizeof(desc)) != 0) {
+				printf("[FAIL]\tFailed to install entry %d\n", i);
+				nerrs++;
+				break;
+			}
+		}
+		for (int j = 0; j < i; j++) {
+			check_valid_segment(j, 1, AR_DPL3 | AR_TYPE_XOCODE |
+					    AR_S | AR_DB | AR_AVL, j, false);
+		}
+		printf("[DONE]\tSize test\n");
+	} else {
+		printf("[SKIP]\tSkipping fork and size tests because we have no LDT\n");
+	}
+
+	/* Test entry_number too high. */
+	desc.entry_number = 8192;
+	fail_install(&desc);
+
+	/* Test deletion and actions mistakeable for deletion. */
+	memset(&desc, 0, sizeof(desc));
+	install_valid(&desc, AR_DPL3 | AR_TYPE_RWDATA | AR_S | AR_P);
+
+	desc.seg_not_present = 1;
+	install_valid(&desc, AR_DPL3 | AR_TYPE_RWDATA | AR_S);
+
+	desc.seg_not_present = 0;
+	desc.read_exec_only = 1;
+	install_valid(&desc, AR_DPL3 | AR_TYPE_RODATA | AR_S | AR_P);
+
+	desc.read_exec_only = 0;
+	desc.seg_not_present = 1;
+	install_valid(&desc, AR_DPL3 | AR_TYPE_RWDATA | AR_S);
+
+	desc.read_exec_only = 1;
+	desc.limit = 1;
+	install_valid(&desc, AR_DPL3 | AR_TYPE_RODATA | AR_S);
+
+	desc.limit = 0;
+	desc.base_addr = 1;
+	install_valid(&desc, AR_DPL3 | AR_TYPE_RODATA | AR_S);
+
+	desc.base_addr = 0;
+	install_invalid(&desc, false);
+
+	desc.seg_not_present = 0;
+	desc.seg_32bit = 1;
+	desc.read_exec_only = 0;
+	desc.limit = 0xfffff;
+
+	install_valid(&desc, AR_DPL3 | AR_TYPE_RWDATA | AR_S | AR_P | AR_DB);
+
+	desc.limit_in_pages = 1;
+
+	install_valid(&desc, AR_DPL3 | AR_TYPE_RWDATA | AR_S | AR_P | AR_DB | AR_G);
+	desc.read_exec_only = 1;
+	install_valid(&desc, AR_DPL3 | AR_TYPE_RODATA | AR_S | AR_P | AR_DB | AR_G);
+	desc.contents = 1;
+	desc.read_exec_only = 0;
+	install_valid(&desc, AR_DPL3 | AR_TYPE_RWDATA_EXPDOWN | AR_S | AR_P | AR_DB | AR_G);
+	desc.read_exec_only = 1;
+	install_valid(&desc, AR_DPL3 | AR_TYPE_RODATA_EXPDOWN | AR_S | AR_P | AR_DB | AR_G);
+
+	desc.limit = 0;
+	install_invalid(&desc, true);
+}
+
+/*
+ * 0: thread is idle
+ * 1: thread armed
+ * 2: thread should clear LDT entry 0
+ * 3: thread should exit
+ */
+static volatile unsigned int ftx;
+
+static void *threadproc(void *ctx)
+{
+	cpu_set_t cpuset;
+	CPU_ZERO(&cpuset);
+	CPU_SET(1, &cpuset);
+	if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0)
+		err(1, "sched_setaffinity to CPU 1");	/* should never fail */
+
+	while (1) {
+		syscall(SYS_futex, &ftx, FUTEX_WAIT, 0, NULL, NULL, 0);
+		while (ftx != 2) {
+			if (ftx >= 3)
+				return NULL;
+		}
+
+		/* clear LDT entry 0 */
+		const struct user_desc desc = {};
+		if (syscall(SYS_modify_ldt, 1, &desc, sizeof(desc)) != 0)
+			err(1, "modify_ldt");
+
+		/* If ftx == 2, set it to zero.  If ftx == 100, quit. */
+		unsigned int x = -2;
+		asm volatile ("lock xaddl %[x], %[ftx]" :
+			      [x] "+r" (x), [ftx] "+m" (ftx));
+		if (x != 2)
+			return NULL;
+	}
+}
+
+#ifdef __i386__
+
+#ifndef SA_RESTORE
+#define SA_RESTORER 0x04000000
+#endif
+
+/*
+ * The UAPI header calls this 'struct sigaction', which conflicts with
+ * glibc.  Sigh.
+ */
+struct fake_ksigaction {
+	void *handler;  /* the real type is nasty */
+	unsigned long sa_flags;
+	void (*sa_restorer)(void);
+	unsigned char sigset[8];
+};
+
+static void fix_sa_restorer(int sig)
+{
+	struct fake_ksigaction ksa;
+
+	if (syscall(SYS_rt_sigaction, sig, NULL, &ksa, 8) == 0) {
+		/*
+		 * glibc has a nasty bug: it sometimes writes garbage to
+		 * sa_restorer.  This interacts quite badly with anything
+		 * that fiddles with SS because it can trigger legacy
+		 * stack switching.  Patch it up.  See:
+		 *
+		 * https://sourceware.org/bugzilla/show_bug.cgi?id=21269
+		 */
+		if (!(ksa.sa_flags & SA_RESTORER) && ksa.sa_restorer) {
+			ksa.sa_restorer = NULL;
+			if (syscall(SYS_rt_sigaction, sig, &ksa, NULL,
+				    sizeof(ksa.sigset)) != 0)
+				err(1, "rt_sigaction");
+		}
+	}
+}
+#else
+static void fix_sa_restorer(int sig)
+{
+	/* 64-bit glibc works fine. */
+}
+#endif
+
+static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
+		       int flags)
+{
+	struct sigaction sa;
+	memset(&sa, 0, sizeof(sa));
+	sa.sa_sigaction = handler;
+	sa.sa_flags = SA_SIGINFO | flags;
+	sigemptyset(&sa.sa_mask);
+	if (sigaction(sig, &sa, 0))
+		err(1, "sigaction");
+
+	fix_sa_restorer(sig);
+}
+
+static jmp_buf jmpbuf;
+
+static void sigsegv(int sig, siginfo_t *info, void *ctx_void)
+{
+	siglongjmp(jmpbuf, 1);
+}
+
+static void do_multicpu_tests(void)
+{
+	cpu_set_t cpuset;
+	pthread_t thread;
+	int failures = 0, iters = 5, i;
+	unsigned short orig_ss;
+
+	CPU_ZERO(&cpuset);
+	CPU_SET(1, &cpuset);
+	if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0) {
+		printf("[SKIP]\tCannot set affinity to CPU 1\n");
+		return;
+	}
+
+	CPU_ZERO(&cpuset);
+	CPU_SET(0, &cpuset);
+	if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0) {
+		printf("[SKIP]\tCannot set affinity to CPU 0\n");
+		return;
+	}
+
+	sethandler(SIGSEGV, sigsegv, 0);
+#ifdef __i386__
+	/* True 32-bit kernels send SIGILL instead of SIGSEGV on IRET faults. */
+	sethandler(SIGILL, sigsegv, 0);
+#endif
+
+	printf("[RUN]\tCross-CPU LDT invalidation\n");
+
+	if (pthread_create(&thread, 0, threadproc, 0) != 0)
+		err(1, "pthread_create");
+
+	asm volatile ("mov %%ss, %0" : "=rm" (orig_ss));
+
+	for (i = 0; i < 5; i++) {
+		if (sigsetjmp(jmpbuf, 1) != 0)
+			continue;
+
+		/* Make sure the thread is ready after the last test. */
+		while (ftx != 0)
+			;
+
+		struct user_desc desc = {
+			.entry_number    = 0,
+			.base_addr       = 0,
+			.limit           = 0xfffff,
+			.seg_32bit       = 1,
+			.contents        = 0, /* Data */
+			.read_exec_only  = 0,
+			.limit_in_pages  = 1,
+			.seg_not_present = 0,
+			.useable         = 0
+		};
+
+		if (safe_modify_ldt(0x11, &desc, sizeof(desc)) != 0) {
+			if (errno != ENOSYS)
+				err(1, "modify_ldt");
+			printf("[SKIP]\tmodify_ldt unavailable\n");
+			break;
+		}
+
+		/* Arm the thread. */
+		ftx = 1;
+		syscall(SYS_futex, &ftx, FUTEX_WAKE, 0, NULL, NULL, 0);
+
+		asm volatile ("mov %0, %%ss" : : "r" (0x7));
+
+		/* Go! */
+		ftx = 2;
+
+		while (ftx != 0)
+			;
+
+		/*
+		 * On success, modify_ldt will segfault us synchronously,
+		 * and we'll escape via siglongjmp.
+		 */
+
+		failures++;
+		asm volatile ("mov %0, %%ss" : : "rm" (orig_ss));
+	};
+
+	ftx = 100;  /* Kill the thread. */
+	syscall(SYS_futex, &ftx, FUTEX_WAKE, 0, NULL, NULL, 0);
+
+	if (pthread_join(thread, NULL) != 0)
+		err(1, "pthread_join");
+
+	if (failures) {
+		printf("[FAIL]\t%d of %d iterations failed\n", failures, iters);
+		nerrs++;
+	} else {
+		printf("[OK]\tAll %d iterations succeeded\n", iters);
+	}
+}
+
+static int finish_exec_test(void)
+{
+	/*
+	 * Older kernel versions did inherit the LDT on exec() which is
+	 * wrong because exec() starts from a clean state.
+	 */
+	check_invalid_segment(0, 1);
+
+	return nerrs ? 1 : 0;
+}
+
+static void do_exec_test(void)
+{
+	printf("[RUN]\tTest exec\n");
+
+	struct user_desc desc = {
+		.entry_number    = 0,
+		.base_addr       = 0,
+		.limit           = 42,
+		.seg_32bit       = 1,
+		.contents        = 2, /* Code, not conforming */
+		.read_exec_only  = 0,
+		.limit_in_pages  = 0,
+		.seg_not_present = 0,
+		.useable         = 0
+	};
+	install_valid(&desc, AR_DPL3 | AR_TYPE_XRCODE | AR_S | AR_P | AR_DB);
+
+	pid_t child = fork();
+	if (child == 0) {
+		execl("/proc/self/exe", "ldt_gdt_test_exec", NULL);
+		printf("[FAIL]\tCould not exec self\n");
+		exit(1);	/* exec failed */
+	} else {
+		int status;
+		if (waitpid(child, &status, 0) != child ||
+		    !WIFEXITED(status)) {
+			printf("[FAIL]\tChild died\n");
+			nerrs++;
+		} else if (WEXITSTATUS(status) != 0) {
+			printf("[FAIL]\tChild failed\n");
+			nerrs++;
+		} else {
+			printf("[OK]\tChild succeeded\n");
+		}
+	}
+}
+
+static void setup_counter_page(void)
+{
+	unsigned int *page = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
+			 MAP_ANONYMOUS | MAP_PRIVATE | MAP_32BIT, -1, 0);
+	if (page == MAP_FAILED)
+		err(1, "mmap");
+
+	for (int i = 0; i < 1024; i++)
+		page[i] = i;
+	counter_page = page;
+}
+
+static int invoke_set_thread_area(void)
+{
+	int ret;
+	asm volatile ("int $0x80"
+		      : "=a" (ret), "+m" (low_user_desc) :
+			"a" (243), "b" (low_user_desc)
+		      : INT80_CLOBBERS);
+	return ret;
+}
+
+static void setup_low_user_desc(void)
+{
+	low_user_desc = mmap(NULL, 2 * sizeof(struct user_desc),
+			     PROT_READ | PROT_WRITE,
+			     MAP_ANONYMOUS | MAP_PRIVATE | MAP_32BIT, -1, 0);
+	if (low_user_desc == MAP_FAILED)
+		err(1, "mmap");
+
+	low_user_desc->entry_number	= -1;
+	low_user_desc->base_addr	= (unsigned long)&counter_page[1];
+	low_user_desc->limit		= 0xfffff;
+	low_user_desc->seg_32bit	= 1;
+	low_user_desc->contents		= 0; /* Data, grow-up*/
+	low_user_desc->read_exec_only	= 0;
+	low_user_desc->limit_in_pages	= 1;
+	low_user_desc->seg_not_present	= 0;
+	low_user_desc->useable		= 0;
+
+	if (invoke_set_thread_area() == 0) {
+		gdt_entry_num = low_user_desc->entry_number;
+		printf("[NOTE]\tset_thread_area is available; will use GDT index %d\n", gdt_entry_num);
+	} else {
+		printf("[NOTE]\tset_thread_area is unavailable\n");
+	}
+
+	low_user_desc_clear = low_user_desc + 1;
+	low_user_desc_clear->entry_number = gdt_entry_num;
+	low_user_desc_clear->read_exec_only = 1;
+	low_user_desc_clear->seg_not_present = 1;
+}
+
+static void test_gdt_invalidation(void)
+{
+	if (!gdt_entry_num)
+		return;	/* 64-bit only system -- we can't use set_thread_area */
+
+	unsigned short prev_sel;
+	unsigned short sel;
+	unsigned int eax;
+	const char *result;
+#ifdef __x86_64__
+	unsigned long saved_base;
+	unsigned long new_base;
+#endif
+
+	/* Test DS */
+	invoke_set_thread_area();
+	eax = 243;
+	sel = (gdt_entry_num << 3) | 3;
+	asm volatile ("movw %%ds, %[prev_sel]\n\t"
+		      "movw %[sel], %%ds\n\t"
+#ifdef __i386__
+		      "pushl %%ebx\n\t"
+#endif
+		      "movl %[arg1], %%ebx\n\t"
+		      "int $0x80\n\t"	/* Should invalidate ds */
+#ifdef __i386__
+		      "popl %%ebx\n\t"
+#endif
+		      "movw %%ds, %[sel]\n\t"
+		      "movw %[prev_sel], %%ds"
+		      : [prev_sel] "=&r" (prev_sel), [sel] "+r" (sel),
+			"+a" (eax)
+		      : "m" (low_user_desc_clear),
+			[arg1] "r" ((unsigned int)(unsigned long)low_user_desc_clear)
+		      : INT80_CLOBBERS);
+
+	if (sel != 0) {
+		result = "FAIL";
+		nerrs++;
+	} else {
+		result = "OK";
+	}
+	printf("[%s]\tInvalidate DS with set_thread_area: new DS = 0x%hx\n",
+	       result, sel);
+
+	/* Test ES */
+	invoke_set_thread_area();
+	eax = 243;
+	sel = (gdt_entry_num << 3) | 3;
+	asm volatile ("movw %%es, %[prev_sel]\n\t"
+		      "movw %[sel], %%es\n\t"
+#ifdef __i386__
+		      "pushl %%ebx\n\t"
+#endif
+		      "movl %[arg1], %%ebx\n\t"
+		      "int $0x80\n\t"	/* Should invalidate es */
+#ifdef __i386__
+		      "popl %%ebx\n\t"
+#endif
+		      "movw %%es, %[sel]\n\t"
+		      "movw %[prev_sel], %%es"
+		      : [prev_sel] "=&r" (prev_sel), [sel] "+r" (sel),
+			"+a" (eax)
+		      : "m" (low_user_desc_clear),
+			[arg1] "r" ((unsigned int)(unsigned long)low_user_desc_clear)
+		      : INT80_CLOBBERS);
+
+	if (sel != 0) {
+		result = "FAIL";
+		nerrs++;
+	} else {
+		result = "OK";
+	}
+	printf("[%s]\tInvalidate ES with set_thread_area: new ES = 0x%hx\n",
+	       result, sel);
+
+	/* Test FS */
+	invoke_set_thread_area();
+	eax = 243;
+	sel = (gdt_entry_num << 3) | 3;
+#ifdef __x86_64__
+	syscall(SYS_arch_prctl, ARCH_GET_FS, &saved_base);
+#endif
+	asm volatile ("movw %%fs, %[prev_sel]\n\t"
+		      "movw %[sel], %%fs\n\t"
+#ifdef __i386__
+		      "pushl %%ebx\n\t"
+#endif
+		      "movl %[arg1], %%ebx\n\t"
+		      "int $0x80\n\t"	/* Should invalidate fs */
+#ifdef __i386__
+		      "popl %%ebx\n\t"
+#endif
+		      "movw %%fs, %[sel]\n\t"
+		      : [prev_sel] "=&r" (prev_sel), [sel] "+r" (sel),
+			"+a" (eax)
+		      : "m" (low_user_desc_clear),
+			[arg1] "r" ((unsigned int)(unsigned long)low_user_desc_clear)
+		      : INT80_CLOBBERS);
+
+#ifdef __x86_64__
+	syscall(SYS_arch_prctl, ARCH_GET_FS, &new_base);
+#endif
+
+	/* Restore FS/BASE for glibc */
+	asm volatile ("movw %[prev_sel], %%fs" : : [prev_sel] "rm" (prev_sel));
+#ifdef __x86_64__
+	if (saved_base)
+		syscall(SYS_arch_prctl, ARCH_SET_FS, saved_base);
+#endif
+
+	if (sel != 0) {
+		result = "FAIL";
+		nerrs++;
+	} else {
+		result = "OK";
+	}
+	printf("[%s]\tInvalidate FS with set_thread_area: new FS = 0x%hx\n",
+	       result, sel);
+
+#ifdef __x86_64__
+	if (sel == 0 && new_base != 0) {
+		nerrs++;
+		printf("[FAIL]\tNew FSBASE was 0x%lx\n", new_base);
+	} else {
+		printf("[OK]\tNew FSBASE was zero\n");
+	}
+#endif
+
+	/* Test GS */
+	invoke_set_thread_area();
+	eax = 243;
+	sel = (gdt_entry_num << 3) | 3;
+#ifdef __x86_64__
+	syscall(SYS_arch_prctl, ARCH_GET_GS, &saved_base);
+#endif
+	asm volatile ("movw %%gs, %[prev_sel]\n\t"
+		      "movw %[sel], %%gs\n\t"
+#ifdef __i386__
+		      "pushl %%ebx\n\t"
+#endif
+		      "movl %[arg1], %%ebx\n\t"
+		      "int $0x80\n\t"	/* Should invalidate gs */
+#ifdef __i386__
+		      "popl %%ebx\n\t"
+#endif
+		      "movw %%gs, %[sel]\n\t"
+		      : [prev_sel] "=&r" (prev_sel), [sel] "+r" (sel),
+			"+a" (eax)
+		      : "m" (low_user_desc_clear),
+			[arg1] "r" ((unsigned int)(unsigned long)low_user_desc_clear)
+		      : INT80_CLOBBERS);
+
+#ifdef __x86_64__
+	syscall(SYS_arch_prctl, ARCH_GET_GS, &new_base);
+#endif
+
+	/* Restore GS/BASE for glibc */
+	asm volatile ("movw %[prev_sel], %%gs" : : [prev_sel] "rm" (prev_sel));
+#ifdef __x86_64__
+	if (saved_base)
+		syscall(SYS_arch_prctl, ARCH_SET_GS, saved_base);
+#endif
+
+	if (sel != 0) {
+		result = "FAIL";
+		nerrs++;
+	} else {
+		result = "OK";
+	}
+	printf("[%s]\tInvalidate GS with set_thread_area: new GS = 0x%hx\n",
+	       result, sel);
+
+#ifdef __x86_64__
+	if (sel == 0 && new_base != 0) {
+		nerrs++;
+		printf("[FAIL]\tNew GSBASE was 0x%lx\n", new_base);
+	} else {
+		printf("[OK]\tNew GSBASE was zero\n");
+	}
+#endif
+}
+
+int main(int argc, char **argv)
+{
+	if (argc == 1 && !strcmp(argv[0], "ldt_gdt_test_exec"))
+		return finish_exec_test();
+
+	setup_counter_page();
+	setup_low_user_desc();
+
+	do_simple_tests();
+
+	do_multicpu_tests();
+
+	do_exec_test();
+
+	test_gdt_invalidation();
+
+	return nerrs ? 1 : 0;
+}
diff --git a/tools/testing/selftests/x86/mov_ss_trap.c b/tools/testing/selftests/x86/mov_ss_trap.c
new file mode 100644
index 000000000..3c3a02265
--- /dev/null
+++ b/tools/testing/selftests/x86/mov_ss_trap.c
@@ -0,0 +1,285 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * mov_ss_trap.c: Exercise the bizarre side effects of a watchpoint on MOV SS
+ *
+ * This does MOV SS from a watchpointed address followed by various
+ * types of kernel entries.  A MOV SS that hits a watchpoint will queue
+ * up a #DB trap but will not actually deliver that trap.  The trap
+ * will be delivered after the next instruction instead.  The CPU's logic
+ * seems to be:
+ *
+ *  - Any fault: drop the pending #DB trap.
+ *  - INT $N, INT3, INTO, SYSCALL, SYSENTER: enter the kernel and then
+ *    deliver #DB.
+ *  - ICEBP: enter the kernel but do not deliver the watchpoint trap
+ *  - breakpoint: only one #DB is delivered (phew!)
+ *
+ * There are plenty of ways for a kernel to handle this incorrectly.  This
+ * test tries to exercise all the cases.
+ *
+ * This should mostly cover CVE-2018-1087 and CVE-2018-8897.
+ */
+#define _GNU_SOURCE
+
+#include <stdlib.h>
+#include <sys/ptrace.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <sys/user.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+#include <errno.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <err.h>
+#include <string.h>
+#include <setjmp.h>
+#include <sys/prctl.h>
+
+#define X86_EFLAGS_RF (1UL << 16)
+
+#if __x86_64__
+# define REG_IP REG_RIP
+#else
+# define REG_IP REG_EIP
+#endif
+
+unsigned short ss;
+extern unsigned char breakpoint_insn[];
+sigjmp_buf jmpbuf;
+static unsigned char altstack_data[SIGSTKSZ];
+
+static void enable_watchpoint(void)
+{
+	pid_t parent = getpid();
+	int status;
+
+	pid_t child = fork();
+	if (child < 0)
+		err(1, "fork");
+
+	if (child) {
+		if (waitpid(child, &status, 0) != child)
+			err(1, "waitpid for child");
+	} else {
+		unsigned long dr0, dr1, dr7;
+
+		dr0 = (unsigned long)&ss;
+		dr1 = (unsigned long)breakpoint_insn;
+		dr7 = ((1UL << 1) |	/* G0 */
+		       (3UL << 16) |	/* RW0 = read or write */
+		       (1UL << 18) |	/* LEN0 = 2 bytes */
+		       (1UL << 3));	/* G1, RW1 = insn */
+
+		if (ptrace(PTRACE_ATTACH, parent, NULL, NULL) != 0)
+			err(1, "PTRACE_ATTACH");
+
+		if (waitpid(parent, &status, 0) != parent)
+			err(1, "waitpid for child");
+
+		if (ptrace(PTRACE_POKEUSER, parent, (void *)offsetof(struct user, u_debugreg[0]), dr0) != 0)
+			err(1, "PTRACE_POKEUSER DR0");
+
+		if (ptrace(PTRACE_POKEUSER, parent, (void *)offsetof(struct user, u_debugreg[1]), dr1) != 0)
+			err(1, "PTRACE_POKEUSER DR1");
+
+		if (ptrace(PTRACE_POKEUSER, parent, (void *)offsetof(struct user, u_debugreg[7]), dr7) != 0)
+			err(1, "PTRACE_POKEUSER DR7");
+
+		printf("\tDR0 = %lx, DR1 = %lx, DR7 = %lx\n", dr0, dr1, dr7);
+
+		if (ptrace(PTRACE_DETACH, parent, NULL, NULL) != 0)
+			err(1, "PTRACE_DETACH");
+
+		exit(0);
+	}
+}
+
+static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
+		       int flags)
+{
+	struct sigaction sa;
+	memset(&sa, 0, sizeof(sa));
+	sa.sa_sigaction = handler;
+	sa.sa_flags = SA_SIGINFO | flags;
+	sigemptyset(&sa.sa_mask);
+	if (sigaction(sig, &sa, 0))
+		err(1, "sigaction");
+}
+
+static char const * const signames[] = {
+	[SIGSEGV] = "SIGSEGV",
+	[SIGBUS] = "SIBGUS",
+	[SIGTRAP] = "SIGTRAP",
+	[SIGILL] = "SIGILL",
+};
+
+static void sigtrap(int sig, siginfo_t *si, void *ctx_void)
+{
+	ucontext_t *ctx = ctx_void;
+
+	printf("\tGot SIGTRAP with RIP=%lx, EFLAGS.RF=%d\n",
+	       (unsigned long)ctx->uc_mcontext.gregs[REG_IP],
+	       !!(ctx->uc_mcontext.gregs[REG_EFL] & X86_EFLAGS_RF));
+}
+
+static void handle_and_return(int sig, siginfo_t *si, void *ctx_void)
+{
+	ucontext_t *ctx = ctx_void;
+
+	printf("\tGot %s with RIP=%lx\n", signames[sig],
+	       (unsigned long)ctx->uc_mcontext.gregs[REG_IP]);
+}
+
+static void handle_and_longjmp(int sig, siginfo_t *si, void *ctx_void)
+{
+	ucontext_t *ctx = ctx_void;
+
+	printf("\tGot %s with RIP=%lx\n", signames[sig],
+	       (unsigned long)ctx->uc_mcontext.gregs[REG_IP]);
+
+	siglongjmp(jmpbuf, 1);
+}
+
+int main()
+{
+	unsigned long nr;
+
+	asm volatile ("mov %%ss, %[ss]" : [ss] "=m" (ss));
+	printf("\tSS = 0x%hx, &SS = 0x%p\n", ss, &ss);
+
+	if (prctl(PR_SET_PTRACER, PR_SET_PTRACER_ANY, 0, 0, 0) == 0)
+		printf("\tPR_SET_PTRACER_ANY succeeded\n");
+
+	printf("\tSet up a watchpoint\n");
+	sethandler(SIGTRAP, sigtrap, 0);
+	enable_watchpoint();
+
+	printf("[RUN]\tRead from watched memory (should get SIGTRAP)\n");
+	asm volatile ("mov %[ss], %[tmp]" : [tmp] "=r" (nr) : [ss] "m" (ss));
+
+	printf("[RUN]\tMOV SS; INT3\n");
+	asm volatile ("mov %[ss], %%ss; int3" :: [ss] "m" (ss));
+
+	printf("[RUN]\tMOV SS; INT 3\n");
+	asm volatile ("mov %[ss], %%ss; .byte 0xcd, 0x3" :: [ss] "m" (ss));
+
+	printf("[RUN]\tMOV SS; CS CS INT3\n");
+	asm volatile ("mov %[ss], %%ss; .byte 0x2e, 0x2e; int3" :: [ss] "m" (ss));
+
+	printf("[RUN]\tMOV SS; CSx14 INT3\n");
+	asm volatile ("mov %[ss], %%ss; .fill 14,1,0x2e; int3" :: [ss] "m" (ss));
+
+	printf("[RUN]\tMOV SS; INT 4\n");
+	sethandler(SIGSEGV, handle_and_return, SA_RESETHAND);
+	asm volatile ("mov %[ss], %%ss; int $4" :: [ss] "m" (ss));
+
+#ifdef __i386__
+	printf("[RUN]\tMOV SS; INTO\n");
+	sethandler(SIGSEGV, handle_and_return, SA_RESETHAND);
+	nr = -1;
+	asm volatile ("add $1, %[tmp]; mov %[ss], %%ss; into"
+		      : [tmp] "+r" (nr) : [ss] "m" (ss));
+#endif
+
+	if (sigsetjmp(jmpbuf, 1) == 0) {
+		printf("[RUN]\tMOV SS; ICEBP\n");
+
+		/* Some emulators (e.g. QEMU TCG) don't emulate ICEBP. */
+		sethandler(SIGILL, handle_and_longjmp, SA_RESETHAND);
+
+		asm volatile ("mov %[ss], %%ss; .byte 0xf1" :: [ss] "m" (ss));
+	}
+
+	if (sigsetjmp(jmpbuf, 1) == 0) {
+		printf("[RUN]\tMOV SS; CLI\n");
+		sethandler(SIGSEGV, handle_and_longjmp, SA_RESETHAND);
+		asm volatile ("mov %[ss], %%ss; cli" :: [ss] "m" (ss));
+	}
+
+	if (sigsetjmp(jmpbuf, 1) == 0) {
+		printf("[RUN]\tMOV SS; #PF\n");
+		sethandler(SIGSEGV, handle_and_longjmp, SA_RESETHAND);
+		asm volatile ("mov %[ss], %%ss; mov (-1), %[tmp]"
+			      : [tmp] "=r" (nr) : [ss] "m" (ss));
+	}
+
+	/*
+	 * INT $1: if #DB has DPL=3 and there isn't special handling,
+	 * then the kernel will die.
+	 */
+	if (sigsetjmp(jmpbuf, 1) == 0) {
+		printf("[RUN]\tMOV SS; INT 1\n");
+		sethandler(SIGSEGV, handle_and_longjmp, SA_RESETHAND);
+		asm volatile ("mov %[ss], %%ss; int $1" :: [ss] "m" (ss));
+	}
+
+#ifdef __x86_64__
+	/*
+	 * In principle, we should test 32-bit SYSCALL as well, but
+	 * the calling convention is so unpredictable that it's
+	 * not obviously worth the effort.
+	 */
+	if (sigsetjmp(jmpbuf, 1) == 0) {
+		printf("[RUN]\tMOV SS; SYSCALL\n");
+		sethandler(SIGILL, handle_and_longjmp, SA_RESETHAND);
+		nr = SYS_getpid;
+		/*
+		 * Toggle the high bit of RSP to make it noncanonical to
+		 * strengthen this test on non-SMAP systems.
+		 */
+		asm volatile ("btc $63, %%rsp\n\t"
+			      "mov %[ss], %%ss; syscall\n\t"
+			      "btc $63, %%rsp"
+			      : "+a" (nr) : [ss] "m" (ss)
+			      : "rcx"
+#ifdef __x86_64__
+				, "r11"
+#endif
+			);
+	}
+#endif
+
+	printf("[RUN]\tMOV SS; breakpointed NOP\n");
+	asm volatile ("mov %[ss], %%ss; breakpoint_insn: nop" :: [ss] "m" (ss));
+
+	/*
+	 * Invoking SYSENTER directly breaks all the rules.  Just handle
+	 * the SIGSEGV.
+	 */
+	if (sigsetjmp(jmpbuf, 1) == 0) {
+		printf("[RUN]\tMOV SS; SYSENTER\n");
+		stack_t stack = {
+			.ss_sp = altstack_data,
+			.ss_size = SIGSTKSZ,
+		};
+		if (sigaltstack(&stack, NULL) != 0)
+			err(1, "sigaltstack");
+		sethandler(SIGSEGV, handle_and_longjmp, SA_RESETHAND | SA_ONSTACK);
+		nr = SYS_getpid;
+		asm volatile ("mov %[ss], %%ss; SYSENTER" : "+a" (nr)
+			      : [ss] "m" (ss) : "flags", "rcx"
+#ifdef __x86_64__
+				, "r11"
+#endif
+			);
+
+		/* We're unreachable here.  SYSENTER forgets RIP. */
+	}
+
+	if (sigsetjmp(jmpbuf, 1) == 0) {
+		printf("[RUN]\tMOV SS; INT $0x80\n");
+		sethandler(SIGSEGV, handle_and_longjmp, SA_RESETHAND);
+		nr = 20;	/* compat getpid */
+		asm volatile ("mov %[ss], %%ss; int $0x80"
+			      : "+a" (nr) : [ss] "m" (ss)
+			      : "flags"
+#ifdef __x86_64__
+				, "r8", "r9", "r10", "r11"
+#endif
+			);
+	}
+
+	printf("[OK]\tI aten't dead\n");
+	return 0;
+}
diff --git a/tools/testing/selftests/x86/mpx-debug.h b/tools/testing/selftests/x86/mpx-debug.h
new file mode 100644
index 000000000..7546eba7f
--- /dev/null
+++ b/tools/testing/selftests/x86/mpx-debug.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _MPX_DEBUG_H
+#define _MPX_DEBUG_H
+
+#ifndef DEBUG_LEVEL
+#define DEBUG_LEVEL 0
+#endif
+#define dprintf_level(level, args...) do { if(level <= DEBUG_LEVEL) printf(args); } while(0)
+#define dprintf1(args...) dprintf_level(1, args)
+#define dprintf2(args...) dprintf_level(2, args)
+#define dprintf3(args...) dprintf_level(3, args)
+#define dprintf4(args...) dprintf_level(4, args)
+#define dprintf5(args...) dprintf_level(5, args)
+
+#endif /* _MPX_DEBUG_H */
diff --git a/tools/testing/selftests/x86/mpx-dig.c b/tools/testing/selftests/x86/mpx-dig.c
new file mode 100644
index 000000000..c13607ef5
--- /dev/null
+++ b/tools/testing/selftests/x86/mpx-dig.c
@@ -0,0 +1,499 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Written by Dave Hansen <dave.hansen@intel.com>
+ */
+
+#include <stdlib.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <string.h>
+#include <fcntl.h>
+#include "mpx-debug.h"
+#include "mpx-mm.h"
+#include "mpx-hw.h"
+
+unsigned long bounds_dir_global;
+
+#define mpx_dig_abort()	__mpx_dig_abort(__FILE__, __func__, __LINE__)
+static void inline __mpx_dig_abort(const char *file, const char *func, int line)
+{
+	fprintf(stderr, "MPX dig abort @ %s::%d in %s()\n", file, line, func);
+	printf("MPX dig abort @ %s::%d in %s()\n", file, line, func);
+	abort();
+}
+
+/*
+ * run like this (BDIR finds the probably bounds directory):
+ *
+ *	BDIR="$(cat /proc/$pid/smaps | grep -B1 2097152 \
+ *		| head -1 | awk -F- '{print $1}')";
+ *	./mpx-dig $pid 0x$BDIR
+ *
+ * NOTE:
+ *	assumes that the only 2097152-kb VMA is the bounds dir
+ */
+
+long nr_incore(void *ptr, unsigned long size_bytes)
+{
+	int i;
+	long ret = 0;
+	long vec_len = size_bytes / PAGE_SIZE;
+	unsigned char *vec = malloc(vec_len);
+	int incore_ret;
+
+	if (!vec)
+		mpx_dig_abort();
+
+	incore_ret = mincore(ptr, size_bytes, vec);
+	if (incore_ret) {
+		printf("mincore ret: %d\n", incore_ret);
+		perror("mincore");
+		mpx_dig_abort();
+	}
+	for (i = 0; i < vec_len; i++)
+		ret += vec[i];
+	free(vec);
+	return ret;
+}
+
+int open_proc(int pid, char *file)
+{
+	static char buf[100];
+	int fd;
+
+	snprintf(&buf[0], sizeof(buf), "/proc/%d/%s", pid, file);
+	fd = open(&buf[0], O_RDONLY);
+	if (fd < 0)
+		perror(buf);
+
+	return fd;
+}
+
+struct vaddr_range {
+	unsigned long start;
+	unsigned long end;
+};
+struct vaddr_range *ranges;
+int nr_ranges_allocated;
+int nr_ranges_populated;
+int last_range = -1;
+
+int __pid_load_vaddrs(int pid)
+{
+	int ret = 0;
+	int proc_maps_fd = open_proc(pid, "maps");
+	char linebuf[10000];
+	unsigned long start;
+	unsigned long end;
+	char rest[1000];
+	FILE *f = fdopen(proc_maps_fd, "r");
+
+	if (!f)
+		mpx_dig_abort();
+	nr_ranges_populated = 0;
+	while (!feof(f)) {
+		char *readret = fgets(linebuf, sizeof(linebuf), f);
+		int parsed;
+
+		if (readret == NULL) {
+			if (feof(f))
+				break;
+			mpx_dig_abort();
+		}
+
+		parsed = sscanf(linebuf, "%lx-%lx%s", &start, &end, rest);
+		if (parsed != 3)
+			mpx_dig_abort();
+
+		dprintf4("result[%d]: %lx-%lx<->%s\n", parsed, start, end, rest);
+		if (nr_ranges_populated >= nr_ranges_allocated) {
+			ret = -E2BIG;
+			break;
+		}
+		ranges[nr_ranges_populated].start = start;
+		ranges[nr_ranges_populated].end = end;
+		nr_ranges_populated++;
+	}
+	last_range = -1;
+	fclose(f);
+	close(proc_maps_fd);
+	return ret;
+}
+
+int pid_load_vaddrs(int pid)
+{
+	int ret;
+
+	dprintf2("%s(%d)\n", __func__, pid);
+	if (!ranges) {
+		nr_ranges_allocated = 4;
+		ranges = malloc(nr_ranges_allocated * sizeof(ranges[0]));
+		dprintf2("%s(%d) allocated %d ranges @ %p\n", __func__, pid,
+			 nr_ranges_allocated, ranges);
+		assert(ranges != NULL);
+	}
+	do {
+		ret = __pid_load_vaddrs(pid);
+		if (!ret)
+			break;
+		if (ret == -E2BIG) {
+			dprintf2("%s(%d) need to realloc\n", __func__, pid);
+			nr_ranges_allocated *= 2;
+			ranges = realloc(ranges,
+					nr_ranges_allocated * sizeof(ranges[0]));
+			dprintf2("%s(%d) allocated %d ranges @ %p\n", __func__,
+					pid, nr_ranges_allocated, ranges);
+			assert(ranges != NULL);
+			dprintf1("reallocating to hold %d ranges\n", nr_ranges_allocated);
+		}
+	} while (1);
+
+	dprintf2("%s(%d) done\n", __func__, pid);
+
+	return ret;
+}
+
+static inline int vaddr_in_range(unsigned long vaddr, struct vaddr_range *r)
+{
+	if (vaddr < r->start)
+		return 0;
+	if (vaddr >= r->end)
+		return 0;
+	return 1;
+}
+
+static inline int vaddr_mapped_by_range(unsigned long vaddr)
+{
+	int i;
+
+	if (last_range > 0 && vaddr_in_range(vaddr, &ranges[last_range]))
+		return 1;
+
+	for (i = 0; i < nr_ranges_populated; i++) {
+		struct vaddr_range *r = &ranges[i];
+
+		if (vaddr_in_range(vaddr, r))
+			continue;
+		last_range = i;
+		return 1;
+	}
+	return 0;
+}
+
+const int bt_entry_size_bytes = sizeof(unsigned long) * 4;
+
+void *read_bounds_table_into_buf(unsigned long table_vaddr)
+{
+#ifdef MPX_DIG_STANDALONE
+	static char bt_buf[MPX_BOUNDS_TABLE_SIZE_BYTES];
+	off_t seek_ret = lseek(fd, table_vaddr, SEEK_SET);
+	if (seek_ret != table_vaddr)
+		mpx_dig_abort();
+
+	int read_ret = read(fd, &bt_buf, sizeof(bt_buf));
+	if (read_ret != sizeof(bt_buf))
+		mpx_dig_abort();
+	return &bt_buf;
+#else
+	return (void *)table_vaddr;
+#endif
+}
+
+int dump_table(unsigned long table_vaddr, unsigned long base_controlled_vaddr,
+		unsigned long bde_vaddr)
+{
+	unsigned long offset_inside_bt;
+	int nr_entries = 0;
+	int do_abort = 0;
+	char *bt_buf;
+
+	dprintf3("%s() base_controlled_vaddr: 0x%012lx bde_vaddr: 0x%012lx\n",
+			__func__, base_controlled_vaddr, bde_vaddr);
+
+	bt_buf = read_bounds_table_into_buf(table_vaddr);
+
+	dprintf4("%s() read done\n", __func__);
+
+	for (offset_inside_bt = 0;
+	     offset_inside_bt < MPX_BOUNDS_TABLE_SIZE_BYTES;
+	     offset_inside_bt += bt_entry_size_bytes) {
+		unsigned long bt_entry_index;
+		unsigned long bt_entry_controls;
+		unsigned long this_bt_entry_for_vaddr;
+		unsigned long *bt_entry_buf;
+		int i;
+
+		dprintf4("%s() offset_inside_bt: 0x%lx of 0x%llx\n", __func__,
+			offset_inside_bt, MPX_BOUNDS_TABLE_SIZE_BYTES);
+		bt_entry_buf = (void *)&bt_buf[offset_inside_bt];
+		if (!bt_buf) {
+			printf("null bt_buf\n");
+			mpx_dig_abort();
+		}
+		if (!bt_entry_buf) {
+			printf("null bt_entry_buf\n");
+			mpx_dig_abort();
+		}
+		dprintf4("%s() reading *bt_entry_buf @ %p\n", __func__,
+				bt_entry_buf);
+		if (!bt_entry_buf[0] &&
+		    !bt_entry_buf[1] &&
+		    !bt_entry_buf[2] &&
+		    !bt_entry_buf[3])
+			continue;
+
+		nr_entries++;
+
+		bt_entry_index = offset_inside_bt/bt_entry_size_bytes;
+		bt_entry_controls = sizeof(void *);
+		this_bt_entry_for_vaddr =
+			base_controlled_vaddr + bt_entry_index*bt_entry_controls;
+		/*
+		 * We sign extend vaddr bits 48->63 which effectively
+		 * creates a hole in the virtual address space.
+		 * This calculation corrects for the hole.
+		 */
+		if (this_bt_entry_for_vaddr > 0x00007fffffffffffUL)
+			this_bt_entry_for_vaddr |= 0xffff800000000000;
+
+		if (!vaddr_mapped_by_range(this_bt_entry_for_vaddr)) {
+			printf("bt_entry_buf: %p\n", bt_entry_buf);
+			printf("there is a bte for %lx but no mapping\n",
+					this_bt_entry_for_vaddr);
+			printf("	  bde   vaddr: %016lx\n", bde_vaddr);
+			printf("base_controlled_vaddr: %016lx\n", base_controlled_vaddr);
+			printf("	  table_vaddr: %016lx\n", table_vaddr);
+			printf("	  entry vaddr: %016lx @ offset %lx\n",
+				table_vaddr + offset_inside_bt, offset_inside_bt);
+			do_abort = 1;
+			mpx_dig_abort();
+		}
+		if (DEBUG_LEVEL < 4)
+			continue;
+
+		printf("table entry[%lx]: ", offset_inside_bt);
+		for (i = 0; i < bt_entry_size_bytes; i += sizeof(unsigned long))
+			printf("0x%016lx ", bt_entry_buf[i]);
+		printf("\n");
+	}
+	if (do_abort)
+		mpx_dig_abort();
+	dprintf4("%s() done\n",  __func__);
+	return nr_entries;
+}
+
+int search_bd_buf(char *buf, int len_bytes, unsigned long bd_offset_bytes,
+		int *nr_populated_bdes)
+{
+	unsigned long i;
+	int total_entries = 0;
+
+	dprintf3("%s(%p, %x, %lx, ...) buf end: %p\n", __func__, buf,
+			len_bytes, bd_offset_bytes, buf + len_bytes);
+
+	for (i = 0; i < len_bytes; i += sizeof(unsigned long)) {
+		unsigned long bd_index = (bd_offset_bytes + i) / sizeof(unsigned long);
+		unsigned long *bounds_dir_entry_ptr = (unsigned long *)&buf[i];
+		unsigned long bounds_dir_entry;
+		unsigned long bd_for_vaddr;
+		unsigned long bt_start;
+		unsigned long bt_tail;
+		int nr_entries;
+
+		dprintf4("%s() loop i: %ld bounds_dir_entry_ptr: %p\n", __func__, i,
+				bounds_dir_entry_ptr);
+
+		bounds_dir_entry = *bounds_dir_entry_ptr;
+		if (!bounds_dir_entry) {
+			dprintf4("no bounds dir at index 0x%lx / 0x%lx "
+				 "start at offset:%lx %lx\n", bd_index, bd_index,
+					bd_offset_bytes, i);
+			continue;
+		}
+		dprintf3("found bounds_dir_entry: 0x%lx @ "
+			 "index 0x%lx buf ptr: %p\n", bounds_dir_entry, i,
+					&buf[i]);
+		/* mask off the enable bit: */
+		bounds_dir_entry &= ~0x1;
+		(*nr_populated_bdes)++;
+		dprintf4("nr_populated_bdes: %p\n", nr_populated_bdes);
+		dprintf4("*nr_populated_bdes: %d\n", *nr_populated_bdes);
+
+		bt_start = bounds_dir_entry;
+		bt_tail = bounds_dir_entry + MPX_BOUNDS_TABLE_SIZE_BYTES - 1;
+		if (!vaddr_mapped_by_range(bt_start)) {
+			printf("bounds directory 0x%lx points to nowhere\n",
+					bounds_dir_entry);
+			mpx_dig_abort();
+		}
+		if (!vaddr_mapped_by_range(bt_tail)) {
+			printf("bounds directory end 0x%lx points to nowhere\n",
+					bt_tail);
+			mpx_dig_abort();
+		}
+		/*
+		 * Each bounds directory entry controls 1MB of virtual address
+		 * space.  This variable is the virtual address in the process
+		 * of the beginning of the area controlled by this bounds_dir.
+		 */
+		bd_for_vaddr = bd_index * (1UL<<20);
+
+		nr_entries = dump_table(bounds_dir_entry, bd_for_vaddr,
+				bounds_dir_global+bd_offset_bytes+i);
+		total_entries += nr_entries;
+		dprintf5("dir entry[%4ld @ %p]: 0x%lx %6d entries "
+			 "total this buf: %7d bd_for_vaddrs: 0x%lx -> 0x%lx\n",
+				bd_index, buf+i,
+				bounds_dir_entry, nr_entries, total_entries,
+				bd_for_vaddr, bd_for_vaddr + (1UL<<20));
+	}
+	dprintf3("%s(%p, %x, %lx, ...) done\n", __func__, buf, len_bytes,
+			bd_offset_bytes);
+	return total_entries;
+}
+
+int proc_pid_mem_fd = -1;
+
+void *fill_bounds_dir_buf_other(long byte_offset_inside_bounds_dir,
+			   long buffer_size_bytes, void *buffer)
+{
+	unsigned long seekto = bounds_dir_global + byte_offset_inside_bounds_dir;
+	int read_ret;
+	off_t seek_ret = lseek(proc_pid_mem_fd, seekto, SEEK_SET);
+
+	if (seek_ret != seekto)
+		mpx_dig_abort();
+
+	read_ret = read(proc_pid_mem_fd, buffer, buffer_size_bytes);
+	/* there shouldn't practically be short reads of /proc/$pid/mem */
+	if (read_ret != buffer_size_bytes)
+		mpx_dig_abort();
+
+	return buffer;
+}
+void *fill_bounds_dir_buf_self(long byte_offset_inside_bounds_dir,
+			   long buffer_size_bytes, void *buffer)
+
+{
+	unsigned char vec[buffer_size_bytes / PAGE_SIZE];
+	char *dig_bounds_dir_ptr =
+		(void *)(bounds_dir_global + byte_offset_inside_bounds_dir);
+	/*
+	 * use mincore() to quickly find the areas of the bounds directory
+	 * that have memory and thus will be worth scanning.
+	 */
+	int incore_ret;
+
+	int incore = 0;
+	int i;
+
+	dprintf4("%s() dig_bounds_dir_ptr: %p\n", __func__, dig_bounds_dir_ptr);
+
+	incore_ret = mincore(dig_bounds_dir_ptr, buffer_size_bytes, &vec[0]);
+	if (incore_ret) {
+		printf("mincore ret: %d\n", incore_ret);
+		perror("mincore");
+		mpx_dig_abort();
+	}
+	for (i = 0; i < sizeof(vec); i++)
+		incore += vec[i];
+	dprintf4("%s() total incore: %d\n", __func__, incore);
+	if (!incore)
+		return NULL;
+	dprintf3("%s() total incore: %d\n", __func__, incore);
+	return dig_bounds_dir_ptr;
+}
+
+int inspect_pid(int pid)
+{
+	static int dig_nr;
+	long offset_inside_bounds_dir;
+	char bounds_dir_buf[sizeof(unsigned long) * (1UL << 15)];
+	char *dig_bounds_dir_ptr;
+	int total_entries = 0;
+	int nr_populated_bdes = 0;
+	int inspect_self;
+
+	if (getpid() == pid) {
+		dprintf4("inspecting self\n");
+		inspect_self = 1;
+	} else {
+		dprintf4("inspecting pid %d\n", pid);
+		mpx_dig_abort();
+	}
+
+	for (offset_inside_bounds_dir = 0;
+	     offset_inside_bounds_dir < MPX_BOUNDS_TABLE_SIZE_BYTES;
+	     offset_inside_bounds_dir += sizeof(bounds_dir_buf)) {
+		static int bufs_skipped;
+		int this_entries;
+
+		if (inspect_self) {
+			dig_bounds_dir_ptr =
+				fill_bounds_dir_buf_self(offset_inside_bounds_dir,
+							 sizeof(bounds_dir_buf),
+							 &bounds_dir_buf[0]);
+		} else {
+			dig_bounds_dir_ptr =
+				fill_bounds_dir_buf_other(offset_inside_bounds_dir,
+							  sizeof(bounds_dir_buf),
+							  &bounds_dir_buf[0]);
+		}
+		if (!dig_bounds_dir_ptr) {
+			bufs_skipped++;
+			continue;
+		}
+		this_entries = search_bd_buf(dig_bounds_dir_ptr,
+					sizeof(bounds_dir_buf),
+					offset_inside_bounds_dir,
+					&nr_populated_bdes);
+		total_entries += this_entries;
+	}
+	printf("mpx dig (%3d) complete, SUCCESS (%8d / %4d)\n", ++dig_nr,
+			total_entries, nr_populated_bdes);
+	return total_entries + nr_populated_bdes;
+}
+
+#ifdef MPX_DIG_REMOTE
+int main(int argc, char **argv)
+{
+	int err;
+	char *c;
+	unsigned long bounds_dir_entry;
+	int pid;
+
+	printf("mpx-dig starting...\n");
+	err = sscanf(argv[1], "%d", &pid);
+	printf("parsing: '%s', err: %d\n", argv[1], err);
+	if (err != 1)
+		mpx_dig_abort();
+
+	err = sscanf(argv[2], "%lx", &bounds_dir_global);
+	printf("parsing: '%s': %d\n", argv[2], err);
+	if (err != 1)
+		mpx_dig_abort();
+
+	proc_pid_mem_fd = open_proc(pid, "mem");
+	if (proc_pid_mem_fd < 0)
+		mpx_dig_abort();
+
+	inspect_pid(pid);
+	return 0;
+}
+#endif
+
+long inspect_me(struct mpx_bounds_dir *bounds_dir)
+{
+	int pid = getpid();
+
+	pid_load_vaddrs(pid);
+	bounds_dir_global = (unsigned long)bounds_dir;
+	dprintf4("enter %s() bounds dir: %p\n", __func__, bounds_dir);
+	return inspect_pid(pid);
+}
diff --git a/tools/testing/selftests/x86/mpx-hw.h b/tools/testing/selftests/x86/mpx-hw.h
new file mode 100644
index 000000000..d1b61ab87
--- /dev/null
+++ b/tools/testing/selftests/x86/mpx-hw.h
@@ -0,0 +1,124 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _MPX_HW_H
+#define _MPX_HW_H
+
+#include <assert.h>
+
+/* Describe the MPX Hardware Layout in here */
+
+#define NR_MPX_BOUNDS_REGISTERS 4
+
+#ifdef __i386__
+
+#define MPX_BOUNDS_TABLE_ENTRY_SIZE_BYTES	16 /* 4 * 32-bits */
+#define MPX_BOUNDS_TABLE_SIZE_BYTES		(1ULL << 14) /* 16k */
+#define MPX_BOUNDS_DIR_ENTRY_SIZE_BYTES		4
+#define MPX_BOUNDS_DIR_SIZE_BYTES		(1ULL << 22) /* 4MB */
+
+#define MPX_BOUNDS_TABLE_BOTTOM_BIT		2
+#define MPX_BOUNDS_TABLE_TOP_BIT		11
+#define MPX_BOUNDS_DIR_BOTTOM_BIT		12
+#define MPX_BOUNDS_DIR_TOP_BIT			31
+
+#else
+
+/*
+ * Linear Address of "pointer" (LAp)
+ *   0 ->  2: ignored
+ *   3 -> 19: index in to bounds table
+ *  20 -> 47: index in to bounds directory
+ *  48 -> 63: ignored
+ */
+
+#define MPX_BOUNDS_TABLE_ENTRY_SIZE_BYTES	32
+#define MPX_BOUNDS_TABLE_SIZE_BYTES		(1ULL << 22) /* 4MB */
+#define MPX_BOUNDS_DIR_ENTRY_SIZE_BYTES		8
+#define MPX_BOUNDS_DIR_SIZE_BYTES		(1ULL << 31) /* 2GB */
+
+#define MPX_BOUNDS_TABLE_BOTTOM_BIT		3
+#define MPX_BOUNDS_TABLE_TOP_BIT		19
+#define MPX_BOUNDS_DIR_BOTTOM_BIT		20
+#define MPX_BOUNDS_DIR_TOP_BIT			47
+
+#endif
+
+#define MPX_BOUNDS_DIR_NR_ENTRIES	\
+	(MPX_BOUNDS_DIR_SIZE_BYTES/MPX_BOUNDS_DIR_ENTRY_SIZE_BYTES)
+#define MPX_BOUNDS_TABLE_NR_ENTRIES	\
+	(MPX_BOUNDS_TABLE_SIZE_BYTES/MPX_BOUNDS_TABLE_ENTRY_SIZE_BYTES)
+
+#define MPX_BOUNDS_TABLE_ENTRY_VALID_BIT	0x1
+
+struct mpx_bd_entry {
+	union {
+		char x[MPX_BOUNDS_DIR_ENTRY_SIZE_BYTES];
+		void *contents[0];
+	};
+} __attribute__((packed));
+
+struct mpx_bt_entry {
+	union {
+		char x[MPX_BOUNDS_TABLE_ENTRY_SIZE_BYTES];
+		unsigned long contents[0];
+	};
+} __attribute__((packed));
+
+struct mpx_bounds_dir {
+	struct mpx_bd_entry entries[MPX_BOUNDS_DIR_NR_ENTRIES];
+} __attribute__((packed));
+
+struct mpx_bounds_table {
+	struct mpx_bt_entry entries[MPX_BOUNDS_TABLE_NR_ENTRIES];
+} __attribute__((packed));
+
+static inline unsigned long GET_BITS(unsigned long val, int bottombit, int topbit)
+{
+	int total_nr_bits = topbit - bottombit;
+	unsigned long mask = (1UL << total_nr_bits)-1;
+	return (val >> bottombit) & mask;
+}
+
+static inline unsigned long __vaddr_bounds_table_index(void *vaddr)
+{
+	return GET_BITS((unsigned long)vaddr, MPX_BOUNDS_TABLE_BOTTOM_BIT,
+					      MPX_BOUNDS_TABLE_TOP_BIT);
+}
+
+static inline unsigned long __vaddr_bounds_directory_index(void *vaddr)
+{
+	return GET_BITS((unsigned long)vaddr, MPX_BOUNDS_DIR_BOTTOM_BIT,
+					      MPX_BOUNDS_DIR_TOP_BIT);
+}
+
+static inline struct mpx_bd_entry *mpx_vaddr_to_bd_entry(void *vaddr,
+		struct mpx_bounds_dir *bounds_dir)
+{
+	unsigned long index = __vaddr_bounds_directory_index(vaddr);
+	return &bounds_dir->entries[index];
+}
+
+static inline int bd_entry_valid(struct mpx_bd_entry *bounds_dir_entry)
+{
+	unsigned long __bd_entry = (unsigned long)bounds_dir_entry->contents;
+	return (__bd_entry & MPX_BOUNDS_TABLE_ENTRY_VALID_BIT);
+}
+
+static inline struct mpx_bounds_table *
+__bd_entry_to_bounds_table(struct mpx_bd_entry *bounds_dir_entry)
+{
+	unsigned long __bd_entry = (unsigned long)bounds_dir_entry->contents;
+	assert(__bd_entry & MPX_BOUNDS_TABLE_ENTRY_VALID_BIT);
+	__bd_entry &= ~MPX_BOUNDS_TABLE_ENTRY_VALID_BIT;
+	return (struct mpx_bounds_table *)__bd_entry;
+}
+
+static inline struct mpx_bt_entry *
+mpx_vaddr_to_bt_entry(void *vaddr, struct mpx_bounds_dir *bounds_dir)
+{
+	struct mpx_bd_entry *bde = mpx_vaddr_to_bd_entry(vaddr, bounds_dir);
+	struct mpx_bounds_table *bt = __bd_entry_to_bounds_table(bde);
+	unsigned long index = __vaddr_bounds_table_index(vaddr);
+	return &bt->entries[index];
+}
+
+#endif /* _MPX_HW_H */
diff --git a/tools/testing/selftests/x86/mpx-mini-test.c b/tools/testing/selftests/x86/mpx-mini-test.c
new file mode 100644
index 000000000..50f7e9272
--- /dev/null
+++ b/tools/testing/selftests/x86/mpx-mini-test.c
@@ -0,0 +1,1616 @@
+/*
+ * mpx-mini-test.c: routines to test Intel MPX (Memory Protection eXtentions)
+ *
+ * Written by:
+ * "Ren, Qiaowei" <qiaowei.ren@intel.com>
+ * "Wei, Gang" <gang.wei@intel.com>
+ * "Hansen, Dave" <dave.hansen@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2.
+ */
+
+/*
+ * 2014-12-05: Dave Hansen: fixed all of the compiler warnings, and made sure
+ *	       it works on 32-bit.
+ */
+
+int inspect_every_this_many_mallocs = 100;
+int zap_all_every_this_many_mallocs = 1000;
+
+#define _GNU_SOURCE
+#define _LARGEFILE64_SOURCE
+
+#include <string.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <signal.h>
+#include <assert.h>
+#include <stdlib.h>
+#include <ucontext.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+#include "mpx-hw.h"
+#include "mpx-debug.h"
+#include "mpx-mm.h"
+
+#ifndef __always_inline
+#define __always_inline inline __attribute__((always_inline)
+#endif
+
+#ifndef TEST_DURATION_SECS
+#define TEST_DURATION_SECS 3
+#endif
+
+void write_int_to(char *prefix, char *file, int int_to_write)
+{
+	char buf[100];
+	int fd = open(file, O_RDWR);
+	int len;
+	int ret;
+
+	assert(fd >= 0);
+	len = snprintf(buf, sizeof(buf), "%s%d", prefix, int_to_write);
+	assert(len >= 0);
+	assert(len < sizeof(buf));
+	ret = write(fd, buf, len);
+	assert(ret == len);
+	ret = close(fd);
+	assert(!ret);
+}
+
+void write_pid_to(char *prefix, char *file)
+{
+	write_int_to(prefix, file, getpid());
+}
+
+void trace_me(void)
+{
+/* tracing events dir */
+#define TED "/sys/kernel/debug/tracing/events/"
+/*
+	write_pid_to("common_pid=", TED "signal/filter");
+	write_pid_to("common_pid=", TED "exceptions/filter");
+	write_int_to("", TED "signal/enable", 1);
+	write_int_to("", TED "exceptions/enable", 1);
+*/
+	write_pid_to("", "/sys/kernel/debug/tracing/set_ftrace_pid");
+	write_int_to("", "/sys/kernel/debug/tracing/trace", 0);
+}
+
+#define test_failed() __test_failed(__FILE__, __LINE__)
+static void __test_failed(char *f, int l)
+{
+	fprintf(stderr, "abort @ %s::%d\n", f, l);
+	abort();
+}
+
+/* Error Printf */
+#define eprintf(args...)	fprintf(stderr, args)
+
+#ifdef __i386__
+
+/* i386 directory size is 4MB */
+#define REG_IP_IDX	REG_EIP
+#define REX_PREFIX
+
+#define XSAVE_OFFSET_IN_FPMEM	sizeof(struct _libc_fpstate)
+
+/*
+ * __cpuid() is from the Linux Kernel:
+ */
+static inline void __cpuid(unsigned int *eax, unsigned int *ebx,
+		unsigned int *ecx, unsigned int *edx)
+{
+	/* ecx is often an input as well as an output. */
+	asm volatile(
+		"push %%ebx;"
+		"cpuid;"
+		"mov %%ebx, %1;"
+		"pop %%ebx"
+		: "=a" (*eax),
+		  "=g" (*ebx),
+		  "=c" (*ecx),
+		  "=d" (*edx)
+		: "0" (*eax), "2" (*ecx));
+}
+
+#else /* __i386__ */
+
+#define REG_IP_IDX	REG_RIP
+#define REX_PREFIX "0x48, "
+
+#define XSAVE_OFFSET_IN_FPMEM	0
+
+/*
+ * __cpuid() is from the Linux Kernel:
+ */
+static inline void __cpuid(unsigned int *eax, unsigned int *ebx,
+		unsigned int *ecx, unsigned int *edx)
+{
+	/* ecx is often an input as well as an output. */
+	asm volatile(
+		"cpuid;"
+		: "=a" (*eax),
+		  "=b" (*ebx),
+		  "=c" (*ecx),
+		  "=d" (*edx)
+		: "0" (*eax), "2" (*ecx));
+}
+
+#endif /* !__i386__ */
+
+struct xsave_hdr_struct {
+	uint64_t xstate_bv;
+	uint64_t reserved1[2];
+	uint64_t reserved2[5];
+} __attribute__((packed));
+
+struct bndregs_struct {
+	uint64_t bndregs[8];
+} __attribute__((packed));
+
+struct bndcsr_struct {
+	uint64_t cfg_reg_u;
+	uint64_t status_reg;
+} __attribute__((packed));
+
+struct xsave_struct {
+	uint8_t fpu_sse[512];
+	struct xsave_hdr_struct xsave_hdr;
+	uint8_t ymm[256];
+	uint8_t lwp[128];
+	struct bndregs_struct bndregs;
+	struct bndcsr_struct bndcsr;
+} __attribute__((packed));
+
+uint8_t __attribute__((__aligned__(64))) buffer[4096];
+struct xsave_struct *xsave_buf = (struct xsave_struct *)buffer;
+
+uint8_t __attribute__((__aligned__(64))) test_buffer[4096];
+struct xsave_struct *xsave_test_buf = (struct xsave_struct *)test_buffer;
+
+uint64_t num_bnd_chk;
+
+static __always_inline void xrstor_state(struct xsave_struct *fx, uint64_t mask)
+{
+	uint32_t lmask = mask;
+	uint32_t hmask = mask >> 32;
+
+	asm volatile(".byte " REX_PREFIX "0x0f,0xae,0x2f\n\t"
+		     : : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask)
+		     :   "memory");
+}
+
+static __always_inline void xsave_state_1(void *_fx, uint64_t mask)
+{
+	uint32_t lmask = mask;
+	uint32_t hmask = mask >> 32;
+	unsigned char *fx = _fx;
+
+	asm volatile(".byte " REX_PREFIX "0x0f,0xae,0x27\n\t"
+		     : : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask)
+		     :   "memory");
+}
+
+static inline uint64_t xgetbv(uint32_t index)
+{
+	uint32_t eax, edx;
+
+	asm volatile(".byte 0x0f,0x01,0xd0" /* xgetbv */
+		     : "=a" (eax), "=d" (edx)
+		     : "c" (index));
+	return eax + ((uint64_t)edx << 32);
+}
+
+static uint64_t read_mpx_status_sig(ucontext_t *uctxt)
+{
+	memset(buffer, 0, sizeof(buffer));
+	memcpy(buffer,
+		(uint8_t *)uctxt->uc_mcontext.fpregs + XSAVE_OFFSET_IN_FPMEM,
+		sizeof(struct xsave_struct));
+
+	return xsave_buf->bndcsr.status_reg;
+}
+
+#include <pthread.h>
+
+static uint8_t *get_next_inst_ip(uint8_t *addr)
+{
+	uint8_t *ip = addr;
+	uint8_t sib;
+	uint8_t rm;
+	uint8_t mod;
+	uint8_t base;
+	uint8_t modrm;
+
+	/* determine the prefix. */
+	switch(*ip) {
+	case 0xf2:
+	case 0xf3:
+	case 0x66:
+		ip++;
+		break;
+	}
+
+	/* look for rex prefix */
+	if ((*ip & 0x40) == 0x40)
+		ip++;
+
+	/* Make sure we have a MPX instruction. */
+	if (*ip++ != 0x0f)
+		return addr;
+
+	/* Skip the op code byte. */
+	ip++;
+
+	/* Get the modrm byte. */
+	modrm = *ip++;
+
+	/* Break it down into parts. */
+	rm = modrm & 7;
+	mod = (modrm >> 6);
+
+	/* Init the parts of the address mode. */
+	base = 8;
+
+	/* Is it a mem mode? */
+	if (mod != 3) {
+		/* look for scaled indexed addressing */
+		if (rm == 4) {
+			/* SIB addressing */
+			sib = *ip++;
+			base = sib & 7;
+			switch (mod) {
+			case 0:
+				if (base == 5)
+					ip += 4;
+				break;
+
+			case 1:
+				ip++;
+				break;
+
+			case 2:
+				ip += 4;
+				break;
+			}
+
+		} else {
+			/* MODRM addressing */
+			switch (mod) {
+			case 0:
+				/* DISP32 addressing, no base */
+				if (rm == 5)
+					ip += 4;
+				break;
+
+			case 1:
+				ip++;
+				break;
+
+			case 2:
+				ip += 4;
+				break;
+			}
+		}
+	}
+	return ip;
+}
+
+#ifdef si_lower
+static inline void *__si_bounds_lower(siginfo_t *si)
+{
+	return si->si_lower;
+}
+
+static inline void *__si_bounds_upper(siginfo_t *si)
+{
+	return si->si_upper;
+}
+#else
+
+/*
+ * This deals with old version of _sigfault in some distros:
+ *
+
+old _sigfault:
+        struct {
+            void *si_addr;
+	} _sigfault;
+
+new _sigfault:
+	struct {
+		void __user *_addr;
+		int _trapno;
+		short _addr_lsb;
+		union {
+			struct {
+				void __user *_lower;
+				void __user *_upper;
+			} _addr_bnd;
+			__u32 _pkey;
+		};
+	} _sigfault;
+ *
+ */
+
+static inline void **__si_bounds_hack(siginfo_t *si)
+{
+	void *sigfault = &si->_sifields._sigfault;
+	void *end_sigfault = sigfault + sizeof(si->_sifields._sigfault);
+	int *trapno = (int*)end_sigfault;
+	/* skip _trapno and _addr_lsb */
+	void **__si_lower = (void**)(trapno + 2);
+
+	return __si_lower;
+}
+
+static inline void *__si_bounds_lower(siginfo_t *si)
+{
+	return *__si_bounds_hack(si);
+}
+
+static inline void *__si_bounds_upper(siginfo_t *si)
+{
+	return *(__si_bounds_hack(si) + 1);
+}
+#endif
+
+static int br_count;
+static int expected_bnd_index = -1;
+uint64_t shadow_plb[NR_MPX_BOUNDS_REGISTERS][2]; /* shadow MPX bound registers */
+unsigned long shadow_map[NR_MPX_BOUNDS_REGISTERS];
+
+/* Failed address bound checks: */
+#ifndef SEGV_BNDERR
+# define SEGV_BNDERR	3
+#endif
+
+/*
+ * The kernel is supposed to provide some information about the bounds
+ * exception in the siginfo.  It should match what we have in the bounds
+ * registers that we are checking against.  Just check against the shadow copy
+ * since it is easily available, and we also check that *it* matches the real
+ * registers.
+ */
+void check_siginfo_vs_shadow(siginfo_t* si)
+{
+	int siginfo_ok = 1;
+	void *shadow_lower = (void *)(unsigned long)shadow_plb[expected_bnd_index][0];
+	void *shadow_upper = (void *)(unsigned long)shadow_plb[expected_bnd_index][1];
+
+	if ((expected_bnd_index < 0) ||
+	    (expected_bnd_index >= NR_MPX_BOUNDS_REGISTERS)) {
+		fprintf(stderr, "ERROR: invalid expected_bnd_index: %d\n",
+			expected_bnd_index);
+		exit(6);
+	}
+	if (__si_bounds_lower(si) != shadow_lower)
+		siginfo_ok = 0;
+	if (__si_bounds_upper(si) != shadow_upper)
+		siginfo_ok = 0;
+
+	if (!siginfo_ok) {
+		fprintf(stderr, "ERROR: siginfo bounds do not match "
+			"shadow bounds for register %d\n", expected_bnd_index);
+		exit(7);
+	}
+}
+
+void handler(int signum, siginfo_t *si, void *vucontext)
+{
+	int i;
+	ucontext_t *uctxt = vucontext;
+	int trapno;
+	unsigned long ip;
+
+	dprintf1("entered signal handler\n");
+
+	trapno = uctxt->uc_mcontext.gregs[REG_TRAPNO];
+	ip = uctxt->uc_mcontext.gregs[REG_IP_IDX];
+
+	if (trapno == 5) {
+		typeof(si->si_addr) *si_addr_ptr = &si->si_addr;
+		uint64_t status = read_mpx_status_sig(uctxt);
+		uint64_t br_reason =  status & 0x3;
+
+		br_count++;
+		dprintf1("#BR 0x%jx (total seen: %d)\n", status, br_count);
+
+		dprintf2("Saw a #BR! status 0x%jx at %016lx br_reason: %jx\n",
+				status, ip, br_reason);
+		dprintf2("si_signo: %d\n", si->si_signo);
+		dprintf2("  signum: %d\n", signum);
+		dprintf2("info->si_code == SEGV_BNDERR: %d\n",
+				(si->si_code == SEGV_BNDERR));
+		dprintf2("info->si_code: %d\n", si->si_code);
+		dprintf2("info->si_lower: %p\n", __si_bounds_lower(si));
+		dprintf2("info->si_upper: %p\n", __si_bounds_upper(si));
+
+		for (i = 0; i < 8; i++)
+			dprintf3("[%d]: %p\n", i, si_addr_ptr[i]);
+		switch (br_reason) {
+		case 0: /* traditional BR */
+			fprintf(stderr,
+				"Undefined status with bound exception:%jx\n",
+				 status);
+			exit(5);
+		case 1: /* #BR MPX bounds exception */
+			/* these are normal and we expect to see them */
+
+			check_siginfo_vs_shadow(si);
+
+			dprintf1("bounds exception (normal): status 0x%jx at %p si_addr: %p\n",
+				status, (void *)ip, si->si_addr);
+			num_bnd_chk++;
+			uctxt->uc_mcontext.gregs[REG_IP_IDX] =
+				(greg_t)get_next_inst_ip((uint8_t *)ip);
+			break;
+		case 2:
+			fprintf(stderr, "#BR status == 2, missing bounds table,"
+					"kernel should have handled!!\n");
+			exit(4);
+			break;
+		default:
+			fprintf(stderr, "bound check error: status 0x%jx at %p\n",
+				status, (void *)ip);
+			num_bnd_chk++;
+			uctxt->uc_mcontext.gregs[REG_IP_IDX] =
+				(greg_t)get_next_inst_ip((uint8_t *)ip);
+			fprintf(stderr, "bound check error: si_addr %p\n", si->si_addr);
+			exit(3);
+		}
+	} else if (trapno == 14) {
+		eprintf("ERROR: In signal handler, page fault, trapno = %d, ip = %016lx\n",
+			trapno, ip);
+		eprintf("si_addr %p\n", si->si_addr);
+		eprintf("REG_ERR: %lx\n", (unsigned long)uctxt->uc_mcontext.gregs[REG_ERR]);
+		test_failed();
+	} else {
+		eprintf("unexpected trap %d! at 0x%lx\n", trapno, ip);
+		eprintf("si_addr %p\n", si->si_addr);
+		eprintf("REG_ERR: %lx\n", (unsigned long)uctxt->uc_mcontext.gregs[REG_ERR]);
+		test_failed();
+	}
+}
+
+static inline void cpuid_count(unsigned int op, int count,
+			       unsigned int *eax, unsigned int *ebx,
+			       unsigned int *ecx, unsigned int *edx)
+{
+	*eax = op;
+	*ecx = count;
+	__cpuid(eax, ebx, ecx, edx);
+}
+
+#define XSTATE_CPUID	    0x0000000d
+
+/*
+ * List of XSAVE features Linux knows about:
+ */
+enum xfeature_bit {
+	XSTATE_BIT_FP,
+	XSTATE_BIT_SSE,
+	XSTATE_BIT_YMM,
+	XSTATE_BIT_BNDREGS,
+	XSTATE_BIT_BNDCSR,
+	XSTATE_BIT_OPMASK,
+	XSTATE_BIT_ZMM_Hi256,
+	XSTATE_BIT_Hi16_ZMM,
+
+	XFEATURES_NR_MAX,
+};
+
+#define XSTATE_FP	       (1 << XSTATE_BIT_FP)
+#define XSTATE_SSE	      (1 << XSTATE_BIT_SSE)
+#define XSTATE_YMM	      (1 << XSTATE_BIT_YMM)
+#define XSTATE_BNDREGS	  (1 << XSTATE_BIT_BNDREGS)
+#define XSTATE_BNDCSR	   (1 << XSTATE_BIT_BNDCSR)
+#define XSTATE_OPMASK	   (1 << XSTATE_BIT_OPMASK)
+#define XSTATE_ZMM_Hi256	(1 << XSTATE_BIT_ZMM_Hi256)
+#define XSTATE_Hi16_ZMM	 (1 << XSTATE_BIT_Hi16_ZMM)
+
+#define MPX_XSTATES		(XSTATE_BNDREGS | XSTATE_BNDCSR) /* 0x18 */
+
+bool one_bit(unsigned int x, int bit)
+{
+	return !!(x & (1<<bit));
+}
+
+void print_state_component(int state_bit_nr, char *name)
+{
+	unsigned int eax, ebx, ecx, edx;
+	unsigned int state_component_size;
+	unsigned int state_component_supervisor;
+	unsigned int state_component_user;
+	unsigned int state_component_aligned;
+
+	/* See SDM Section 13.2 */
+	cpuid_count(XSTATE_CPUID, state_bit_nr, &eax, &ebx, &ecx, &edx);
+	assert(eax || ebx || ecx);
+	state_component_size = eax;
+	state_component_supervisor = ((!ebx) && one_bit(ecx, 0));
+	state_component_user = !one_bit(ecx, 0);
+	state_component_aligned = one_bit(ecx, 1);
+	printf("%8s: size: %d user: %d supervisor: %d aligned: %d\n",
+		name,
+		state_component_size,	    state_component_user,
+		state_component_supervisor, state_component_aligned);
+
+}
+
+/* Intel-defined CPU features, CPUID level 0x00000001 (ecx) */
+#define XSAVE_FEATURE_BIT       (26)  /* XSAVE/XRSTOR/XSETBV/XGETBV */
+#define OSXSAVE_FEATURE_BIT     (27) /* XSAVE enabled in the OS */
+
+bool check_mpx_support(void)
+{
+	unsigned int eax, ebx, ecx, edx;
+
+	cpuid_count(1, 0, &eax, &ebx, &ecx, &edx);
+
+	/* We can't do much without XSAVE, so just make these assert()'s */
+	if (!one_bit(ecx, XSAVE_FEATURE_BIT)) {
+		fprintf(stderr, "processor lacks XSAVE, can not run MPX tests\n");
+		exit(0);
+	}
+
+	if (!one_bit(ecx, OSXSAVE_FEATURE_BIT)) {
+		fprintf(stderr, "processor lacks OSXSAVE, can not run MPX tests\n");
+		exit(0);
+	}
+
+	/* CPUs not supporting the XSTATE CPUID leaf do not support MPX */
+	/* Is this redundant with the feature bit checks? */
+	cpuid_count(0, 0, &eax, &ebx, &ecx, &edx);
+	if (eax < XSTATE_CPUID) {
+		fprintf(stderr, "processor lacks XSTATE CPUID leaf,"
+				" can not run MPX tests\n");
+		exit(0);
+	}
+
+	printf("XSAVE is supported by HW & OS\n");
+
+	cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx);
+
+	printf("XSAVE processor supported state mask: 0x%x\n", eax);
+	printf("XSAVE OS supported state mask: 0x%jx\n", xgetbv(0));
+
+	/* Make sure that the MPX states are enabled in in XCR0 */
+	if ((eax & MPX_XSTATES) != MPX_XSTATES) {
+		fprintf(stderr, "processor lacks MPX XSTATE(s), can not run MPX tests\n");
+		exit(0);
+	}
+
+	/* Make sure the MPX states are supported by XSAVE* */
+	if ((xgetbv(0) & MPX_XSTATES) != MPX_XSTATES) {
+		fprintf(stderr, "MPX XSTATE(s) no enabled in XCR0, "
+				"can not run MPX tests\n");
+		exit(0);
+	}
+
+	print_state_component(XSTATE_BIT_BNDREGS, "BNDREGS");
+	print_state_component(XSTATE_BIT_BNDCSR,  "BNDCSR");
+
+	return true;
+}
+
+void enable_mpx(void *l1base)
+{
+	/* enable point lookup */
+	memset(buffer, 0, sizeof(buffer));
+	xrstor_state(xsave_buf, 0x18);
+
+	xsave_buf->xsave_hdr.xstate_bv = 0x10;
+	xsave_buf->bndcsr.cfg_reg_u = (unsigned long)l1base | 1;
+	xsave_buf->bndcsr.status_reg = 0;
+
+	dprintf2("bf xrstor\n");
+	dprintf2("xsave cndcsr: status %jx, configu %jx\n",
+	       xsave_buf->bndcsr.status_reg, xsave_buf->bndcsr.cfg_reg_u);
+	xrstor_state(xsave_buf, 0x18);
+	dprintf2("after xrstor\n");
+
+	xsave_state_1(xsave_buf, 0x18);
+
+	dprintf1("xsave bndcsr: status %jx, configu %jx\n",
+	       xsave_buf->bndcsr.status_reg, xsave_buf->bndcsr.cfg_reg_u);
+}
+
+#include <sys/prctl.h>
+
+struct mpx_bounds_dir *bounds_dir_ptr;
+
+unsigned long __bd_incore(const char *func, int line)
+{
+	unsigned long ret = nr_incore(bounds_dir_ptr, MPX_BOUNDS_DIR_SIZE_BYTES);
+	return ret;
+}
+#define bd_incore() __bd_incore(__func__, __LINE__)
+
+void check_clear(void *ptr, unsigned long sz)
+{
+	unsigned long *i;
+
+	for (i = ptr; (void *)i < ptr + sz; i++) {
+		if (*i) {
+			dprintf1("%p is NOT clear at %p\n", ptr, i);
+			assert(0);
+		}
+	}
+	dprintf1("%p is clear for %lx\n", ptr, sz);
+}
+
+void check_clear_bd(void)
+{
+	check_clear(bounds_dir_ptr, 2UL << 30);
+}
+
+#define USE_MALLOC_FOR_BOUNDS_DIR 1
+bool process_specific_init(void)
+{
+	unsigned long size;
+	unsigned long *dir;
+	/* Guarantee we have the space to align it, add padding: */
+	unsigned long pad = getpagesize();
+
+	size = 2UL << 30; /* 2GB */
+	if (sizeof(unsigned long) == 4)
+		size = 4UL << 20; /* 4MB */
+	dprintf1("trying to allocate %ld MB bounds directory\n", (size >> 20));
+
+	if (USE_MALLOC_FOR_BOUNDS_DIR) {
+		unsigned long _dir;
+
+		dir = malloc(size + pad);
+		assert(dir);
+		_dir = (unsigned long)dir;
+		_dir += 0xfffUL;
+		_dir &= ~0xfffUL;
+		dir = (void *)_dir;
+	} else {
+		/*
+		 * This makes debugging easier because the address
+		 * calculations are simpler:
+		 */
+		dir = mmap((void *)0x200000000000, size + pad,
+				PROT_READ|PROT_WRITE,
+				MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
+		if (dir == (void *)-1) {
+			perror("unable to allocate bounds directory");
+			abort();
+		}
+		check_clear(dir, size);
+	}
+	bounds_dir_ptr = (void *)dir;
+	madvise(bounds_dir_ptr, size, MADV_NOHUGEPAGE);
+	bd_incore();
+	dprintf1("bounds directory: 0x%p -> 0x%p\n", bounds_dir_ptr,
+			(char *)bounds_dir_ptr + size);
+	check_clear(dir, size);
+	enable_mpx(dir);
+	check_clear(dir, size);
+	if (prctl(43, 0, 0, 0, 0)) {
+		printf("no MPX support\n");
+		abort();
+		return false;
+	}
+	return true;
+}
+
+bool process_specific_finish(void)
+{
+	if (prctl(44)) {
+		printf("no MPX support\n");
+		return false;
+	}
+	return true;
+}
+
+void setup_handler()
+{
+	int r, rs;
+	struct sigaction newact;
+	struct sigaction oldact;
+
+	/* #BR is mapped to sigsegv */
+	int signum  = SIGSEGV;
+
+	newact.sa_handler = 0;   /* void(*)(int)*/
+	newact.sa_sigaction = handler; /* void (*)(int, siginfo_t*, void *) */
+
+	/*sigset_t - signals to block while in the handler */
+	/* get the old signal mask. */
+	rs = sigprocmask(SIG_SETMASK, 0, &newact.sa_mask);
+	assert(rs == 0);
+
+	/* call sa_sigaction, not sa_handler*/
+	newact.sa_flags = SA_SIGINFO;
+
+	newact.sa_restorer = 0;  /* void(*)(), obsolete */
+	r = sigaction(signum, &newact, &oldact);
+	assert(r == 0);
+}
+
+void mpx_prepare(void)
+{
+	dprintf2("%s()\n", __func__);
+	setup_handler();
+	process_specific_init();
+}
+
+void mpx_cleanup(void)
+{
+	printf("%s(): %jd BRs. bye...\n", __func__, num_bnd_chk);
+	process_specific_finish();
+}
+
+/*-------------- the following is test case ---------------*/
+#include <stdint.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <time.h>
+
+uint64_t num_lower_brs;
+uint64_t num_upper_brs;
+
+#define MPX_CONFIG_OFFSET 1024
+#define MPX_BOUNDS_OFFSET 960
+#define MPX_HEADER_OFFSET 512
+#define MAX_ADDR_TESTED (1<<28)
+#define TEST_ROUNDS 100
+
+/*
+      0F 1A /r BNDLDX-Load
+      0F 1B /r BNDSTX-Store Extended Bounds Using Address Translation
+   66 0F 1A /r BNDMOV bnd1, bnd2/m128
+   66 0F 1B /r BNDMOV bnd1/m128, bnd2
+   F2 0F 1A /r BNDCU bnd, r/m64
+   F2 0F 1B /r BNDCN bnd, r/m64
+   F3 0F 1A /r BNDCL bnd, r/m64
+   F3 0F 1B /r BNDMK bnd, m64
+*/
+
+static __always_inline void xsave_state(void *_fx, uint64_t mask)
+{
+	uint32_t lmask = mask;
+	uint32_t hmask = mask >> 32;
+	unsigned char *fx = _fx;
+
+	asm volatile(".byte " REX_PREFIX "0x0f,0xae,0x27\n\t"
+		     : : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask)
+		     :   "memory");
+}
+
+static __always_inline void mpx_clear_bnd0(void)
+{
+	long size = 0;
+	void *ptr = NULL;
+	/* F3 0F 1B /r BNDMK bnd, m64			*/
+	/* f3 0f 1b 04 11    bndmk  (%rcx,%rdx,1),%bnd0	*/
+	asm volatile(".byte 0xf3,0x0f,0x1b,0x04,0x11\n\t"
+		     : : "c" (ptr), "d" (size-1)
+		     :   "memory");
+}
+
+static __always_inline void mpx_make_bound_helper(unsigned long ptr,
+		unsigned long size)
+{
+	/* F3 0F 1B /r		BNDMK bnd, m64			*/
+	/* f3 0f 1b 04 11       bndmk  (%rcx,%rdx,1),%bnd0	*/
+	asm volatile(".byte 0xf3,0x0f,0x1b,0x04,0x11\n\t"
+		     : : "c" (ptr), "d" (size-1)
+		     :   "memory");
+}
+
+static __always_inline void mpx_check_lowerbound_helper(unsigned long ptr)
+{
+	/* F3 0F 1A /r	NDCL bnd, r/m64			*/
+	/* f3 0f 1a 01	bndcl  (%rcx),%bnd0		*/
+	asm volatile(".byte 0xf3,0x0f,0x1a,0x01\n\t"
+		     : : "c" (ptr)
+		     :   "memory");
+}
+
+static __always_inline void mpx_check_upperbound_helper(unsigned long ptr)
+{
+	/* F2 0F 1A /r	BNDCU bnd, r/m64	*/
+	/* f2 0f 1a 01	bndcu  (%rcx),%bnd0	*/
+	asm volatile(".byte 0xf2,0x0f,0x1a,0x01\n\t"
+		     : : "c" (ptr)
+		     :   "memory");
+}
+
+static __always_inline void mpx_movbndreg_helper()
+{
+	/* 66 0F 1B /r	BNDMOV bnd1/m128, bnd2	*/
+	/* 66 0f 1b c2	bndmov %bnd0,%bnd2	*/
+
+	asm volatile(".byte 0x66,0x0f,0x1b,0xc2\n\t");
+}
+
+static __always_inline void mpx_movbnd2mem_helper(uint8_t *mem)
+{
+	/* 66 0F 1B /r	BNDMOV bnd1/m128, bnd2	*/
+	/* 66 0f 1b 01	bndmov %bnd0,(%rcx)	*/
+	asm volatile(".byte 0x66,0x0f,0x1b,0x01\n\t"
+		     : : "c" (mem)
+		     :   "memory");
+}
+
+static __always_inline void mpx_movbnd_from_mem_helper(uint8_t *mem)
+{
+	/* 66 0F 1A /r	BNDMOV bnd1, bnd2/m128	*/
+	/* 66 0f 1a 01	bndmov (%rcx),%bnd0	*/
+	asm volatile(".byte 0x66,0x0f,0x1a,0x01\n\t"
+		     : : "c" (mem)
+		     :   "memory");
+}
+
+static __always_inline void mpx_store_dsc_helper(unsigned long ptr_addr,
+		unsigned long ptr_val)
+{
+	/* 0F 1B /r	BNDSTX-Store Extended Bounds Using Address Translation	*/
+	/* 0f 1b 04 11	bndstx %bnd0,(%rcx,%rdx,1)				*/
+	asm volatile(".byte 0x0f,0x1b,0x04,0x11\n\t"
+		     : : "c" (ptr_addr), "d" (ptr_val)
+		     :   "memory");
+}
+
+static __always_inline void mpx_load_dsc_helper(unsigned long ptr_addr,
+		unsigned long ptr_val)
+{
+	/* 0F 1A /r	BNDLDX-Load			*/
+	/*/ 0f 1a 04 11	bndldx (%rcx,%rdx,1),%bnd0	*/
+	asm volatile(".byte 0x0f,0x1a,0x04,0x11\n\t"
+		     : : "c" (ptr_addr), "d" (ptr_val)
+		     :   "memory");
+}
+
+void __print_context(void *__print_xsave_buffer, int line)
+{
+	uint64_t *bounds = (uint64_t *)(__print_xsave_buffer + MPX_BOUNDS_OFFSET);
+	uint64_t *cfg    = (uint64_t *)(__print_xsave_buffer + MPX_CONFIG_OFFSET);
+
+	int i;
+	eprintf("%s()::%d\n", "print_context", line);
+	for (i = 0; i < 4; i++) {
+		eprintf("bound[%d]: 0x%016lx 0x%016lx(0x%016lx)\n", i,
+		       (unsigned long)bounds[i*2],
+		       ~(unsigned long)bounds[i*2+1],
+			(unsigned long)bounds[i*2+1]);
+	}
+
+	eprintf("cpcfg: %jx  cpstatus: %jx\n", cfg[0], cfg[1]);
+}
+#define print_context(x) __print_context(x, __LINE__)
+#ifdef DEBUG
+#define dprint_context(x) print_context(x)
+#else
+#define dprint_context(x) do{}while(0)
+#endif
+
+void init()
+{
+	int i;
+
+	srand((unsigned int)time(NULL));
+
+	for (i = 0; i < 4; i++) {
+		shadow_plb[i][0] = 0;
+		shadow_plb[i][1] = ~(unsigned long)0;
+	}
+}
+
+long int __mpx_random(int line)
+{
+#ifdef NOT_SO_RANDOM
+	static long fake = 722122311;
+	fake += 563792075;
+	return fakse;
+#else
+	return random();
+#endif
+}
+#define mpx_random() __mpx_random(__LINE__)
+
+uint8_t *get_random_addr()
+{
+	uint8_t*addr = (uint8_t *)(unsigned long)(rand() % MAX_ADDR_TESTED);
+	return (addr - (unsigned long)addr % sizeof(uint8_t *));
+}
+
+static inline bool compare_context(void *__xsave_buffer)
+{
+	uint64_t *bounds = (uint64_t *)(__xsave_buffer + MPX_BOUNDS_OFFSET);
+
+	int i;
+	for (i = 0; i < 4; i++) {
+		dprintf3("shadow[%d]{%016lx/%016lx}\nbounds[%d]{%016lx/%016lx}\n",
+		       i, (unsigned long)shadow_plb[i][0], (unsigned long)shadow_plb[i][1],
+		       i, (unsigned long)bounds[i*2],     ~(unsigned long)bounds[i*2+1]);
+		if ((shadow_plb[i][0] != bounds[i*2]) ||
+		    (shadow_plb[i][1] != ~(unsigned long)bounds[i*2+1])) {
+			eprintf("ERROR comparing shadow to real bound register %d\n", i);
+			eprintf("shadow{0x%016lx/0x%016lx}\nbounds{0x%016lx/0x%016lx}\n",
+			       (unsigned long)shadow_plb[i][0], (unsigned long)shadow_plb[i][1],
+			       (unsigned long)bounds[i*2], (unsigned long)bounds[i*2+1]);
+			return false;
+		}
+	}
+
+	return true;
+}
+
+void mkbnd_shadow(uint8_t *ptr, int index, long offset)
+{
+	uint64_t *lower = (uint64_t *)&(shadow_plb[index][0]);
+	uint64_t *upper = (uint64_t *)&(shadow_plb[index][1]);
+	*lower = (unsigned long)ptr;
+	*upper = (unsigned long)ptr + offset - 1;
+}
+
+void check_lowerbound_shadow(uint8_t *ptr, int index)
+{
+	uint64_t *lower = (uint64_t *)&(shadow_plb[index][0]);
+	if (*lower > (uint64_t)(unsigned long)ptr)
+		num_lower_brs++;
+	else
+		dprintf1("LowerBoundChk passed:%p\n", ptr);
+}
+
+void check_upperbound_shadow(uint8_t *ptr, int index)
+{
+	uint64_t upper = *(uint64_t *)&(shadow_plb[index][1]);
+	if (upper < (uint64_t)(unsigned long)ptr)
+		num_upper_brs++;
+	else
+		dprintf1("UpperBoundChk passed:%p\n", ptr);
+}
+
+__always_inline void movbndreg_shadow(int src, int dest)
+{
+	shadow_plb[dest][0] = shadow_plb[src][0];
+	shadow_plb[dest][1] = shadow_plb[src][1];
+}
+
+__always_inline void movbnd2mem_shadow(int src, unsigned long *dest)
+{
+	unsigned long *lower = (unsigned long *)&(shadow_plb[src][0]);
+	unsigned long *upper = (unsigned long *)&(shadow_plb[src][1]);
+	*dest = *lower;
+	*(dest+1) = *upper;
+}
+
+__always_inline void movbnd_from_mem_shadow(unsigned long *src, int dest)
+{
+	unsigned long *lower = (unsigned long *)&(shadow_plb[dest][0]);
+	unsigned long *upper = (unsigned long *)&(shadow_plb[dest][1]);
+	*lower = *src;
+	*upper = *(src+1);
+}
+
+__always_inline void stdsc_shadow(int index, uint8_t *ptr, uint8_t *ptr_val)
+{
+	shadow_map[0] = (unsigned long)shadow_plb[index][0];
+	shadow_map[1] = (unsigned long)shadow_plb[index][1];
+	shadow_map[2] = (unsigned long)ptr_val;
+	dprintf3("%s(%d, %p, %p) set shadow map[2]: %p\n", __func__,
+			index, ptr, ptr_val, ptr_val);
+	/*ptr ignored */
+}
+
+void lddsc_shadow(int index, uint8_t *ptr, uint8_t *ptr_val)
+{
+	uint64_t lower = shadow_map[0];
+	uint64_t upper = shadow_map[1];
+	uint8_t *value = (uint8_t *)shadow_map[2];
+
+	if (value != ptr_val) {
+		dprintf2("%s(%d, %p, %p) init shadow bounds[%d] "
+			 "because %p != %p\n", __func__, index, ptr,
+			 ptr_val, index, value, ptr_val);
+		shadow_plb[index][0] = 0;
+		shadow_plb[index][1] = ~(unsigned long)0;
+	} else {
+		shadow_plb[index][0] = lower;
+		shadow_plb[index][1] = upper;
+	}
+	/* ptr ignored */
+}
+
+static __always_inline void mpx_test_helper0(uint8_t *buf, uint8_t *ptr)
+{
+	mpx_make_bound_helper((unsigned long)ptr, 0x1800);
+}
+
+static __always_inline void mpx_test_helper0_shadow(uint8_t *buf, uint8_t *ptr)
+{
+	mkbnd_shadow(ptr, 0, 0x1800);
+}
+
+static __always_inline void mpx_test_helper1(uint8_t *buf, uint8_t *ptr)
+{
+	/* these are hard-coded to check bnd0 */
+	expected_bnd_index = 0;
+	mpx_check_lowerbound_helper((unsigned long)(ptr-1));
+	mpx_check_upperbound_helper((unsigned long)(ptr+0x1800));
+	/* reset this since we do not expect any more bounds exceptions */
+	expected_bnd_index = -1;
+}
+
+static __always_inline void mpx_test_helper1_shadow(uint8_t *buf, uint8_t *ptr)
+{
+	check_lowerbound_shadow(ptr-1, 0);
+	check_upperbound_shadow(ptr+0x1800, 0);
+}
+
+static __always_inline void mpx_test_helper2(uint8_t *buf, uint8_t *ptr)
+{
+	mpx_make_bound_helper((unsigned long)ptr, 0x1800);
+	mpx_movbndreg_helper();
+	mpx_movbnd2mem_helper(buf);
+	mpx_make_bound_helper((unsigned long)(ptr+0x12), 0x1800);
+}
+
+static __always_inline void mpx_test_helper2_shadow(uint8_t *buf, uint8_t *ptr)
+{
+	mkbnd_shadow(ptr, 0, 0x1800);
+	movbndreg_shadow(0, 2);
+	movbnd2mem_shadow(0, (unsigned long *)buf);
+	mkbnd_shadow(ptr+0x12, 0, 0x1800);
+}
+
+static __always_inline void mpx_test_helper3(uint8_t *buf, uint8_t *ptr)
+{
+	mpx_movbnd_from_mem_helper(buf);
+}
+
+static __always_inline void mpx_test_helper3_shadow(uint8_t *buf, uint8_t *ptr)
+{
+	movbnd_from_mem_shadow((unsigned long *)buf, 0);
+}
+
+static __always_inline void mpx_test_helper4(uint8_t *buf, uint8_t *ptr)
+{
+	mpx_store_dsc_helper((unsigned long)buf, (unsigned long)ptr);
+	mpx_make_bound_helper((unsigned long)(ptr+0x12), 0x1800);
+}
+
+static __always_inline void mpx_test_helper4_shadow(uint8_t *buf, uint8_t *ptr)
+{
+	stdsc_shadow(0, buf, ptr);
+	mkbnd_shadow(ptr+0x12, 0, 0x1800);
+}
+
+static __always_inline void mpx_test_helper5(uint8_t *buf, uint8_t *ptr)
+{
+	mpx_load_dsc_helper((unsigned long)buf, (unsigned long)ptr);
+}
+
+static __always_inline void mpx_test_helper5_shadow(uint8_t *buf, uint8_t *ptr)
+{
+	lddsc_shadow(0, buf, ptr);
+}
+
+#define NR_MPX_TEST_FUNCTIONS 6
+
+/*
+ * For compatibility reasons, MPX will clear the bounds registers
+ * when you make function calls (among other things).  We have to
+ * preserve the registers in between calls to the "helpers" since
+ * they build on each other.
+ *
+ * Be very careful not to make any function calls inside the
+ * helpers, or anywhere else beween the xrstor and xsave.
+ */
+#define run_helper(helper_nr, buf, buf_shadow, ptr)	do {	\
+	xrstor_state(xsave_test_buf, flags);			\
+	mpx_test_helper##helper_nr(buf, ptr);			\
+	xsave_state(xsave_test_buf, flags);			\
+	mpx_test_helper##helper_nr##_shadow(buf_shadow, ptr);	\
+} while (0)
+
+static void run_helpers(int nr, uint8_t *buf, uint8_t *buf_shadow, uint8_t *ptr)
+{
+	uint64_t flags = 0x18;
+
+	dprint_context(xsave_test_buf);
+	switch (nr) {
+	case 0:
+		run_helper(0, buf, buf_shadow, ptr);
+		break;
+	case 1:
+		run_helper(1, buf, buf_shadow, ptr);
+		break;
+	case 2:
+		run_helper(2, buf, buf_shadow, ptr);
+		break;
+	case 3:
+		run_helper(3, buf, buf_shadow, ptr);
+		break;
+	case 4:
+		run_helper(4, buf, buf_shadow, ptr);
+		break;
+	case 5:
+		run_helper(5, buf, buf_shadow, ptr);
+		break;
+	default:
+		test_failed();
+		break;
+	}
+	dprint_context(xsave_test_buf);
+}
+
+unsigned long buf_shadow[1024]; /* used to check load / store descriptors */
+extern long inspect_me(struct mpx_bounds_dir *bounds_dir);
+
+long cover_buf_with_bt_entries(void *buf, long buf_len)
+{
+	int i;
+	long nr_to_fill;
+	int ratio = 1000;
+	unsigned long buf_len_in_ptrs;
+
+	/* Fill about 1/100 of the space with bt entries */
+	nr_to_fill = buf_len / (sizeof(unsigned long) * ratio);
+
+	if (!nr_to_fill)
+		dprintf3("%s() nr_to_fill: %ld\n", __func__, nr_to_fill);
+
+	/* Align the buffer to pointer size */
+	while (((unsigned long)buf) % sizeof(void *)) {
+		buf++;
+		buf_len--;
+	}
+	/* We are storing pointers, so make */
+	buf_len_in_ptrs = buf_len / sizeof(void *);
+
+	for (i = 0; i < nr_to_fill; i++) {
+		long index = (mpx_random() % buf_len_in_ptrs);
+		void *ptr = buf + index * sizeof(unsigned long);
+		unsigned long ptr_addr = (unsigned long)ptr;
+
+		/* ptr and size can be anything */
+		mpx_make_bound_helper((unsigned long)ptr, 8);
+
+		/*
+		 * take bnd0 and put it in to bounds tables "buf + index" is an
+		 * address inside the buffer where we are pretending that we
+		 * are going to put a pointer We do not, though because we will
+		 * never load entries from the table, so it doesn't matter.
+		 */
+		mpx_store_dsc_helper(ptr_addr, (unsigned long)ptr);
+		dprintf4("storing bound table entry for %lx (buf start @ %p)\n",
+				ptr_addr, buf);
+	}
+	return nr_to_fill;
+}
+
+unsigned long align_down(unsigned long alignme, unsigned long align_to)
+{
+	return alignme & ~(align_to-1);
+}
+
+unsigned long align_up(unsigned long alignme, unsigned long align_to)
+{
+	return (alignme + align_to - 1) & ~(align_to-1);
+}
+
+/*
+ * Using 1MB alignment guarantees that each no allocation
+ * will overlap with another's bounds tables.
+ *
+ * We have to cook our own allocator here.  malloc() can
+ * mix other allocation with ours which means that even
+ * if we free all of our allocations, there might still
+ * be bounds tables for the *areas* since there is other
+ * valid memory there.
+ *
+ * We also can't use malloc() because a free() of an area
+ * might not free it back to the kernel.  We want it
+ * completely unmapped an malloc() does not guarantee
+ * that.
+ */
+#ifdef __i386__
+long alignment = 4096;
+long sz_alignment = 4096;
+#else
+long alignment = 1 * MB;
+long sz_alignment = 1 * MB;
+#endif
+void *mpx_mini_alloc(unsigned long sz)
+{
+	unsigned long long tries = 0;
+	static void *last;
+	void *ptr;
+	void *try_at;
+
+	sz = align_up(sz, sz_alignment);
+
+	try_at = last + alignment;
+	while (1) {
+		ptr = mmap(try_at, sz, PROT_READ|PROT_WRITE,
+				MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
+		if (ptr == (void *)-1)
+			return NULL;
+		if (ptr == try_at)
+			break;
+
+		munmap(ptr, sz);
+		try_at += alignment;
+#ifdef __i386__
+		/*
+		 * This isn't quite correct for 32-bit binaries
+		 * on 64-bit kernels since they can use the
+		 * entire 32-bit address space, but it's close
+		 * enough.
+		 */
+		if (try_at > (void *)0xC0000000)
+#else
+		if (try_at > (void *)0x0000800000000000)
+#endif
+			try_at = (void *)0x0;
+		if (!(++tries % 10000))
+			dprintf1("stuck in %s(), tries: %lld\n", __func__, tries);
+		continue;
+	}
+	last = ptr;
+	dprintf3("mpx_mini_alloc(0x%lx) returning: %p\n", sz, ptr);
+	return ptr;
+}
+void mpx_mini_free(void *ptr, long sz)
+{
+	dprintf2("%s() ptr: %p\n", __func__, ptr);
+	if ((unsigned long)ptr > 0x100000000000) {
+		dprintf1("uh oh !!!!!!!!!!!!!!! pointer too high: %p\n", ptr);
+		test_failed();
+	}
+	sz = align_up(sz, sz_alignment);
+	dprintf3("%s() ptr: %p before munmap\n", __func__, ptr);
+	munmap(ptr, sz);
+	dprintf3("%s() ptr: %p DONE\n", __func__, ptr);
+}
+
+#define NR_MALLOCS 100
+struct one_malloc {
+	char *ptr;
+	int nr_filled_btes;
+	unsigned long size;
+};
+struct one_malloc mallocs[NR_MALLOCS];
+
+void free_one_malloc(int index)
+{
+	unsigned long free_ptr;
+	unsigned long mask;
+
+	if (!mallocs[index].ptr)
+		return;
+
+	mpx_mini_free(mallocs[index].ptr, mallocs[index].size);
+	dprintf4("freed[%d]:  %p\n", index, mallocs[index].ptr);
+
+	free_ptr = (unsigned long)mallocs[index].ptr;
+	mask = alignment-1;
+	dprintf4("lowerbits: %lx / %lx mask: %lx\n", free_ptr,
+			(free_ptr & mask), mask);
+	assert((free_ptr & mask) == 0);
+
+	mallocs[index].ptr = NULL;
+}
+
+#ifdef __i386__
+#define MPX_BOUNDS_TABLE_COVERS 4096
+#else
+#define MPX_BOUNDS_TABLE_COVERS (1 * MB)
+#endif
+void zap_everything(void)
+{
+	long after_zap;
+	long before_zap;
+	int i;
+
+	before_zap = inspect_me(bounds_dir_ptr);
+	dprintf1("zapping everything start: %ld\n", before_zap);
+	for (i = 0; i < NR_MALLOCS; i++)
+		free_one_malloc(i);
+
+	after_zap = inspect_me(bounds_dir_ptr);
+	dprintf1("zapping everything done: %ld\n", after_zap);
+	/*
+	 * We only guarantee to empty the thing out if our allocations are
+	 * exactly aligned on the boundaries of a boudns table.
+	 */
+	if ((alignment >= MPX_BOUNDS_TABLE_COVERS) &&
+	    (sz_alignment >= MPX_BOUNDS_TABLE_COVERS)) {
+		if (after_zap != 0)
+			test_failed();
+
+		assert(after_zap == 0);
+	}
+}
+
+void do_one_malloc(void)
+{
+	static int malloc_counter;
+	long sz;
+	int rand_index = (mpx_random() % NR_MALLOCS);
+	void *ptr = mallocs[rand_index].ptr;
+
+	dprintf3("%s() enter\n", __func__);
+
+	if (ptr) {
+		dprintf3("freeing one malloc at index: %d\n", rand_index);
+		free_one_malloc(rand_index);
+		if (mpx_random() % (NR_MALLOCS*3) == 3) {
+			int i;
+			dprintf3("zapping some more\n");
+			for (i = rand_index; i < NR_MALLOCS; i++)
+				free_one_malloc(i);
+		}
+		if ((mpx_random() % zap_all_every_this_many_mallocs) == 4)
+			zap_everything();
+	}
+
+	/* 1->~1M */
+	sz = (1 + mpx_random() % 1000) * 1000;
+	ptr = mpx_mini_alloc(sz);
+	if (!ptr) {
+		/*
+		 * If we are failing allocations, just assume we
+		 * are out of memory and zap everything.
+		 */
+		dprintf3("zapping everything because out of memory\n");
+		zap_everything();
+		goto out;
+	}
+
+	dprintf3("malloc: %p size: 0x%lx\n", ptr, sz);
+	mallocs[rand_index].nr_filled_btes = cover_buf_with_bt_entries(ptr, sz);
+	mallocs[rand_index].ptr = ptr;
+	mallocs[rand_index].size = sz;
+out:
+	if ((++malloc_counter) % inspect_every_this_many_mallocs == 0)
+		inspect_me(bounds_dir_ptr);
+}
+
+void run_timed_test(void (*test_func)(void))
+{
+	int done = 0;
+	long iteration = 0;
+	static time_t last_print;
+	time_t now;
+	time_t start;
+
+	time(&start);
+	while (!done) {
+		time(&now);
+		if ((now - start) > TEST_DURATION_SECS)
+			done = 1;
+
+		test_func();
+		iteration++;
+
+		if ((now - last_print > 1) || done) {
+			printf("iteration %ld complete, OK so far\n", iteration);
+			last_print = now;
+		}
+	}
+}
+
+void check_bounds_table_frees(void)
+{
+	printf("executing unmaptest\n");
+	inspect_me(bounds_dir_ptr);
+	run_timed_test(&do_one_malloc);
+	printf("done with malloc() fun\n");
+}
+
+void insn_test_failed(int test_nr, int test_round, void *buf,
+		void *buf_shadow, void *ptr)
+{
+	print_context(xsave_test_buf);
+	eprintf("ERROR: test %d round %d failed\n", test_nr, test_round);
+	while (test_nr == 5) {
+		struct mpx_bt_entry *bte;
+		struct mpx_bounds_dir *bd = (void *)bounds_dir_ptr;
+		struct mpx_bd_entry *bde = mpx_vaddr_to_bd_entry(buf, bd);
+
+		printf("  bd: %p\n", bd);
+		printf("&bde: %p\n", bde);
+		printf("*bde: %lx\n", *(unsigned long *)bde);
+		if (!bd_entry_valid(bde))
+			break;
+
+		bte = mpx_vaddr_to_bt_entry(buf, bd);
+		printf(" te: %p\n", bte);
+		printf("bte[0]: %lx\n", bte->contents[0]);
+		printf("bte[1]: %lx\n", bte->contents[1]);
+		printf("bte[2]: %lx\n", bte->contents[2]);
+		printf("bte[3]: %lx\n", bte->contents[3]);
+		break;
+	}
+	test_failed();
+}
+
+void check_mpx_insns_and_tables(void)
+{
+	int successes = 0;
+	int failures  = 0;
+	int buf_size = (1024*1024);
+	unsigned long *buf = malloc(buf_size);
+	const int total_nr_tests = NR_MPX_TEST_FUNCTIONS * TEST_ROUNDS;
+	int i, j;
+
+	memset(buf, 0, buf_size);
+	memset(buf_shadow, 0, sizeof(buf_shadow));
+
+	for (i = 0; i < TEST_ROUNDS; i++) {
+		uint8_t *ptr = get_random_addr() + 8;
+
+		for (j = 0; j < NR_MPX_TEST_FUNCTIONS; j++) {
+			if (0 && j != 5) {
+				successes++;
+				continue;
+			}
+			dprintf2("starting test %d round %d\n", j, i);
+			dprint_context(xsave_test_buf);
+			/*
+			 * test5 loads an address from the bounds tables.
+			 * The load will only complete if 'ptr' matches
+			 * the load and the store, so with random addrs,
+			 * the odds of this are very small.  Make it
+			 * higher by only moving 'ptr' 1/10 times.
+			 */
+			if (random() % 10 <= 0)
+				ptr = get_random_addr() + 8;
+			dprintf3("random ptr{%p}\n", ptr);
+			dprint_context(xsave_test_buf);
+			run_helpers(j, (void *)buf, (void *)buf_shadow, ptr);
+			dprint_context(xsave_test_buf);
+			if (!compare_context(xsave_test_buf)) {
+				insn_test_failed(j, i, buf, buf_shadow, ptr);
+				failures++;
+				goto exit;
+			}
+			successes++;
+			dprint_context(xsave_test_buf);
+			dprintf2("finished test %d round %d\n", j, i);
+			dprintf3("\n");
+			dprint_context(xsave_test_buf);
+		}
+	}
+
+exit:
+	dprintf2("\nabout to free:\n");
+	free(buf);
+	dprintf1("successes: %d\n", successes);
+	dprintf1(" failures: %d\n", failures);
+	dprintf1("    tests: %d\n", total_nr_tests);
+	dprintf1(" expected: %jd #BRs\n", num_upper_brs + num_lower_brs);
+	dprintf1("      saw: %d #BRs\n", br_count);
+	if (failures) {
+		eprintf("ERROR: non-zero number of failures\n");
+		exit(20);
+	}
+	if (successes != total_nr_tests) {
+		eprintf("ERROR: succeded fewer than number of tries (%d != %d)\n",
+				successes, total_nr_tests);
+		exit(21);
+	}
+	if (num_upper_brs + num_lower_brs != br_count) {
+		eprintf("ERROR: unexpected number of #BRs: %jd %jd %d\n",
+				num_upper_brs, num_lower_brs, br_count);
+		eprintf("successes: %d\n", successes);
+		eprintf(" failures: %d\n", failures);
+		eprintf("    tests: %d\n", total_nr_tests);
+		eprintf(" expected: %jd #BRs\n", num_upper_brs + num_lower_brs);
+		eprintf("      saw: %d #BRs\n", br_count);
+		exit(22);
+	}
+}
+
+/*
+ * This is supposed to SIGSEGV nicely once the kernel
+ * can no longer allocate vaddr space.
+ */
+void exhaust_vaddr_space(void)
+{
+	unsigned long ptr;
+	/* Try to make sure there is no room for a bounds table anywhere */
+	unsigned long skip = MPX_BOUNDS_TABLE_SIZE_BYTES - PAGE_SIZE;
+#ifdef __i386__
+	unsigned long max_vaddr = 0xf7788000UL;
+#else
+	unsigned long max_vaddr = 0x800000000000UL;
+#endif
+
+	dprintf1("%s() start\n", __func__);
+	/* do not start at 0, we aren't allowed to map there */
+	for (ptr = PAGE_SIZE; ptr < max_vaddr; ptr += skip) {
+		void *ptr_ret;
+		int ret = madvise((void *)ptr, PAGE_SIZE, MADV_NORMAL);
+
+		if (!ret) {
+			dprintf1("madvise() %lx ret: %d\n", ptr, ret);
+			continue;
+		}
+		ptr_ret = mmap((void *)ptr, PAGE_SIZE, PROT_READ|PROT_WRITE,
+				MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
+		if (ptr_ret != (void *)ptr) {
+			perror("mmap");
+			dprintf1("mmap(%lx) ret: %p\n", ptr, ptr_ret);
+			break;
+		}
+		if (!(ptr & 0xffffff))
+			dprintf1("mmap(%lx) ret: %p\n", ptr, ptr_ret);
+	}
+	for (ptr = PAGE_SIZE; ptr < max_vaddr; ptr += skip) {
+		dprintf2("covering 0x%lx with bounds table entries\n", ptr);
+		cover_buf_with_bt_entries((void *)ptr, PAGE_SIZE);
+	}
+	dprintf1("%s() end\n", __func__);
+	printf("done with vaddr space fun\n");
+}
+
+void mpx_table_test(void)
+{
+	printf("starting mpx bounds table test\n");
+	run_timed_test(check_mpx_insns_and_tables);
+	printf("done with mpx bounds table test\n");
+}
+
+int main(int argc, char **argv)
+{
+	int unmaptest = 0;
+	int vaddrexhaust = 0;
+	int tabletest = 0;
+	int i;
+
+	check_mpx_support();
+	mpx_prepare();
+	srandom(11179);
+
+	bd_incore();
+	init();
+	bd_incore();
+
+	trace_me();
+
+	xsave_state((void *)xsave_test_buf, 0x1f);
+	if (!compare_context(xsave_test_buf))
+		printf("Init failed\n");
+
+	for (i = 1; i < argc; i++) {
+		if (!strcmp(argv[i], "unmaptest"))
+			unmaptest = 1;
+		if (!strcmp(argv[i], "vaddrexhaust"))
+			vaddrexhaust = 1;
+		if (!strcmp(argv[i], "tabletest"))
+			tabletest = 1;
+	}
+	if (!(unmaptest || vaddrexhaust || tabletest)) {
+		unmaptest = 1;
+		/* vaddrexhaust = 1; */
+		tabletest = 1;
+	}
+	if (unmaptest)
+		check_bounds_table_frees();
+	if (tabletest)
+		mpx_table_test();
+	if (vaddrexhaust)
+		exhaust_vaddr_space();
+	printf("%s completed successfully\n", argv[0]);
+	exit(0);
+}
+
+#include "mpx-dig.c"
diff --git a/tools/testing/selftests/x86/mpx-mm.h b/tools/testing/selftests/x86/mpx-mm.h
new file mode 100644
index 000000000..6dbdd66b8
--- /dev/null
+++ b/tools/testing/selftests/x86/mpx-mm.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _MPX_MM_H
+#define _MPX_MM_H
+
+#define PAGE_SIZE 4096
+#define MB (1UL<<20)
+
+extern long nr_incore(void *ptr, unsigned long size_bytes);
+
+#endif /* _MPX_MM_H */
diff --git a/tools/testing/selftests/x86/pkey-helpers.h b/tools/testing/selftests/x86/pkey-helpers.h
new file mode 100644
index 000000000..254e5436b
--- /dev/null
+++ b/tools/testing/selftests/x86/pkey-helpers.h
@@ -0,0 +1,219 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _PKEYS_HELPER_H
+#define _PKEYS_HELPER_H
+#define _GNU_SOURCE
+#include <string.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <signal.h>
+#include <assert.h>
+#include <stdlib.h>
+#include <ucontext.h>
+#include <sys/mman.h>
+
+#define NR_PKEYS 16
+#define PKRU_BITS_PER_PKEY 2
+
+#ifndef DEBUG_LEVEL
+#define DEBUG_LEVEL 0
+#endif
+#define DPRINT_IN_SIGNAL_BUF_SIZE 4096
+extern int dprint_in_signal;
+extern char dprint_in_signal_buffer[DPRINT_IN_SIGNAL_BUF_SIZE];
+static inline void sigsafe_printf(const char *format, ...)
+{
+	va_list ap;
+
+	if (!dprint_in_signal) {
+		va_start(ap, format);
+		vprintf(format, ap);
+		va_end(ap);
+	} else {
+		int ret;
+		/*
+		 * No printf() functions are signal-safe.
+		 * They deadlock easily. Write the format
+		 * string to get some output, even if
+		 * incomplete.
+		 */
+		ret = write(1, format, strlen(format));
+		if (ret < 0)
+			exit(1);
+	}
+}
+#define dprintf_level(level, args...) do {	\
+	if (level <= DEBUG_LEVEL)		\
+		sigsafe_printf(args);		\
+} while (0)
+#define dprintf0(args...) dprintf_level(0, args)
+#define dprintf1(args...) dprintf_level(1, args)
+#define dprintf2(args...) dprintf_level(2, args)
+#define dprintf3(args...) dprintf_level(3, args)
+#define dprintf4(args...) dprintf_level(4, args)
+
+extern unsigned int shadow_pkru;
+static inline unsigned int __rdpkru(void)
+{
+	unsigned int eax, edx;
+	unsigned int ecx = 0;
+	unsigned int pkru;
+
+	asm volatile(".byte 0x0f,0x01,0xee\n\t"
+		     : "=a" (eax), "=d" (edx)
+		     : "c" (ecx));
+	pkru = eax;
+	return pkru;
+}
+
+static inline unsigned int _rdpkru(int line)
+{
+	unsigned int pkru = __rdpkru();
+
+	dprintf4("rdpkru(line=%d) pkru: %x shadow: %x\n",
+			line, pkru, shadow_pkru);
+	assert(pkru == shadow_pkru);
+
+	return pkru;
+}
+
+#define rdpkru() _rdpkru(__LINE__)
+
+static inline void __wrpkru(unsigned int pkru)
+{
+	unsigned int eax = pkru;
+	unsigned int ecx = 0;
+	unsigned int edx = 0;
+
+	dprintf4("%s() changing %08x to %08x\n", __func__, __rdpkru(), pkru);
+	asm volatile(".byte 0x0f,0x01,0xef\n\t"
+		     : : "a" (eax), "c" (ecx), "d" (edx));
+	assert(pkru == __rdpkru());
+}
+
+static inline void wrpkru(unsigned int pkru)
+{
+	dprintf4("%s() changing %08x to %08x\n", __func__, __rdpkru(), pkru);
+	/* will do the shadow check for us: */
+	rdpkru();
+	__wrpkru(pkru);
+	shadow_pkru = pkru;
+	dprintf4("%s(%08x) pkru: %08x\n", __func__, pkru, __rdpkru());
+}
+
+/*
+ * These are technically racy. since something could
+ * change PKRU between the read and the write.
+ */
+static inline void __pkey_access_allow(int pkey, int do_allow)
+{
+	unsigned int pkru = rdpkru();
+	int bit = pkey * 2;
+
+	if (do_allow)
+		pkru &= (1<<bit);
+	else
+		pkru |= (1<<bit);
+
+	dprintf4("pkru now: %08x\n", rdpkru());
+	wrpkru(pkru);
+}
+
+static inline void __pkey_write_allow(int pkey, int do_allow_write)
+{
+	long pkru = rdpkru();
+	int bit = pkey * 2 + 1;
+
+	if (do_allow_write)
+		pkru &= (1<<bit);
+	else
+		pkru |= (1<<bit);
+
+	wrpkru(pkru);
+	dprintf4("pkru now: %08x\n", rdpkru());
+}
+
+#define PROT_PKEY0     0x10            /* protection key value (bit 0) */
+#define PROT_PKEY1     0x20            /* protection key value (bit 1) */
+#define PROT_PKEY2     0x40            /* protection key value (bit 2) */
+#define PROT_PKEY3     0x80            /* protection key value (bit 3) */
+
+#define PAGE_SIZE 4096
+#define MB	(1<<20)
+
+static inline void __cpuid(unsigned int *eax, unsigned int *ebx,
+		unsigned int *ecx, unsigned int *edx)
+{
+	/* ecx is often an input as well as an output. */
+	asm volatile(
+		"cpuid;"
+		: "=a" (*eax),
+		  "=b" (*ebx),
+		  "=c" (*ecx),
+		  "=d" (*edx)
+		: "0" (*eax), "2" (*ecx));
+}
+
+/* Intel-defined CPU features, CPUID level 0x00000007:0 (ecx) */
+#define X86_FEATURE_PKU        (1<<3) /* Protection Keys for Userspace */
+#define X86_FEATURE_OSPKE      (1<<4) /* OS Protection Keys Enable */
+
+static inline int cpu_has_pku(void)
+{
+	unsigned int eax;
+	unsigned int ebx;
+	unsigned int ecx;
+	unsigned int edx;
+
+	eax = 0x7;
+	ecx = 0x0;
+	__cpuid(&eax, &ebx, &ecx, &edx);
+
+	if (!(ecx & X86_FEATURE_PKU)) {
+		dprintf2("cpu does not have PKU\n");
+		return 0;
+	}
+	if (!(ecx & X86_FEATURE_OSPKE)) {
+		dprintf2("cpu does not have OSPKE\n");
+		return 0;
+	}
+	return 1;
+}
+
+#define XSTATE_PKRU_BIT	(9)
+#define XSTATE_PKRU	0x200
+
+int pkru_xstate_offset(void)
+{
+	unsigned int eax;
+	unsigned int ebx;
+	unsigned int ecx;
+	unsigned int edx;
+	int xstate_offset;
+	int xstate_size;
+	unsigned long XSTATE_CPUID = 0xd;
+	int leaf;
+
+	/* assume that XSTATE_PKRU is set in XCR0 */
+	leaf = XSTATE_PKRU_BIT;
+	{
+		eax = XSTATE_CPUID;
+		ecx = leaf;
+		__cpuid(&eax, &ebx, &ecx, &edx);
+
+		if (leaf == XSTATE_PKRU_BIT) {
+			xstate_offset = ebx;
+			xstate_size = eax;
+		}
+	}
+
+	if (xstate_size == 0) {
+		printf("could not find size/offset of PKRU in xsave state\n");
+		return 0;
+	}
+
+	return xstate_offset;
+}
+
+#endif /* _PKEYS_HELPER_H */
diff --git a/tools/testing/selftests/x86/protection_keys.c b/tools/testing/selftests/x86/protection_keys.c
new file mode 100644
index 000000000..27661302a
--- /dev/null
+++ b/tools/testing/selftests/x86/protection_keys.c
@@ -0,0 +1,1508 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Tests x86 Memory Protection Keys (see Documentation/x86/protection-keys.txt)
+ *
+ * There are examples in here of:
+ *  * how to set protection keys on memory
+ *  * how to set/clear bits in PKRU (the rights register)
+ *  * how to handle SEGV_PKRU signals and extract pkey-relevant
+ *    information from the siginfo
+ *
+ * Things to add:
+ *	make sure KSM and KSM COW breaking works
+ *	prefault pages in at malloc, or not
+ *	protect MPX bounds tables with protection keys?
+ *	make sure VMA splitting/merging is working correctly
+ *	OOMs can destroy mm->mmap (see exit_mmap()), so make sure it is immune to pkeys
+ *	look for pkey "leaks" where it is still set on a VMA but "freed" back to the kernel
+ *	do a plain mprotect() to a mprotect_pkey() area and make sure the pkey sticks
+ *
+ * Compile like this:
+ *	gcc      -o protection_keys    -O2 -g -std=gnu99 -pthread -Wall protection_keys.c -lrt -ldl -lm
+ *	gcc -m32 -o protection_keys_32 -O2 -g -std=gnu99 -pthread -Wall protection_keys.c -lrt -ldl -lm
+ */
+#define _GNU_SOURCE
+#include <errno.h>
+#include <linux/futex.h>
+#include <time.h>
+#include <sys/time.h>
+#include <sys/syscall.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <signal.h>
+#include <assert.h>
+#include <stdlib.h>
+#include <ucontext.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/ptrace.h>
+#include <setjmp.h>
+
+#include "pkey-helpers.h"
+
+int iteration_nr = 1;
+int test_nr;
+
+unsigned int shadow_pkru;
+
+#define HPAGE_SIZE	(1UL<<21)
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x)))
+#define ALIGN_UP(x, align_to)	(((x) + ((align_to)-1)) & ~((align_to)-1))
+#define ALIGN_DOWN(x, align_to) ((x) & ~((align_to)-1))
+#define ALIGN_PTR_UP(p, ptr_align_to)	((typeof(p))ALIGN_UP((unsigned long)(p),	ptr_align_to))
+#define ALIGN_PTR_DOWN(p, ptr_align_to)	((typeof(p))ALIGN_DOWN((unsigned long)(p),	ptr_align_to))
+#define __stringify_1(x...)     #x
+#define __stringify(x...)       __stringify_1(x)
+
+#define PTR_ERR_ENOTSUP ((void *)-ENOTSUP)
+
+int dprint_in_signal;
+char dprint_in_signal_buffer[DPRINT_IN_SIGNAL_BUF_SIZE];
+
+extern void abort_hooks(void);
+#define pkey_assert(condition) do {		\
+	if (!(condition)) {			\
+		dprintf0("assert() at %s::%d test_nr: %d iteration: %d\n", \
+				__FILE__, __LINE__,	\
+				test_nr, iteration_nr);	\
+		dprintf0("errno at assert: %d", errno);	\
+		abort_hooks();			\
+		exit(__LINE__);			\
+	}					\
+} while (0)
+
+void cat_into_file(char *str, char *file)
+{
+	int fd = open(file, O_RDWR);
+	int ret;
+
+	dprintf2("%s(): writing '%s' to '%s'\n", __func__, str, file);
+	/*
+	 * these need to be raw because they are called under
+	 * pkey_assert()
+	 */
+	if (fd < 0) {
+		fprintf(stderr, "error opening '%s'\n", str);
+		perror("error: ");
+		exit(__LINE__);
+	}
+
+	ret = write(fd, str, strlen(str));
+	if (ret != strlen(str)) {
+		perror("write to file failed");
+		fprintf(stderr, "filename: '%s' str: '%s'\n", file, str);
+		exit(__LINE__);
+	}
+	close(fd);
+}
+
+#if CONTROL_TRACING > 0
+static int warned_tracing;
+int tracing_root_ok(void)
+{
+	if (geteuid() != 0) {
+		if (!warned_tracing)
+			fprintf(stderr, "WARNING: not run as root, "
+					"can not do tracing control\n");
+		warned_tracing = 1;
+		return 0;
+	}
+	return 1;
+}
+#endif
+
+void tracing_on(void)
+{
+#if CONTROL_TRACING > 0
+#define TRACEDIR "/sys/kernel/debug/tracing"
+	char pidstr[32];
+
+	if (!tracing_root_ok())
+		return;
+
+	sprintf(pidstr, "%d", getpid());
+	cat_into_file("0", TRACEDIR "/tracing_on");
+	cat_into_file("\n", TRACEDIR "/trace");
+	if (1) {
+		cat_into_file("function_graph", TRACEDIR "/current_tracer");
+		cat_into_file("1", TRACEDIR "/options/funcgraph-proc");
+	} else {
+		cat_into_file("nop", TRACEDIR "/current_tracer");
+	}
+	cat_into_file(pidstr, TRACEDIR "/set_ftrace_pid");
+	cat_into_file("1", TRACEDIR "/tracing_on");
+	dprintf1("enabled tracing\n");
+#endif
+}
+
+void tracing_off(void)
+{
+#if CONTROL_TRACING > 0
+	if (!tracing_root_ok())
+		return;
+	cat_into_file("0", "/sys/kernel/debug/tracing/tracing_on");
+#endif
+}
+
+void abort_hooks(void)
+{
+	fprintf(stderr, "running %s()...\n", __func__);
+	tracing_off();
+#ifdef SLEEP_ON_ABORT
+	sleep(SLEEP_ON_ABORT);
+#endif
+}
+
+static inline void __page_o_noops(void)
+{
+	/* 8-bytes of instruction * 512 bytes = 1 page */
+	asm(".rept 512 ; nopl 0x7eeeeeee(%eax) ; .endr");
+}
+
+/*
+ * This attempts to have roughly a page of instructions followed by a few
+ * instructions that do a write, and another page of instructions.  That
+ * way, we are pretty sure that the write is in the second page of
+ * instructions and has at least a page of padding behind it.
+ *
+ * *That* lets us be sure to madvise() away the write instruction, which
+ * will then fault, which makes sure that the fault code handles
+ * execute-only memory properly.
+ */
+__attribute__((__aligned__(PAGE_SIZE)))
+void lots_o_noops_around_write(int *write_to_me)
+{
+	dprintf3("running %s()\n", __func__);
+	__page_o_noops();
+	/* Assume this happens in the second page of instructions: */
+	*write_to_me = __LINE__;
+	/* pad out by another page: */
+	__page_o_noops();
+	dprintf3("%s() done\n", __func__);
+}
+
+/* Define some kernel-like types */
+#define  u8 uint8_t
+#define u16 uint16_t
+#define u32 uint32_t
+#define u64 uint64_t
+
+#ifdef __i386__
+
+#ifndef SYS_mprotect_key
+# define SYS_mprotect_key	380
+#endif
+
+#ifndef SYS_pkey_alloc
+# define SYS_pkey_alloc		381
+# define SYS_pkey_free		382
+#endif
+
+#define REG_IP_IDX		REG_EIP
+#define si_pkey_offset		0x14
+
+#else
+
+#ifndef SYS_mprotect_key
+# define SYS_mprotect_key	329
+#endif
+
+#ifndef SYS_pkey_alloc
+# define SYS_pkey_alloc		330
+# define SYS_pkey_free		331
+#endif
+
+#define REG_IP_IDX		REG_RIP
+#define si_pkey_offset		0x20
+
+#endif
+
+void dump_mem(void *dumpme, int len_bytes)
+{
+	char *c = (void *)dumpme;
+	int i;
+
+	for (i = 0; i < len_bytes; i += sizeof(u64)) {
+		u64 *ptr = (u64 *)(c + i);
+		dprintf1("dump[%03d][@%p]: %016jx\n", i, ptr, *ptr);
+	}
+}
+
+/* Failed address bound checks: */
+#ifndef SEGV_BNDERR
+# define SEGV_BNDERR		3
+#endif
+
+#ifndef SEGV_PKUERR
+# define SEGV_PKUERR		4
+#endif
+
+static char *si_code_str(int si_code)
+{
+	if (si_code == SEGV_MAPERR)
+		return "SEGV_MAPERR";
+	if (si_code == SEGV_ACCERR)
+		return "SEGV_ACCERR";
+	if (si_code == SEGV_BNDERR)
+		return "SEGV_BNDERR";
+	if (si_code == SEGV_PKUERR)
+		return "SEGV_PKUERR";
+	return "UNKNOWN";
+}
+
+int pkru_faults;
+int last_si_pkey = -1;
+void signal_handler(int signum, siginfo_t *si, void *vucontext)
+{
+	ucontext_t *uctxt = vucontext;
+	int trapno;
+	unsigned long ip;
+	char *fpregs;
+	u32 *pkru_ptr;
+	u64 siginfo_pkey;
+	u32 *si_pkey_ptr;
+	int pkru_offset;
+	fpregset_t fpregset;
+
+	dprint_in_signal = 1;
+	dprintf1(">>>>===============SIGSEGV============================\n");
+	dprintf1("%s()::%d, pkru: 0x%x shadow: %x\n", __func__, __LINE__,
+			__rdpkru(), shadow_pkru);
+
+	trapno = uctxt->uc_mcontext.gregs[REG_TRAPNO];
+	ip = uctxt->uc_mcontext.gregs[REG_IP_IDX];
+	fpregset = uctxt->uc_mcontext.fpregs;
+	fpregs = (void *)fpregset;
+
+	dprintf2("%s() trapno: %d ip: 0x%lx info->si_code: %s/%d\n", __func__,
+			trapno, ip, si_code_str(si->si_code), si->si_code);
+#ifdef __i386__
+	/*
+	 * 32-bit has some extra padding so that userspace can tell whether
+	 * the XSTATE header is present in addition to the "legacy" FPU
+	 * state.  We just assume that it is here.
+	 */
+	fpregs += 0x70;
+#endif
+	pkru_offset = pkru_xstate_offset();
+	pkru_ptr = (void *)(&fpregs[pkru_offset]);
+
+	dprintf1("siginfo: %p\n", si);
+	dprintf1(" fpregs: %p\n", fpregs);
+	/*
+	 * If we got a PKRU fault, we *HAVE* to have at least one bit set in
+	 * here.
+	 */
+	dprintf1("pkru_xstate_offset: %d\n", pkru_xstate_offset());
+	if (DEBUG_LEVEL > 4)
+		dump_mem(pkru_ptr - 128, 256);
+	pkey_assert(*pkru_ptr);
+
+	if ((si->si_code == SEGV_MAPERR) ||
+	    (si->si_code == SEGV_ACCERR) ||
+	    (si->si_code == SEGV_BNDERR)) {
+		printf("non-PK si_code, exiting...\n");
+		exit(4);
+	}
+
+	si_pkey_ptr = (u32 *)(((u8 *)si) + si_pkey_offset);
+	dprintf1("si_pkey_ptr: %p\n", si_pkey_ptr);
+	dump_mem((u8 *)si_pkey_ptr - 8, 24);
+	siginfo_pkey = *si_pkey_ptr;
+	pkey_assert(siginfo_pkey < NR_PKEYS);
+	last_si_pkey = siginfo_pkey;
+
+	dprintf1("signal pkru from xsave: %08x\n", *pkru_ptr);
+	/* need __rdpkru() version so we do not do shadow_pkru checking */
+	dprintf1("signal pkru from  pkru: %08x\n", __rdpkru());
+	dprintf1("pkey from siginfo: %jx\n", siginfo_pkey);
+	*(u64 *)pkru_ptr = 0x00000000;
+	dprintf1("WARNING: set PRKU=0 to allow faulting instruction to continue\n");
+	pkru_faults++;
+	dprintf1("<<<<==================================================\n");
+	dprint_in_signal = 0;
+}
+
+int wait_all_children(void)
+{
+	int status;
+	return waitpid(-1, &status, 0);
+}
+
+void sig_chld(int x)
+{
+	dprint_in_signal = 1;
+	dprintf2("[%d] SIGCHLD: %d\n", getpid(), x);
+	dprint_in_signal = 0;
+}
+
+void setup_sigsegv_handler(void)
+{
+	int r, rs;
+	struct sigaction newact;
+	struct sigaction oldact;
+
+	/* #PF is mapped to sigsegv */
+	int signum  = SIGSEGV;
+
+	newact.sa_handler = 0;
+	newact.sa_sigaction = signal_handler;
+
+	/*sigset_t - signals to block while in the handler */
+	/* get the old signal mask. */
+	rs = sigprocmask(SIG_SETMASK, 0, &newact.sa_mask);
+	pkey_assert(rs == 0);
+
+	/* call sa_sigaction, not sa_handler*/
+	newact.sa_flags = SA_SIGINFO;
+
+	newact.sa_restorer = 0;  /* void(*)(), obsolete */
+	r = sigaction(signum, &newact, &oldact);
+	r = sigaction(SIGALRM, &newact, &oldact);
+	pkey_assert(r == 0);
+}
+
+void setup_handlers(void)
+{
+	signal(SIGCHLD, &sig_chld);
+	setup_sigsegv_handler();
+}
+
+pid_t fork_lazy_child(void)
+{
+	pid_t forkret;
+
+	forkret = fork();
+	pkey_assert(forkret >= 0);
+	dprintf3("[%d] fork() ret: %d\n", getpid(), forkret);
+
+	if (!forkret) {
+		/* in the child */
+		while (1) {
+			dprintf1("child sleeping...\n");
+			sleep(30);
+		}
+	}
+	return forkret;
+}
+
+#ifndef PKEY_DISABLE_ACCESS
+# define PKEY_DISABLE_ACCESS	0x1
+#endif
+
+#ifndef PKEY_DISABLE_WRITE
+# define PKEY_DISABLE_WRITE	0x2
+#endif
+
+static u32 hw_pkey_get(int pkey, unsigned long flags)
+{
+	u32 mask = (PKEY_DISABLE_ACCESS|PKEY_DISABLE_WRITE);
+	u32 pkru = __rdpkru();
+	u32 shifted_pkru;
+	u32 masked_pkru;
+
+	dprintf1("%s(pkey=%d, flags=%lx) = %x / %d\n",
+			__func__, pkey, flags, 0, 0);
+	dprintf2("%s() raw pkru: %x\n", __func__, pkru);
+
+	shifted_pkru = (pkru >> (pkey * PKRU_BITS_PER_PKEY));
+	dprintf2("%s() shifted_pkru: %x\n", __func__, shifted_pkru);
+	masked_pkru = shifted_pkru & mask;
+	dprintf2("%s() masked  pkru: %x\n", __func__, masked_pkru);
+	/*
+	 * shift down the relevant bits to the lowest two, then
+	 * mask off all the other high bits.
+	 */
+	return masked_pkru;
+}
+
+static int hw_pkey_set(int pkey, unsigned long rights, unsigned long flags)
+{
+	u32 mask = (PKEY_DISABLE_ACCESS|PKEY_DISABLE_WRITE);
+	u32 old_pkru = __rdpkru();
+	u32 new_pkru;
+
+	/* make sure that 'rights' only contains the bits we expect: */
+	assert(!(rights & ~mask));
+
+	/* copy old pkru */
+	new_pkru = old_pkru;
+	/* mask out bits from pkey in old value: */
+	new_pkru &= ~(mask << (pkey * PKRU_BITS_PER_PKEY));
+	/* OR in new bits for pkey: */
+	new_pkru |= (rights << (pkey * PKRU_BITS_PER_PKEY));
+
+	__wrpkru(new_pkru);
+
+	dprintf3("%s(pkey=%d, rights=%lx, flags=%lx) = %x pkru now: %x old_pkru: %x\n",
+			__func__, pkey, rights, flags, 0, __rdpkru(), old_pkru);
+	return 0;
+}
+
+void pkey_disable_set(int pkey, int flags)
+{
+	unsigned long syscall_flags = 0;
+	int ret;
+	int pkey_rights;
+	u32 orig_pkru = rdpkru();
+
+	dprintf1("START->%s(%d, 0x%x)\n", __func__,
+		pkey, flags);
+	pkey_assert(flags & (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE));
+
+	pkey_rights = hw_pkey_get(pkey, syscall_flags);
+
+	dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__,
+			pkey, pkey, pkey_rights);
+	pkey_assert(pkey_rights >= 0);
+
+	pkey_rights |= flags;
+
+	ret = hw_pkey_set(pkey, pkey_rights, syscall_flags);
+	assert(!ret);
+	/*pkru and flags have the same format */
+	shadow_pkru |= flags << (pkey * 2);
+	dprintf1("%s(%d) shadow: 0x%x\n", __func__, pkey, shadow_pkru);
+
+	pkey_assert(ret >= 0);
+
+	pkey_rights = hw_pkey_get(pkey, syscall_flags);
+	dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__,
+			pkey, pkey, pkey_rights);
+
+	dprintf1("%s(%d) pkru: 0x%x\n", __func__, pkey, rdpkru());
+	if (flags)
+		pkey_assert(rdpkru() > orig_pkru);
+	dprintf1("END<---%s(%d, 0x%x)\n", __func__,
+		pkey, flags);
+}
+
+void pkey_disable_clear(int pkey, int flags)
+{
+	unsigned long syscall_flags = 0;
+	int ret;
+	int pkey_rights = hw_pkey_get(pkey, syscall_flags);
+	u32 orig_pkru = rdpkru();
+
+	pkey_assert(flags & (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE));
+
+	dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__,
+			pkey, pkey, pkey_rights);
+	pkey_assert(pkey_rights >= 0);
+
+	pkey_rights |= flags;
+
+	ret = hw_pkey_set(pkey, pkey_rights, 0);
+	/* pkru and flags have the same format */
+	shadow_pkru &= ~(flags << (pkey * 2));
+	pkey_assert(ret >= 0);
+
+	pkey_rights = hw_pkey_get(pkey, syscall_flags);
+	dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__,
+			pkey, pkey, pkey_rights);
+
+	dprintf1("%s(%d) pkru: 0x%x\n", __func__, pkey, rdpkru());
+	if (flags)
+		assert(rdpkru() > orig_pkru);
+}
+
+void pkey_write_allow(int pkey)
+{
+	pkey_disable_clear(pkey, PKEY_DISABLE_WRITE);
+}
+void pkey_write_deny(int pkey)
+{
+	pkey_disable_set(pkey, PKEY_DISABLE_WRITE);
+}
+void pkey_access_allow(int pkey)
+{
+	pkey_disable_clear(pkey, PKEY_DISABLE_ACCESS);
+}
+void pkey_access_deny(int pkey)
+{
+	pkey_disable_set(pkey, PKEY_DISABLE_ACCESS);
+}
+
+int sys_mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot,
+		unsigned long pkey)
+{
+	int sret;
+
+	dprintf2("%s(0x%p, %zx, prot=%lx, pkey=%lx)\n", __func__,
+			ptr, size, orig_prot, pkey);
+
+	errno = 0;
+	sret = syscall(SYS_mprotect_key, ptr, size, orig_prot, pkey);
+	if (errno) {
+		dprintf2("SYS_mprotect_key sret: %d\n", sret);
+		dprintf2("SYS_mprotect_key prot: 0x%lx\n", orig_prot);
+		dprintf2("SYS_mprotect_key failed, errno: %d\n", errno);
+		if (DEBUG_LEVEL >= 2)
+			perror("SYS_mprotect_pkey");
+	}
+	return sret;
+}
+
+int sys_pkey_alloc(unsigned long flags, unsigned long init_val)
+{
+	int ret = syscall(SYS_pkey_alloc, flags, init_val);
+	dprintf1("%s(flags=%lx, init_val=%lx) syscall ret: %d errno: %d\n",
+			__func__, flags, init_val, ret, errno);
+	return ret;
+}
+
+int alloc_pkey(void)
+{
+	int ret;
+	unsigned long init_val = 0x0;
+
+	dprintf1("alloc_pkey()::%d, pkru: 0x%x shadow: %x\n",
+			__LINE__, __rdpkru(), shadow_pkru);
+	ret = sys_pkey_alloc(0, init_val);
+	/*
+	 * pkey_alloc() sets PKRU, so we need to reflect it in
+	 * shadow_pkru:
+	 */
+	dprintf4("alloc_pkey()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n",
+			__LINE__, ret, __rdpkru(), shadow_pkru);
+	if (ret) {
+		/* clear both the bits: */
+		shadow_pkru &= ~(0x3      << (ret * 2));
+		dprintf4("alloc_pkey()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n",
+				__LINE__, ret, __rdpkru(), shadow_pkru);
+		/*
+		 * move the new state in from init_val
+		 * (remember, we cheated and init_val == pkru format)
+		 */
+		shadow_pkru |=  (init_val << (ret * 2));
+	}
+	dprintf4("alloc_pkey()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n",
+			__LINE__, ret, __rdpkru(), shadow_pkru);
+	dprintf1("alloc_pkey()::%d errno: %d\n", __LINE__, errno);
+	/* for shadow checking: */
+	rdpkru();
+	dprintf4("alloc_pkey()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n",
+			__LINE__, ret, __rdpkru(), shadow_pkru);
+	return ret;
+}
+
+int sys_pkey_free(unsigned long pkey)
+{
+	int ret = syscall(SYS_pkey_free, pkey);
+	dprintf1("%s(pkey=%ld) syscall ret: %d\n", __func__, pkey, ret);
+	return ret;
+}
+
+/*
+ * I had a bug where pkey bits could be set by mprotect() but
+ * not cleared.  This ensures we get lots of random bit sets
+ * and clears on the vma and pte pkey bits.
+ */
+int alloc_random_pkey(void)
+{
+	int max_nr_pkey_allocs;
+	int ret;
+	int i;
+	int alloced_pkeys[NR_PKEYS];
+	int nr_alloced = 0;
+	int random_index;
+	memset(alloced_pkeys, 0, sizeof(alloced_pkeys));
+
+	/* allocate every possible key and make a note of which ones we got */
+	max_nr_pkey_allocs = NR_PKEYS;
+	for (i = 0; i < max_nr_pkey_allocs; i++) {
+		int new_pkey = alloc_pkey();
+		if (new_pkey < 0)
+			break;
+		alloced_pkeys[nr_alloced++] = new_pkey;
+	}
+
+	pkey_assert(nr_alloced > 0);
+	/* select a random one out of the allocated ones */
+	random_index = rand() % nr_alloced;
+	ret = alloced_pkeys[random_index];
+	/* now zero it out so we don't free it next */
+	alloced_pkeys[random_index] = 0;
+
+	/* go through the allocated ones that we did not want and free them */
+	for (i = 0; i < nr_alloced; i++) {
+		int free_ret;
+		if (!alloced_pkeys[i])
+			continue;
+		free_ret = sys_pkey_free(alloced_pkeys[i]);
+		pkey_assert(!free_ret);
+	}
+	dprintf1("%s()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", __func__,
+			__LINE__, ret, __rdpkru(), shadow_pkru);
+	return ret;
+}
+
+int mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot,
+		unsigned long pkey)
+{
+	int nr_iterations = random() % 100;
+	int ret;
+
+	while (0) {
+		int rpkey = alloc_random_pkey();
+		ret = sys_mprotect_pkey(ptr, size, orig_prot, pkey);
+		dprintf1("sys_mprotect_pkey(%p, %zx, prot=0x%lx, pkey=%ld) ret: %d\n",
+				ptr, size, orig_prot, pkey, ret);
+		if (nr_iterations-- < 0)
+			break;
+
+		dprintf1("%s()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", __func__,
+			__LINE__, ret, __rdpkru(), shadow_pkru);
+		sys_pkey_free(rpkey);
+		dprintf1("%s()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", __func__,
+			__LINE__, ret, __rdpkru(), shadow_pkru);
+	}
+	pkey_assert(pkey < NR_PKEYS);
+
+	ret = sys_mprotect_pkey(ptr, size, orig_prot, pkey);
+	dprintf1("mprotect_pkey(%p, %zx, prot=0x%lx, pkey=%ld) ret: %d\n",
+			ptr, size, orig_prot, pkey, ret);
+	pkey_assert(!ret);
+	dprintf1("%s()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", __func__,
+			__LINE__, ret, __rdpkru(), shadow_pkru);
+	return ret;
+}
+
+struct pkey_malloc_record {
+	void *ptr;
+	long size;
+	int prot;
+};
+struct pkey_malloc_record *pkey_malloc_records;
+struct pkey_malloc_record *pkey_last_malloc_record;
+long nr_pkey_malloc_records;
+void record_pkey_malloc(void *ptr, long size, int prot)
+{
+	long i;
+	struct pkey_malloc_record *rec = NULL;
+
+	for (i = 0; i < nr_pkey_malloc_records; i++) {
+		rec = &pkey_malloc_records[i];
+		/* find a free record */
+		if (rec)
+			break;
+	}
+	if (!rec) {
+		/* every record is full */
+		size_t old_nr_records = nr_pkey_malloc_records;
+		size_t new_nr_records = (nr_pkey_malloc_records * 2 + 1);
+		size_t new_size = new_nr_records * sizeof(struct pkey_malloc_record);
+		dprintf2("new_nr_records: %zd\n", new_nr_records);
+		dprintf2("new_size: %zd\n", new_size);
+		pkey_malloc_records = realloc(pkey_malloc_records, new_size);
+		pkey_assert(pkey_malloc_records != NULL);
+		rec = &pkey_malloc_records[nr_pkey_malloc_records];
+		/*
+		 * realloc() does not initialize memory, so zero it from
+		 * the first new record all the way to the end.
+		 */
+		for (i = 0; i < new_nr_records - old_nr_records; i++)
+			memset(rec + i, 0, sizeof(*rec));
+	}
+	dprintf3("filling malloc record[%d/%p]: {%p, %ld}\n",
+		(int)(rec - pkey_malloc_records), rec, ptr, size);
+	rec->ptr = ptr;
+	rec->size = size;
+	rec->prot = prot;
+	pkey_last_malloc_record = rec;
+	nr_pkey_malloc_records++;
+}
+
+void free_pkey_malloc(void *ptr)
+{
+	long i;
+	int ret;
+	dprintf3("%s(%p)\n", __func__, ptr);
+	for (i = 0; i < nr_pkey_malloc_records; i++) {
+		struct pkey_malloc_record *rec = &pkey_malloc_records[i];
+		dprintf4("looking for ptr %p at record[%ld/%p]: {%p, %ld}\n",
+				ptr, i, rec, rec->ptr, rec->size);
+		if ((ptr <  rec->ptr) ||
+		    (ptr >= rec->ptr + rec->size))
+			continue;
+
+		dprintf3("found ptr %p at record[%ld/%p]: {%p, %ld}\n",
+				ptr, i, rec, rec->ptr, rec->size);
+		nr_pkey_malloc_records--;
+		ret = munmap(rec->ptr, rec->size);
+		dprintf3("munmap ret: %d\n", ret);
+		pkey_assert(!ret);
+		dprintf3("clearing rec->ptr, rec: %p\n", rec);
+		rec->ptr = NULL;
+		dprintf3("done clearing rec->ptr, rec: %p\n", rec);
+		return;
+	}
+	pkey_assert(false);
+}
+
+
+void *malloc_pkey_with_mprotect(long size, int prot, u16 pkey)
+{
+	void *ptr;
+	int ret;
+
+	rdpkru();
+	dprintf1("doing %s(size=%ld, prot=0x%x, pkey=%d)\n", __func__,
+			size, prot, pkey);
+	pkey_assert(pkey < NR_PKEYS);
+	ptr = mmap(NULL, size, prot, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
+	pkey_assert(ptr != (void *)-1);
+	ret = mprotect_pkey((void *)ptr, PAGE_SIZE, prot, pkey);
+	pkey_assert(!ret);
+	record_pkey_malloc(ptr, size, prot);
+	rdpkru();
+
+	dprintf1("%s() for pkey %d @ %p\n", __func__, pkey, ptr);
+	return ptr;
+}
+
+void *malloc_pkey_anon_huge(long size, int prot, u16 pkey)
+{
+	int ret;
+	void *ptr;
+
+	dprintf1("doing %s(size=%ld, prot=0x%x, pkey=%d)\n", __func__,
+			size, prot, pkey);
+	/*
+	 * Guarantee we can fit at least one huge page in the resulting
+	 * allocation by allocating space for 2:
+	 */
+	size = ALIGN_UP(size, HPAGE_SIZE * 2);
+	ptr = mmap(NULL, size, PROT_NONE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
+	pkey_assert(ptr != (void *)-1);
+	record_pkey_malloc(ptr, size, prot);
+	mprotect_pkey(ptr, size, prot, pkey);
+
+	dprintf1("unaligned ptr: %p\n", ptr);
+	ptr = ALIGN_PTR_UP(ptr, HPAGE_SIZE);
+	dprintf1("  aligned ptr: %p\n", ptr);
+	ret = madvise(ptr, HPAGE_SIZE, MADV_HUGEPAGE);
+	dprintf1("MADV_HUGEPAGE ret: %d\n", ret);
+	ret = madvise(ptr, HPAGE_SIZE, MADV_WILLNEED);
+	dprintf1("MADV_WILLNEED ret: %d\n", ret);
+	memset(ptr, 0, HPAGE_SIZE);
+
+	dprintf1("mmap()'d thp for pkey %d @ %p\n", pkey, ptr);
+	return ptr;
+}
+
+int hugetlb_setup_ok;
+#define GET_NR_HUGE_PAGES 10
+void setup_hugetlbfs(void)
+{
+	int err;
+	int fd;
+	char buf[] = "123";
+
+	if (geteuid() != 0) {
+		fprintf(stderr, "WARNING: not run as root, can not do hugetlb test\n");
+		return;
+	}
+
+	cat_into_file(__stringify(GET_NR_HUGE_PAGES), "/proc/sys/vm/nr_hugepages");
+
+	/*
+	 * Now go make sure that we got the pages and that they
+	 * are 2M pages.  Someone might have made 1G the default.
+	 */
+	fd = open("/sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages", O_RDONLY);
+	if (fd < 0) {
+		perror("opening sysfs 2M hugetlb config");
+		return;
+	}
+
+	/* -1 to guarantee leaving the trailing \0 */
+	err = read(fd, buf, sizeof(buf)-1);
+	close(fd);
+	if (err <= 0) {
+		perror("reading sysfs 2M hugetlb config");
+		return;
+	}
+
+	if (atoi(buf) != GET_NR_HUGE_PAGES) {
+		fprintf(stderr, "could not confirm 2M pages, got: '%s' expected %d\n",
+			buf, GET_NR_HUGE_PAGES);
+		return;
+	}
+
+	hugetlb_setup_ok = 1;
+}
+
+void *malloc_pkey_hugetlb(long size, int prot, u16 pkey)
+{
+	void *ptr;
+	int flags = MAP_ANONYMOUS|MAP_PRIVATE|MAP_HUGETLB;
+
+	if (!hugetlb_setup_ok)
+		return PTR_ERR_ENOTSUP;
+
+	dprintf1("doing %s(%ld, %x, %x)\n", __func__, size, prot, pkey);
+	size = ALIGN_UP(size, HPAGE_SIZE * 2);
+	pkey_assert(pkey < NR_PKEYS);
+	ptr = mmap(NULL, size, PROT_NONE, flags, -1, 0);
+	pkey_assert(ptr != (void *)-1);
+	mprotect_pkey(ptr, size, prot, pkey);
+
+	record_pkey_malloc(ptr, size, prot);
+
+	dprintf1("mmap()'d hugetlbfs for pkey %d @ %p\n", pkey, ptr);
+	return ptr;
+}
+
+void *malloc_pkey_mmap_dax(long size, int prot, u16 pkey)
+{
+	void *ptr;
+	int fd;
+
+	dprintf1("doing %s(size=%ld, prot=0x%x, pkey=%d)\n", __func__,
+			size, prot, pkey);
+	pkey_assert(pkey < NR_PKEYS);
+	fd = open("/dax/foo", O_RDWR);
+	pkey_assert(fd >= 0);
+
+	ptr = mmap(0, size, prot, MAP_SHARED, fd, 0);
+	pkey_assert(ptr != (void *)-1);
+
+	mprotect_pkey(ptr, size, prot, pkey);
+
+	record_pkey_malloc(ptr, size, prot);
+
+	dprintf1("mmap()'d for pkey %d @ %p\n", pkey, ptr);
+	close(fd);
+	return ptr;
+}
+
+void *(*pkey_malloc[])(long size, int prot, u16 pkey) = {
+
+	malloc_pkey_with_mprotect,
+	malloc_pkey_anon_huge,
+	malloc_pkey_hugetlb
+/* can not do direct with the pkey_mprotect() API:
+	malloc_pkey_mmap_direct,
+	malloc_pkey_mmap_dax,
+*/
+};
+
+void *malloc_pkey(long size, int prot, u16 pkey)
+{
+	void *ret;
+	static int malloc_type;
+	int nr_malloc_types = ARRAY_SIZE(pkey_malloc);
+
+	pkey_assert(pkey < NR_PKEYS);
+
+	while (1) {
+		pkey_assert(malloc_type < nr_malloc_types);
+
+		ret = pkey_malloc[malloc_type](size, prot, pkey);
+		pkey_assert(ret != (void *)-1);
+
+		malloc_type++;
+		if (malloc_type >= nr_malloc_types)
+			malloc_type = (random()%nr_malloc_types);
+
+		/* try again if the malloc_type we tried is unsupported */
+		if (ret == PTR_ERR_ENOTSUP)
+			continue;
+
+		break;
+	}
+
+	dprintf3("%s(%ld, prot=%x, pkey=%x) returning: %p\n", __func__,
+			size, prot, pkey, ret);
+	return ret;
+}
+
+int last_pkru_faults;
+#define UNKNOWN_PKEY -2
+void expected_pk_fault(int pkey)
+{
+	dprintf2("%s(): last_pkru_faults: %d pkru_faults: %d\n",
+			__func__, last_pkru_faults, pkru_faults);
+	dprintf2("%s(%d): last_si_pkey: %d\n", __func__, pkey, last_si_pkey);
+	pkey_assert(last_pkru_faults + 1 == pkru_faults);
+
+       /*
+	* For exec-only memory, we do not know the pkey in
+	* advance, so skip this check.
+	*/
+	if (pkey != UNKNOWN_PKEY)
+		pkey_assert(last_si_pkey == pkey);
+
+	/*
+	 * The signal handler shold have cleared out PKRU to let the
+	 * test program continue.  We now have to restore it.
+	 */
+	if (__rdpkru() != 0)
+		pkey_assert(0);
+
+	__wrpkru(shadow_pkru);
+	dprintf1("%s() set PKRU=%x to restore state after signal nuked it\n",
+			__func__, shadow_pkru);
+	last_pkru_faults = pkru_faults;
+	last_si_pkey = -1;
+}
+
+#define do_not_expect_pk_fault(msg)	do {			\
+	if (last_pkru_faults != pkru_faults)			\
+		dprintf0("unexpected PK fault: %s\n", msg);	\
+	pkey_assert(last_pkru_faults == pkru_faults);		\
+} while (0)
+
+int test_fds[10] = { -1 };
+int nr_test_fds;
+void __save_test_fd(int fd)
+{
+	pkey_assert(fd >= 0);
+	pkey_assert(nr_test_fds < ARRAY_SIZE(test_fds));
+	test_fds[nr_test_fds] = fd;
+	nr_test_fds++;
+}
+
+int get_test_read_fd(void)
+{
+	int test_fd = open("/etc/passwd", O_RDONLY);
+	__save_test_fd(test_fd);
+	return test_fd;
+}
+
+void close_test_fds(void)
+{
+	int i;
+
+	for (i = 0; i < nr_test_fds; i++) {
+		if (test_fds[i] < 0)
+			continue;
+		close(test_fds[i]);
+		test_fds[i] = -1;
+	}
+	nr_test_fds = 0;
+}
+
+#define barrier() __asm__ __volatile__("": : :"memory")
+__attribute__((noinline)) int read_ptr(int *ptr)
+{
+	/*
+	 * Keep GCC from optimizing this away somehow
+	 */
+	barrier();
+	return *ptr;
+}
+
+void test_read_of_write_disabled_region(int *ptr, u16 pkey)
+{
+	int ptr_contents;
+
+	dprintf1("disabling write access to PKEY[1], doing read\n");
+	pkey_write_deny(pkey);
+	ptr_contents = read_ptr(ptr);
+	dprintf1("*ptr: %d\n", ptr_contents);
+	dprintf1("\n");
+}
+void test_read_of_access_disabled_region(int *ptr, u16 pkey)
+{
+	int ptr_contents;
+
+	dprintf1("disabling access to PKEY[%02d], doing read @ %p\n", pkey, ptr);
+	rdpkru();
+	pkey_access_deny(pkey);
+	ptr_contents = read_ptr(ptr);
+	dprintf1("*ptr: %d\n", ptr_contents);
+	expected_pk_fault(pkey);
+}
+void test_write_of_write_disabled_region(int *ptr, u16 pkey)
+{
+	dprintf1("disabling write access to PKEY[%02d], doing write\n", pkey);
+	pkey_write_deny(pkey);
+	*ptr = __LINE__;
+	expected_pk_fault(pkey);
+}
+void test_write_of_access_disabled_region(int *ptr, u16 pkey)
+{
+	dprintf1("disabling access to PKEY[%02d], doing write\n", pkey);
+	pkey_access_deny(pkey);
+	*ptr = __LINE__;
+	expected_pk_fault(pkey);
+}
+void test_kernel_write_of_access_disabled_region(int *ptr, u16 pkey)
+{
+	int ret;
+	int test_fd = get_test_read_fd();
+
+	dprintf1("disabling access to PKEY[%02d], "
+		 "having kernel read() to buffer\n", pkey);
+	pkey_access_deny(pkey);
+	ret = read(test_fd, ptr, 1);
+	dprintf1("read ret: %d\n", ret);
+	pkey_assert(ret);
+}
+void test_kernel_write_of_write_disabled_region(int *ptr, u16 pkey)
+{
+	int ret;
+	int test_fd = get_test_read_fd();
+
+	pkey_write_deny(pkey);
+	ret = read(test_fd, ptr, 100);
+	dprintf1("read ret: %d\n", ret);
+	if (ret < 0 && (DEBUG_LEVEL > 0))
+		perror("verbose read result (OK for this to be bad)");
+	pkey_assert(ret);
+}
+
+void test_kernel_gup_of_access_disabled_region(int *ptr, u16 pkey)
+{
+	int pipe_ret, vmsplice_ret;
+	struct iovec iov;
+	int pipe_fds[2];
+
+	pipe_ret = pipe(pipe_fds);
+
+	pkey_assert(pipe_ret == 0);
+	dprintf1("disabling access to PKEY[%02d], "
+		 "having kernel vmsplice from buffer\n", pkey);
+	pkey_access_deny(pkey);
+	iov.iov_base = ptr;
+	iov.iov_len = PAGE_SIZE;
+	vmsplice_ret = vmsplice(pipe_fds[1], &iov, 1, SPLICE_F_GIFT);
+	dprintf1("vmsplice() ret: %d\n", vmsplice_ret);
+	pkey_assert(vmsplice_ret == -1);
+
+	close(pipe_fds[0]);
+	close(pipe_fds[1]);
+}
+
+void test_kernel_gup_write_to_write_disabled_region(int *ptr, u16 pkey)
+{
+	int ignored = 0xdada;
+	int futex_ret;
+	int some_int = __LINE__;
+
+	dprintf1("disabling write to PKEY[%02d], "
+		 "doing futex gunk in buffer\n", pkey);
+	*ptr = some_int;
+	pkey_write_deny(pkey);
+	futex_ret = syscall(SYS_futex, ptr, FUTEX_WAIT, some_int-1, NULL,
+			&ignored, ignored);
+	if (DEBUG_LEVEL > 0)
+		perror("futex");
+	dprintf1("futex() ret: %d\n", futex_ret);
+}
+
+/* Assumes that all pkeys other than 'pkey' are unallocated */
+void test_pkey_syscalls_on_non_allocated_pkey(int *ptr, u16 pkey)
+{
+	int err;
+	int i;
+
+	/* Note: 0 is the default pkey, so don't mess with it */
+	for (i = 1; i < NR_PKEYS; i++) {
+		if (pkey == i)
+			continue;
+
+		dprintf1("trying get/set/free to non-allocated pkey: %2d\n", i);
+		err = sys_pkey_free(i);
+		pkey_assert(err);
+
+		err = sys_pkey_free(i);
+		pkey_assert(err);
+
+		err = sys_mprotect_pkey(ptr, PAGE_SIZE, PROT_READ, i);
+		pkey_assert(err);
+	}
+}
+
+/* Assumes that all pkeys other than 'pkey' are unallocated */
+void test_pkey_syscalls_bad_args(int *ptr, u16 pkey)
+{
+	int err;
+	int bad_pkey = NR_PKEYS+99;
+
+	/* pass a known-invalid pkey in: */
+	err = sys_mprotect_pkey(ptr, PAGE_SIZE, PROT_READ, bad_pkey);
+	pkey_assert(err);
+}
+
+void become_child(void)
+{
+	pid_t forkret;
+
+	forkret = fork();
+	pkey_assert(forkret >= 0);
+	dprintf3("[%d] fork() ret: %d\n", getpid(), forkret);
+
+	if (!forkret) {
+		/* in the child */
+		return;
+	}
+	exit(0);
+}
+
+/* Assumes that all pkeys other than 'pkey' are unallocated */
+void test_pkey_alloc_exhaust(int *ptr, u16 pkey)
+{
+	int err;
+	int allocated_pkeys[NR_PKEYS] = {0};
+	int nr_allocated_pkeys = 0;
+	int i;
+
+	for (i = 0; i < NR_PKEYS*3; i++) {
+		int new_pkey;
+		dprintf1("%s() alloc loop: %d\n", __func__, i);
+		new_pkey = alloc_pkey();
+		dprintf4("%s()::%d, err: %d pkru: 0x%x shadow: 0x%x\n", __func__,
+				__LINE__, err, __rdpkru(), shadow_pkru);
+		rdpkru(); /* for shadow checking */
+		dprintf2("%s() errno: %d ENOSPC: %d\n", __func__, errno, ENOSPC);
+		if ((new_pkey == -1) && (errno == ENOSPC)) {
+			dprintf2("%s() failed to allocate pkey after %d tries\n",
+				__func__, nr_allocated_pkeys);
+		} else {
+			/*
+			 * Ensure the number of successes never
+			 * exceeds the number of keys supported
+			 * in the hardware.
+			 */
+			pkey_assert(nr_allocated_pkeys < NR_PKEYS);
+			allocated_pkeys[nr_allocated_pkeys++] = new_pkey;
+		}
+
+		/*
+		 * Make sure that allocation state is properly
+		 * preserved across fork().
+		 */
+		if (i == NR_PKEYS*2)
+			become_child();
+	}
+
+	dprintf3("%s()::%d\n", __func__, __LINE__);
+
+	/*
+	 * There are 16 pkeys supported in hardware.  Three are
+	 * allocated by the time we get here:
+	 *   1. The default key (0)
+	 *   2. One possibly consumed by an execute-only mapping.
+	 *   3. One allocated by the test code and passed in via
+	 *      'pkey' to this function.
+	 * Ensure that we can allocate at least another 13 (16-3).
+	 */
+	pkey_assert(i >= NR_PKEYS-3);
+
+	for (i = 0; i < nr_allocated_pkeys; i++) {
+		err = sys_pkey_free(allocated_pkeys[i]);
+		pkey_assert(!err);
+		rdpkru(); /* for shadow checking */
+	}
+}
+
+/*
+ * pkey 0 is special.  It is allocated by default, so you do not
+ * have to call pkey_alloc() to use it first.  Make sure that it
+ * is usable.
+ */
+void test_mprotect_with_pkey_0(int *ptr, u16 pkey)
+{
+	long size;
+	int prot;
+
+	assert(pkey_last_malloc_record);
+	size = pkey_last_malloc_record->size;
+	/*
+	 * This is a bit of a hack.  But mprotect() requires
+	 * huge-page-aligned sizes when operating on hugetlbfs.
+	 * So, make sure that we use something that's a multiple
+	 * of a huge page when we can.
+	 */
+	if (size >= HPAGE_SIZE)
+		size = HPAGE_SIZE;
+	prot = pkey_last_malloc_record->prot;
+
+	/* Use pkey 0 */
+	mprotect_pkey(ptr, size, prot, 0);
+
+	/* Make sure that we can set it back to the original pkey. */
+	mprotect_pkey(ptr, size, prot, pkey);
+}
+
+void test_ptrace_of_child(int *ptr, u16 pkey)
+{
+	__attribute__((__unused__)) int peek_result;
+	pid_t child_pid;
+	void *ignored = 0;
+	long ret;
+	int status;
+	/*
+	 * This is the "control" for our little expermient.  Make sure
+	 * we can always access it when ptracing.
+	 */
+	int *plain_ptr_unaligned = malloc(HPAGE_SIZE);
+	int *plain_ptr = ALIGN_PTR_UP(plain_ptr_unaligned, PAGE_SIZE);
+
+	/*
+	 * Fork a child which is an exact copy of this process, of course.
+	 * That means we can do all of our tests via ptrace() and then plain
+	 * memory access and ensure they work differently.
+	 */
+	child_pid = fork_lazy_child();
+	dprintf1("[%d] child pid: %d\n", getpid(), child_pid);
+
+	ret = ptrace(PTRACE_ATTACH, child_pid, ignored, ignored);
+	if (ret)
+		perror("attach");
+	dprintf1("[%d] attach ret: %ld %d\n", getpid(), ret, __LINE__);
+	pkey_assert(ret != -1);
+	ret = waitpid(child_pid, &status, WUNTRACED);
+	if ((ret != child_pid) || !(WIFSTOPPED(status))) {
+		fprintf(stderr, "weird waitpid result %ld stat %x\n",
+				ret, status);
+		pkey_assert(0);
+	}
+	dprintf2("waitpid ret: %ld\n", ret);
+	dprintf2("waitpid status: %d\n", status);
+
+	pkey_access_deny(pkey);
+	pkey_write_deny(pkey);
+
+	/* Write access, untested for now:
+	ret = ptrace(PTRACE_POKEDATA, child_pid, peek_at, data);
+	pkey_assert(ret != -1);
+	dprintf1("poke at %p: %ld\n", peek_at, ret);
+	*/
+
+	/*
+	 * Try to access the pkey-protected "ptr" via ptrace:
+	 */
+	ret = ptrace(PTRACE_PEEKDATA, child_pid, ptr, ignored);
+	/* expect it to work, without an error: */
+	pkey_assert(ret != -1);
+	/* Now access from the current task, and expect an exception: */
+	peek_result = read_ptr(ptr);
+	expected_pk_fault(pkey);
+
+	/*
+	 * Try to access the NON-pkey-protected "plain_ptr" via ptrace:
+	 */
+	ret = ptrace(PTRACE_PEEKDATA, child_pid, plain_ptr, ignored);
+	/* expect it to work, without an error: */
+	pkey_assert(ret != -1);
+	/* Now access from the current task, and expect NO exception: */
+	peek_result = read_ptr(plain_ptr);
+	do_not_expect_pk_fault("read plain pointer after ptrace");
+
+	ret = ptrace(PTRACE_DETACH, child_pid, ignored, 0);
+	pkey_assert(ret != -1);
+
+	ret = kill(child_pid, SIGKILL);
+	pkey_assert(ret != -1);
+
+	wait(&status);
+
+	free(plain_ptr_unaligned);
+}
+
+void *get_pointer_to_instructions(void)
+{
+	void *p1;
+
+	p1 = ALIGN_PTR_UP(&lots_o_noops_around_write, PAGE_SIZE);
+	dprintf3("&lots_o_noops: %p\n", &lots_o_noops_around_write);
+	/* lots_o_noops_around_write should be page-aligned already */
+	assert(p1 == &lots_o_noops_around_write);
+
+	/* Point 'p1' at the *second* page of the function: */
+	p1 += PAGE_SIZE;
+
+	/*
+	 * Try to ensure we fault this in on next touch to ensure
+	 * we get an instruction fault as opposed to a data one
+	 */
+	madvise(p1, PAGE_SIZE, MADV_DONTNEED);
+
+	return p1;
+}
+
+void test_executing_on_unreadable_memory(int *ptr, u16 pkey)
+{
+	void *p1;
+	int scratch;
+	int ptr_contents;
+	int ret;
+
+	p1 = get_pointer_to_instructions();
+	lots_o_noops_around_write(&scratch);
+	ptr_contents = read_ptr(p1);
+	dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents);
+
+	ret = mprotect_pkey(p1, PAGE_SIZE, PROT_EXEC, (u64)pkey);
+	pkey_assert(!ret);
+	pkey_access_deny(pkey);
+
+	dprintf2("pkru: %x\n", rdpkru());
+
+	/*
+	 * Make sure this is an *instruction* fault
+	 */
+	madvise(p1, PAGE_SIZE, MADV_DONTNEED);
+	lots_o_noops_around_write(&scratch);
+	do_not_expect_pk_fault("executing on PROT_EXEC memory");
+	ptr_contents = read_ptr(p1);
+	dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents);
+	expected_pk_fault(pkey);
+}
+
+void test_implicit_mprotect_exec_only_memory(int *ptr, u16 pkey)
+{
+	void *p1;
+	int scratch;
+	int ptr_contents;
+	int ret;
+
+	dprintf1("%s() start\n", __func__);
+
+	p1 = get_pointer_to_instructions();
+	lots_o_noops_around_write(&scratch);
+	ptr_contents = read_ptr(p1);
+	dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents);
+
+	/* Use a *normal* mprotect(), not mprotect_pkey(): */
+	ret = mprotect(p1, PAGE_SIZE, PROT_EXEC);
+	pkey_assert(!ret);
+
+	dprintf2("pkru: %x\n", rdpkru());
+
+	/* Make sure this is an *instruction* fault */
+	madvise(p1, PAGE_SIZE, MADV_DONTNEED);
+	lots_o_noops_around_write(&scratch);
+	do_not_expect_pk_fault("executing on PROT_EXEC memory");
+	ptr_contents = read_ptr(p1);
+	dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents);
+	expected_pk_fault(UNKNOWN_PKEY);
+
+	/*
+	 * Put the memory back to non-PROT_EXEC.  Should clear the
+	 * exec-only pkey off the VMA and allow it to be readable
+	 * again.  Go to PROT_NONE first to check for a kernel bug
+	 * that did not clear the pkey when doing PROT_NONE.
+	 */
+	ret = mprotect(p1, PAGE_SIZE, PROT_NONE);
+	pkey_assert(!ret);
+
+	ret = mprotect(p1, PAGE_SIZE, PROT_READ|PROT_EXEC);
+	pkey_assert(!ret);
+	ptr_contents = read_ptr(p1);
+	do_not_expect_pk_fault("plain read on recently PROT_EXEC area");
+}
+
+void test_mprotect_pkey_on_unsupported_cpu(int *ptr, u16 pkey)
+{
+	int size = PAGE_SIZE;
+	int sret;
+
+	if (cpu_has_pku()) {
+		dprintf1("SKIP: %s: no CPU support\n", __func__);
+		return;
+	}
+
+	sret = syscall(SYS_mprotect_key, ptr, size, PROT_READ, pkey);
+	pkey_assert(sret < 0);
+}
+
+void (*pkey_tests[])(int *ptr, u16 pkey) = {
+	test_read_of_write_disabled_region,
+	test_read_of_access_disabled_region,
+	test_write_of_write_disabled_region,
+	test_write_of_access_disabled_region,
+	test_kernel_write_of_access_disabled_region,
+	test_kernel_write_of_write_disabled_region,
+	test_kernel_gup_of_access_disabled_region,
+	test_kernel_gup_write_to_write_disabled_region,
+	test_executing_on_unreadable_memory,
+	test_implicit_mprotect_exec_only_memory,
+	test_mprotect_with_pkey_0,
+	test_ptrace_of_child,
+	test_pkey_syscalls_on_non_allocated_pkey,
+	test_pkey_syscalls_bad_args,
+	test_pkey_alloc_exhaust,
+};
+
+void run_tests_once(void)
+{
+	int *ptr;
+	int prot = PROT_READ|PROT_WRITE;
+
+	for (test_nr = 0; test_nr < ARRAY_SIZE(pkey_tests); test_nr++) {
+		int pkey;
+		int orig_pkru_faults = pkru_faults;
+
+		dprintf1("======================\n");
+		dprintf1("test %d preparing...\n", test_nr);
+
+		tracing_on();
+		pkey = alloc_random_pkey();
+		dprintf1("test %d starting with pkey: %d\n", test_nr, pkey);
+		ptr = malloc_pkey(PAGE_SIZE, prot, pkey);
+		dprintf1("test %d starting...\n", test_nr);
+		pkey_tests[test_nr](ptr, pkey);
+		dprintf1("freeing test memory: %p\n", ptr);
+		free_pkey_malloc(ptr);
+		sys_pkey_free(pkey);
+
+		dprintf1("pkru_faults: %d\n", pkru_faults);
+		dprintf1("orig_pkru_faults: %d\n", orig_pkru_faults);
+
+		tracing_off();
+		close_test_fds();
+
+		printf("test %2d PASSED (iteration %d)\n", test_nr, iteration_nr);
+		dprintf1("======================\n\n");
+	}
+	iteration_nr++;
+}
+
+void pkey_setup_shadow(void)
+{
+	shadow_pkru = __rdpkru();
+}
+
+int main(void)
+{
+	int nr_iterations = 22;
+
+	srand((unsigned int)time(NULL));
+
+	setup_handlers();
+
+	printf("has pku: %d\n", cpu_has_pku());
+
+	if (!cpu_has_pku()) {
+		int size = PAGE_SIZE;
+		int *ptr;
+
+		printf("running PKEY tests for unsupported CPU/OS\n");
+
+		ptr  = mmap(NULL, size, PROT_NONE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
+		assert(ptr != (void *)-1);
+		test_mprotect_pkey_on_unsupported_cpu(ptr, 1);
+		exit(0);
+	}
+
+	pkey_setup_shadow();
+	printf("startup pkru: %x\n", rdpkru());
+	setup_hugetlbfs();
+
+	while (nr_iterations-- > 0)
+		run_tests_once();
+
+	printf("done (all tests OK)\n");
+	return 0;
+}
diff --git a/tools/testing/selftests/x86/ptrace_syscall.c b/tools/testing/selftests/x86/ptrace_syscall.c
new file mode 100644
index 000000000..12aaa0631
--- /dev/null
+++ b/tools/testing/selftests/x86/ptrace_syscall.c
@@ -0,0 +1,430 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+
+#include <sys/ptrace.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <sys/syscall.h>
+#include <sys/user.h>
+#include <unistd.h>
+#include <errno.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <err.h>
+#include <string.h>
+#include <asm/ptrace-abi.h>
+#include <sys/auxv.h>
+
+/* Bitness-agnostic defines for user_regs_struct fields. */
+#ifdef __x86_64__
+# define user_syscall_nr	orig_rax
+# define user_arg0		rdi
+# define user_arg1		rsi
+# define user_arg2		rdx
+# define user_arg3		r10
+# define user_arg4		r8
+# define user_arg5		r9
+# define user_ip		rip
+# define user_ax		rax
+#else
+# define user_syscall_nr	orig_eax
+# define user_arg0		ebx
+# define user_arg1		ecx
+# define user_arg2		edx
+# define user_arg3		esi
+# define user_arg4		edi
+# define user_arg5		ebp
+# define user_ip		eip
+# define user_ax		eax
+#endif
+
+static int nerrs = 0;
+
+struct syscall_args32 {
+	uint32_t nr, arg0, arg1, arg2, arg3, arg4, arg5;
+};
+
+#ifdef __i386__
+extern void sys32_helper(struct syscall_args32 *, void *);
+extern void int80_and_ret(void);
+#endif
+
+/*
+ * Helper to invoke int80 with controlled regs and capture the final regs.
+ */
+static void do_full_int80(struct syscall_args32 *args)
+{
+#ifdef __x86_64__
+	register unsigned long bp asm("bp") = args->arg5;
+	asm volatile ("int $0x80"
+		      : "+a" (args->nr),
+			"+b" (args->arg0), "+c" (args->arg1), "+d" (args->arg2),
+			"+S" (args->arg3), "+D" (args->arg4), "+r" (bp)
+			: : "r8", "r9", "r10", "r11");
+	args->arg5 = bp;
+#else
+	sys32_helper(args, int80_and_ret);
+#endif
+}
+
+#ifdef __i386__
+static void (*vsyscall32)(void);
+
+/*
+ * Nasty helper to invoke AT_SYSINFO (i.e. __kernel_vsyscall) with
+ * controlled regs and capture the final regs.  This is so nasty that it
+ * crashes my copy of gdb :)
+ */
+static void do_full_vsyscall32(struct syscall_args32 *args)
+{
+	sys32_helper(args, vsyscall32);
+}
+#endif
+
+static siginfo_t wait_trap(pid_t chld)
+{
+	siginfo_t si;
+	if (waitid(P_PID, chld, &si, WEXITED|WSTOPPED) != 0)
+		err(1, "waitid");
+	if (si.si_pid != chld)
+		errx(1, "got unexpected pid in event\n");
+	if (si.si_code != CLD_TRAPPED)
+		errx(1, "got unexpected event type %d\n", si.si_code);
+	return si;
+}
+
+static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
+		       int flags)
+{
+	struct sigaction sa;
+	memset(&sa, 0, sizeof(sa));
+	sa.sa_sigaction = handler;
+	sa.sa_flags = SA_SIGINFO | flags;
+	sigemptyset(&sa.sa_mask);
+	if (sigaction(sig, &sa, 0))
+		err(1, "sigaction");
+}
+
+static void setsigign(int sig, int flags)
+{
+	struct sigaction sa;
+	memset(&sa, 0, sizeof(sa));
+	sa.sa_sigaction = (void *)SIG_IGN;
+	sa.sa_flags = flags;
+	sigemptyset(&sa.sa_mask);
+	if (sigaction(sig, &sa, 0))
+		err(1, "sigaction");
+}
+
+static void clearhandler(int sig)
+{
+	struct sigaction sa;
+	memset(&sa, 0, sizeof(sa));
+	sa.sa_handler = SIG_DFL;
+	sigemptyset(&sa.sa_mask);
+	if (sigaction(sig, &sa, 0))
+		err(1, "sigaction");
+}
+
+#ifdef __x86_64__
+# define REG_BP REG_RBP
+#else
+# define REG_BP REG_EBP
+#endif
+
+static void empty_handler(int sig, siginfo_t *si, void *ctx_void)
+{
+}
+
+static void test_sys32_regs(void (*do_syscall)(struct syscall_args32 *))
+{
+	struct syscall_args32 args = {
+		.nr = 224,	/* gettid */
+		.arg0 = 10, .arg1 = 11, .arg2 = 12,
+		.arg3 = 13, .arg4 = 14, .arg5 = 15,
+	};
+
+	do_syscall(&args);
+
+	if (args.nr != getpid() ||
+	    args.arg0 != 10 || args.arg1 != 11 || args.arg2 != 12 ||
+	    args.arg3 != 13 || args.arg4 != 14 || args.arg5 != 15) {
+		printf("[FAIL]\tgetpid() failed to preserve regs\n");
+		nerrs++;
+	} else {
+		printf("[OK]\tgetpid() preserves regs\n");
+	}
+
+	sethandler(SIGUSR1, empty_handler, 0);
+
+	args.nr = 37;	/* kill */
+	args.arg0 = getpid();
+	args.arg1 = SIGUSR1;
+	do_syscall(&args);
+	if (args.nr != 0 ||
+	    args.arg0 != getpid() || args.arg1 != SIGUSR1 || args.arg2 != 12 ||
+	    args.arg3 != 13 || args.arg4 != 14 || args.arg5 != 15) {
+		printf("[FAIL]\tkill(getpid(), SIGUSR1) failed to preserve regs\n");
+		nerrs++;
+	} else {
+		printf("[OK]\tkill(getpid(), SIGUSR1) preserves regs\n");
+	}
+	clearhandler(SIGUSR1);
+}
+
+static void test_ptrace_syscall_restart(void)
+{
+	printf("[RUN]\tptrace-induced syscall restart\n");
+	pid_t chld = fork();
+	if (chld < 0)
+		err(1, "fork");
+
+	if (chld == 0) {
+		if (ptrace(PTRACE_TRACEME, 0, 0, 0) != 0)
+			err(1, "PTRACE_TRACEME");
+
+		pid_t pid = getpid(), tid = syscall(SYS_gettid);
+
+		printf("\tChild will make one syscall\n");
+		syscall(SYS_tgkill, pid, tid, SIGSTOP);
+
+		syscall(SYS_gettid, 10, 11, 12, 13, 14, 15);
+		_exit(0);
+	}
+
+	int status;
+
+	/* Wait for SIGSTOP. */
+	if (waitpid(chld, &status, 0) != chld || !WIFSTOPPED(status))
+		err(1, "waitpid");
+
+	struct user_regs_struct regs;
+
+	printf("[RUN]\tSYSEMU\n");
+	if (ptrace(PTRACE_SYSEMU, chld, 0, 0) != 0)
+		err(1, "PTRACE_SYSEMU");
+	wait_trap(chld);
+
+	if (ptrace(PTRACE_GETREGS, chld, 0, &regs) != 0)
+		err(1, "PTRACE_GETREGS");
+
+	if (regs.user_syscall_nr != SYS_gettid ||
+	    regs.user_arg0 != 10 || regs.user_arg1 != 11 ||
+	    regs.user_arg2 != 12 || regs.user_arg3 != 13 ||
+	    regs.user_arg4 != 14 || regs.user_arg5 != 15) {
+		printf("[FAIL]\tInitial args are wrong (nr=%lu, args=%lu %lu %lu %lu %lu %lu)\n", (unsigned long)regs.user_syscall_nr, (unsigned long)regs.user_arg0, (unsigned long)regs.user_arg1, (unsigned long)regs.user_arg2, (unsigned long)regs.user_arg3, (unsigned long)regs.user_arg4, (unsigned long)regs.user_arg5);
+		nerrs++;
+	} else {
+		printf("[OK]\tInitial nr and args are correct\n");
+	}
+
+	printf("[RUN]\tRestart the syscall (ip = 0x%lx)\n",
+	       (unsigned long)regs.user_ip);
+
+	/*
+	 * This does exactly what it appears to do if syscall is int80 or
+	 * SYSCALL64.  For SYSCALL32 or SYSENTER, though, this is highly
+	 * magical.  It needs to work so that ptrace and syscall restart
+	 * work as expected.
+	 */
+	regs.user_ax = regs.user_syscall_nr;
+	regs.user_ip -= 2;
+	if (ptrace(PTRACE_SETREGS, chld, 0, &regs) != 0)
+		err(1, "PTRACE_SETREGS");
+
+	if (ptrace(PTRACE_SYSEMU, chld, 0, 0) != 0)
+		err(1, "PTRACE_SYSEMU");
+	wait_trap(chld);
+
+	if (ptrace(PTRACE_GETREGS, chld, 0, &regs) != 0)
+		err(1, "PTRACE_GETREGS");
+
+	if (regs.user_syscall_nr != SYS_gettid ||
+	    regs.user_arg0 != 10 || regs.user_arg1 != 11 ||
+	    regs.user_arg2 != 12 || regs.user_arg3 != 13 ||
+	    regs.user_arg4 != 14 || regs.user_arg5 != 15) {
+		printf("[FAIL]\tRestart nr or args are wrong (nr=%lu, args=%lu %lu %lu %lu %lu %lu)\n", (unsigned long)regs.user_syscall_nr, (unsigned long)regs.user_arg0, (unsigned long)regs.user_arg1, (unsigned long)regs.user_arg2, (unsigned long)regs.user_arg3, (unsigned long)regs.user_arg4, (unsigned long)regs.user_arg5);
+		nerrs++;
+	} else {
+		printf("[OK]\tRestarted nr and args are correct\n");
+	}
+
+	printf("[RUN]\tChange nr and args and restart the syscall (ip = 0x%lx)\n",
+	       (unsigned long)regs.user_ip);
+
+	regs.user_ax = SYS_getpid;
+	regs.user_arg0 = 20;
+	regs.user_arg1 = 21;
+	regs.user_arg2 = 22;
+	regs.user_arg3 = 23;
+	regs.user_arg4 = 24;
+	regs.user_arg5 = 25;
+	regs.user_ip -= 2;
+
+	if (ptrace(PTRACE_SETREGS, chld, 0, &regs) != 0)
+		err(1, "PTRACE_SETREGS");
+
+	if (ptrace(PTRACE_SYSEMU, chld, 0, 0) != 0)
+		err(1, "PTRACE_SYSEMU");
+	wait_trap(chld);
+
+	if (ptrace(PTRACE_GETREGS, chld, 0, &regs) != 0)
+		err(1, "PTRACE_GETREGS");
+
+	if (regs.user_syscall_nr != SYS_getpid ||
+	    regs.user_arg0 != 20 || regs.user_arg1 != 21 || regs.user_arg2 != 22 ||
+	    regs.user_arg3 != 23 || regs.user_arg4 != 24 || regs.user_arg5 != 25) {
+		printf("[FAIL]\tRestart nr or args are wrong (nr=%lu, args=%lu %lu %lu %lu %lu %lu)\n", (unsigned long)regs.user_syscall_nr, (unsigned long)regs.user_arg0, (unsigned long)regs.user_arg1, (unsigned long)regs.user_arg2, (unsigned long)regs.user_arg3, (unsigned long)regs.user_arg4, (unsigned long)regs.user_arg5);
+		nerrs++;
+	} else {
+		printf("[OK]\tReplacement nr and args are correct\n");
+	}
+
+	if (ptrace(PTRACE_CONT, chld, 0, 0) != 0)
+		err(1, "PTRACE_CONT");
+	if (waitpid(chld, &status, 0) != chld)
+		err(1, "waitpid");
+	if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) {
+		printf("[FAIL]\tChild failed\n");
+		nerrs++;
+	} else {
+		printf("[OK]\tChild exited cleanly\n");
+	}
+}
+
+static void test_restart_under_ptrace(void)
+{
+	printf("[RUN]\tkernel syscall restart under ptrace\n");
+	pid_t chld = fork();
+	if (chld < 0)
+		err(1, "fork");
+
+	if (chld == 0) {
+		if (ptrace(PTRACE_TRACEME, 0, 0, 0) != 0)
+			err(1, "PTRACE_TRACEME");
+
+		pid_t pid = getpid(), tid = syscall(SYS_gettid);
+
+		printf("\tChild will take a nap until signaled\n");
+		setsigign(SIGUSR1, SA_RESTART);
+		syscall(SYS_tgkill, pid, tid, SIGSTOP);
+
+		syscall(SYS_pause, 0, 0, 0, 0, 0, 0);
+		_exit(0);
+	}
+
+	int status;
+
+	/* Wait for SIGSTOP. */
+	if (waitpid(chld, &status, 0) != chld || !WIFSTOPPED(status))
+		err(1, "waitpid");
+
+	struct user_regs_struct regs;
+
+	printf("[RUN]\tSYSCALL\n");
+	if (ptrace(PTRACE_SYSCALL, chld, 0, 0) != 0)
+		err(1, "PTRACE_SYSCALL");
+	wait_trap(chld);
+
+	/* We should be stopped at pause(2) entry. */
+
+	if (ptrace(PTRACE_GETREGS, chld, 0, &regs) != 0)
+		err(1, "PTRACE_GETREGS");
+
+	if (regs.user_syscall_nr != SYS_pause ||
+	    regs.user_arg0 != 0 || regs.user_arg1 != 0 ||
+	    regs.user_arg2 != 0 || regs.user_arg3 != 0 ||
+	    regs.user_arg4 != 0 || regs.user_arg5 != 0) {
+		printf("[FAIL]\tInitial args are wrong (nr=%lu, args=%lu %lu %lu %lu %lu %lu)\n", (unsigned long)regs.user_syscall_nr, (unsigned long)regs.user_arg0, (unsigned long)regs.user_arg1, (unsigned long)regs.user_arg2, (unsigned long)regs.user_arg3, (unsigned long)regs.user_arg4, (unsigned long)regs.user_arg5);
+		nerrs++;
+	} else {
+		printf("[OK]\tInitial nr and args are correct\n");
+	}
+
+	/* Interrupt it. */
+	kill(chld, SIGUSR1);
+
+	/* Advance.  We should be stopped at exit. */
+	printf("[RUN]\tSYSCALL\n");
+	if (ptrace(PTRACE_SYSCALL, chld, 0, 0) != 0)
+		err(1, "PTRACE_SYSCALL");
+	wait_trap(chld);
+
+	if (ptrace(PTRACE_GETREGS, chld, 0, &regs) != 0)
+		err(1, "PTRACE_GETREGS");
+
+	if (regs.user_syscall_nr != SYS_pause ||
+	    regs.user_arg0 != 0 || regs.user_arg1 != 0 ||
+	    regs.user_arg2 != 0 || regs.user_arg3 != 0 ||
+	    regs.user_arg4 != 0 || regs.user_arg5 != 0) {
+		printf("[FAIL]\tArgs after SIGUSR1 are wrong (nr=%lu, args=%lu %lu %lu %lu %lu %lu)\n", (unsigned long)regs.user_syscall_nr, (unsigned long)regs.user_arg0, (unsigned long)regs.user_arg1, (unsigned long)regs.user_arg2, (unsigned long)regs.user_arg3, (unsigned long)regs.user_arg4, (unsigned long)regs.user_arg5);
+		nerrs++;
+	} else {
+		printf("[OK]\tArgs after SIGUSR1 are correct (ax = %ld)\n",
+		       (long)regs.user_ax);
+	}
+
+	/* Poke the regs back in.  This must not break anything. */
+	if (ptrace(PTRACE_SETREGS, chld, 0, &regs) != 0)
+		err(1, "PTRACE_SETREGS");
+
+	/* Catch the (ignored) SIGUSR1. */
+	if (ptrace(PTRACE_CONT, chld, 0, 0) != 0)
+		err(1, "PTRACE_CONT");
+	if (waitpid(chld, &status, 0) != chld)
+		err(1, "waitpid");
+	if (!WIFSTOPPED(status)) {
+		printf("[FAIL]\tChild was stopped for SIGUSR1 (status = 0x%x)\n", status);
+		nerrs++;
+	} else {
+		printf("[OK]\tChild got SIGUSR1\n");
+	}
+
+	/* The next event should be pause(2) again. */
+	printf("[RUN]\tStep again\n");
+	if (ptrace(PTRACE_SYSCALL, chld, 0, 0) != 0)
+		err(1, "PTRACE_SYSCALL");
+	wait_trap(chld);
+
+	/* We should be stopped at pause(2) entry. */
+
+	if (ptrace(PTRACE_GETREGS, chld, 0, &regs) != 0)
+		err(1, "PTRACE_GETREGS");
+
+	if (regs.user_syscall_nr != SYS_pause ||
+	    regs.user_arg0 != 0 || regs.user_arg1 != 0 ||
+	    regs.user_arg2 != 0 || regs.user_arg3 != 0 ||
+	    regs.user_arg4 != 0 || regs.user_arg5 != 0) {
+		printf("[FAIL]\tpause did not restart (nr=%lu, args=%lu %lu %lu %lu %lu %lu)\n", (unsigned long)regs.user_syscall_nr, (unsigned long)regs.user_arg0, (unsigned long)regs.user_arg1, (unsigned long)regs.user_arg2, (unsigned long)regs.user_arg3, (unsigned long)regs.user_arg4, (unsigned long)regs.user_arg5);
+		nerrs++;
+	} else {
+		printf("[OK]\tpause(2) restarted correctly\n");
+	}
+
+	/* Kill it. */
+	kill(chld, SIGKILL);
+	if (waitpid(chld, &status, 0) != chld)
+		err(1, "waitpid");
+}
+
+int main()
+{
+	printf("[RUN]\tCheck int80 return regs\n");
+	test_sys32_regs(do_full_int80);
+
+#if defined(__i386__) && (!defined(__GLIBC__) || __GLIBC__ > 2 || __GLIBC_MINOR__ >= 16)
+	vsyscall32 = (void *)getauxval(AT_SYSINFO);
+	if (vsyscall32) {
+		printf("[RUN]\tCheck AT_SYSINFO return regs\n");
+		test_sys32_regs(do_full_vsyscall32);
+	} else {
+		printf("[SKIP]\tAT_SYSINFO is not available\n");
+	}
+#endif
+
+	test_ptrace_syscall_restart();
+
+	test_restart_under_ptrace();
+
+	return 0;
+}
diff --git a/tools/testing/selftests/x86/raw_syscall_helper_32.S b/tools/testing/selftests/x86/raw_syscall_helper_32.S
new file mode 100644
index 000000000..94410fa2b
--- /dev/null
+++ b/tools/testing/selftests/x86/raw_syscall_helper_32.S
@@ -0,0 +1,47 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+.global sys32_helper
+sys32_helper:
+	/* Args: syscall_args_32*, function pointer */
+	pushl	%ebp
+	pushl	%ebx
+	pushl	%esi
+	pushl	%edi
+	movl	5*4(%esp), %eax	/* pointer to args struct */
+
+	movl	1*4(%eax), %ebx
+	movl	2*4(%eax), %ecx
+	movl	3*4(%eax), %edx
+	movl	4*4(%eax), %esi
+	movl	5*4(%eax), %edi
+	movl	6*4(%eax), %ebp
+	movl	0*4(%eax), %eax
+
+	call	*(6*4)(%esp)	/* Do the syscall */
+
+	/* Now we need to recover without losing any reg values */
+	pushl	%eax
+	movl	6*4(%esp), %eax
+	popl	0*4(%eax)
+	movl	%ebx, 1*4(%eax)
+	movl	%ecx, 2*4(%eax)
+	movl	%edx, 3*4(%eax)
+	movl	%esi, 4*4(%eax)
+	movl	%edi, 5*4(%eax)
+	movl	%ebp, 6*4(%eax)
+
+	popl	%edi
+	popl	%esi
+	popl	%ebx
+	popl	%ebp
+	ret
+
+	.type sys32_helper, @function
+	.size sys32_helper, .-sys32_helper
+
+.global int80_and_ret
+int80_and_ret:
+	int	$0x80
+	ret
+
+	.type int80_and_ret, @function
+	.size int80_and_ret, .-int80_and_ret
diff --git a/tools/testing/selftests/x86/sigreturn.c b/tools/testing/selftests/x86/sigreturn.c
new file mode 100644
index 000000000..4d9dc3f2f
--- /dev/null
+++ b/tools/testing/selftests/x86/sigreturn.c
@@ -0,0 +1,871 @@
+/*
+ * sigreturn.c - tests for x86 sigreturn(2) and exit-to-userspace
+ * Copyright (c) 2014-2015 Andrew Lutomirski
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * This is a series of tests that exercises the sigreturn(2) syscall and
+ * the IRET / SYSRET paths in the kernel.
+ *
+ * For now, this focuses on the effects of unusual CS and SS values,
+ * and it has a bunch of tests to make sure that ESP/RSP is restored
+ * properly.
+ *
+ * The basic idea behind these tests is to raise(SIGUSR1) to create a
+ * sigcontext frame, plug in the values to be tested, and then return,
+ * which implicitly invokes sigreturn(2) and programs the user context
+ * as desired.
+ *
+ * For tests for which we expect sigreturn and the subsequent return to
+ * user mode to succeed, we return to a short trampoline that generates
+ * SIGTRAP so that the meat of the tests can be ordinary C code in a
+ * SIGTRAP handler.
+ *
+ * The inner workings of each test is documented below.
+ *
+ * Do not run on outdated, unpatched kernels at risk of nasty crashes.
+ */
+
+#define _GNU_SOURCE
+
+#include <sys/time.h>
+#include <time.h>
+#include <stdlib.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <string.h>
+#include <inttypes.h>
+#include <sys/mman.h>
+#include <sys/signal.h>
+#include <sys/ucontext.h>
+#include <asm/ldt.h>
+#include <err.h>
+#include <setjmp.h>
+#include <stddef.h>
+#include <stdbool.h>
+#include <sys/ptrace.h>
+#include <sys/user.h>
+
+/* Pull in AR_xyz defines. */
+typedef unsigned int u32;
+typedef unsigned short u16;
+#include "../../../../arch/x86/include/asm/desc_defs.h"
+
+/*
+ * Copied from asm/ucontext.h, as asm/ucontext.h conflicts badly with the glibc
+ * headers.
+ */
+#ifdef __x86_64__
+/*
+ * UC_SIGCONTEXT_SS will be set when delivering 64-bit or x32 signals on
+ * kernels that save SS in the sigcontext.  All kernels that set
+ * UC_SIGCONTEXT_SS will correctly restore at least the low 32 bits of esp
+ * regardless of SS (i.e. they implement espfix).
+ *
+ * Kernels that set UC_SIGCONTEXT_SS will also set UC_STRICT_RESTORE_SS
+ * when delivering a signal that came from 64-bit code.
+ *
+ * Sigreturn restores SS as follows:
+ *
+ * if (saved SS is valid || UC_STRICT_RESTORE_SS is set ||
+ *     saved CS is not 64-bit)
+ *         new SS = saved SS  (will fail IRET and signal if invalid)
+ * else
+ *         new SS = a flat 32-bit data segment
+ */
+#define UC_SIGCONTEXT_SS       0x2
+#define UC_STRICT_RESTORE_SS   0x4
+#endif
+
+/*
+ * In principle, this test can run on Linux emulation layers (e.g.
+ * Illumos "LX branded zones").  Solaris-based kernels reserve LDT
+ * entries 0-5 for their own internal purposes, so start our LDT
+ * allocations above that reservation.  (The tests don't pass on LX
+ * branded zones, but at least this lets them run.)
+ */
+#define LDT_OFFSET 6
+
+/* An aligned stack accessible through some of our segments. */
+static unsigned char stack16[65536] __attribute__((aligned(4096)));
+
+/*
+ * An aligned int3 instruction used as a trampoline.  Some of the tests
+ * want to fish out their ss values, so this trampoline copies ss to eax
+ * before the int3.
+ */
+asm (".pushsection .text\n\t"
+     ".type int3, @function\n\t"
+     ".align 4096\n\t"
+     "int3:\n\t"
+     "mov %ss,%ecx\n\t"
+     "int3\n\t"
+     ".size int3, . - int3\n\t"
+     ".align 4096, 0xcc\n\t"
+     ".popsection");
+extern char int3[4096];
+
+/*
+ * At startup, we prepapre:
+ *
+ * - ldt_nonexistent_sel: An LDT entry that doesn't exist (all-zero
+ *   descriptor or out of bounds).
+ * - code16_sel: A 16-bit LDT code segment pointing to int3.
+ * - data16_sel: A 16-bit LDT data segment pointing to stack16.
+ * - npcode32_sel: A 32-bit not-present LDT code segment pointing to int3.
+ * - npdata32_sel: A 32-bit not-present LDT data segment pointing to stack16.
+ * - gdt_data16_idx: A 16-bit GDT data segment pointing to stack16.
+ * - gdt_npdata32_idx: A 32-bit not-present GDT data segment pointing to
+ *   stack16.
+ *
+ * For no particularly good reason, xyz_sel is a selector value with the
+ * RPL and LDT bits filled in, whereas xyz_idx is just an index into the
+ * descriptor table.  These variables will be zero if their respective
+ * segments could not be allocated.
+ */
+static unsigned short ldt_nonexistent_sel;
+static unsigned short code16_sel, data16_sel, npcode32_sel, npdata32_sel;
+
+static unsigned short gdt_data16_idx, gdt_npdata32_idx;
+
+static unsigned short GDT3(int idx)
+{
+	return (idx << 3) | 3;
+}
+
+static unsigned short LDT3(int idx)
+{
+	return (idx << 3) | 7;
+}
+
+/* Our sigaltstack scratch space. */
+static char altstack_data[SIGSTKSZ];
+
+static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
+		       int flags)
+{
+	struct sigaction sa;
+	memset(&sa, 0, sizeof(sa));
+	sa.sa_sigaction = handler;
+	sa.sa_flags = SA_SIGINFO | flags;
+	sigemptyset(&sa.sa_mask);
+	if (sigaction(sig, &sa, 0))
+		err(1, "sigaction");
+}
+
+static void clearhandler(int sig)
+{
+	struct sigaction sa;
+	memset(&sa, 0, sizeof(sa));
+	sa.sa_handler = SIG_DFL;
+	sigemptyset(&sa.sa_mask);
+	if (sigaction(sig, &sa, 0))
+		err(1, "sigaction");
+}
+
+static void add_ldt(const struct user_desc *desc, unsigned short *var,
+		    const char *name)
+{
+	if (syscall(SYS_modify_ldt, 1, desc, sizeof(*desc)) == 0) {
+		*var = LDT3(desc->entry_number);
+	} else {
+		printf("[NOTE]\tFailed to create %s segment\n", name);
+		*var = 0;
+	}
+}
+
+static void setup_ldt(void)
+{
+	if ((unsigned long)stack16 > (1ULL << 32) - sizeof(stack16))
+		errx(1, "stack16 is too high\n");
+	if ((unsigned long)int3 > (1ULL << 32) - sizeof(int3))
+		errx(1, "int3 is too high\n");
+
+	ldt_nonexistent_sel = LDT3(LDT_OFFSET + 2);
+
+	const struct user_desc code16_desc = {
+		.entry_number    = LDT_OFFSET + 0,
+		.base_addr       = (unsigned long)int3,
+		.limit           = 4095,
+		.seg_32bit       = 0,
+		.contents        = 2, /* Code, not conforming */
+		.read_exec_only  = 0,
+		.limit_in_pages  = 0,
+		.seg_not_present = 0,
+		.useable         = 0
+	};
+	add_ldt(&code16_desc, &code16_sel, "code16");
+
+	const struct user_desc data16_desc = {
+		.entry_number    = LDT_OFFSET + 1,
+		.base_addr       = (unsigned long)stack16,
+		.limit           = 0xffff,
+		.seg_32bit       = 0,
+		.contents        = 0, /* Data, grow-up */
+		.read_exec_only  = 0,
+		.limit_in_pages  = 0,
+		.seg_not_present = 0,
+		.useable         = 0
+	};
+	add_ldt(&data16_desc, &data16_sel, "data16");
+
+	const struct user_desc npcode32_desc = {
+		.entry_number    = LDT_OFFSET + 3,
+		.base_addr       = (unsigned long)int3,
+		.limit           = 4095,
+		.seg_32bit       = 1,
+		.contents        = 2, /* Code, not conforming */
+		.read_exec_only  = 0,
+		.limit_in_pages  = 0,
+		.seg_not_present = 1,
+		.useable         = 0
+	};
+	add_ldt(&npcode32_desc, &npcode32_sel, "npcode32");
+
+	const struct user_desc npdata32_desc = {
+		.entry_number    = LDT_OFFSET + 4,
+		.base_addr       = (unsigned long)stack16,
+		.limit           = 0xffff,
+		.seg_32bit       = 1,
+		.contents        = 0, /* Data, grow-up */
+		.read_exec_only  = 0,
+		.limit_in_pages  = 0,
+		.seg_not_present = 1,
+		.useable         = 0
+	};
+	add_ldt(&npdata32_desc, &npdata32_sel, "npdata32");
+
+	struct user_desc gdt_data16_desc = {
+		.entry_number    = -1,
+		.base_addr       = (unsigned long)stack16,
+		.limit           = 0xffff,
+		.seg_32bit       = 0,
+		.contents        = 0, /* Data, grow-up */
+		.read_exec_only  = 0,
+		.limit_in_pages  = 0,
+		.seg_not_present = 0,
+		.useable         = 0
+	};
+
+	if (syscall(SYS_set_thread_area, &gdt_data16_desc) == 0) {
+		/*
+		 * This probably indicates vulnerability to CVE-2014-8133.
+		 * Merely getting here isn't definitive, though, and we'll
+		 * diagnose the problem for real later on.
+		 */
+		printf("[WARN]\tset_thread_area allocated data16 at index %d\n",
+		       gdt_data16_desc.entry_number);
+		gdt_data16_idx = gdt_data16_desc.entry_number;
+	} else {
+		printf("[OK]\tset_thread_area refused 16-bit data\n");
+	}
+
+	struct user_desc gdt_npdata32_desc = {
+		.entry_number    = -1,
+		.base_addr       = (unsigned long)stack16,
+		.limit           = 0xffff,
+		.seg_32bit       = 1,
+		.contents        = 0, /* Data, grow-up */
+		.read_exec_only  = 0,
+		.limit_in_pages  = 0,
+		.seg_not_present = 1,
+		.useable         = 0
+	};
+
+	if (syscall(SYS_set_thread_area, &gdt_npdata32_desc) == 0) {
+		/*
+		 * As a hardening measure, newer kernels don't allow this.
+		 */
+		printf("[WARN]\tset_thread_area allocated npdata32 at index %d\n",
+		       gdt_npdata32_desc.entry_number);
+		gdt_npdata32_idx = gdt_npdata32_desc.entry_number;
+	} else {
+		printf("[OK]\tset_thread_area refused 16-bit data\n");
+	}
+}
+
+/* State used by our signal handlers. */
+static gregset_t initial_regs, requested_regs, resulting_regs;
+
+/* Instructions for the SIGUSR1 handler. */
+static volatile unsigned short sig_cs, sig_ss;
+static volatile sig_atomic_t sig_trapped, sig_err, sig_trapno;
+#ifdef __x86_64__
+static volatile sig_atomic_t sig_corrupt_final_ss;
+#endif
+
+/* Abstractions for some 32-bit vs 64-bit differences. */
+#ifdef __x86_64__
+# define REG_IP REG_RIP
+# define REG_SP REG_RSP
+# define REG_CX REG_RCX
+
+struct selectors {
+	unsigned short cs, gs, fs, ss;
+};
+
+static unsigned short *ssptr(ucontext_t *ctx)
+{
+	struct selectors *sels = (void *)&ctx->uc_mcontext.gregs[REG_CSGSFS];
+	return &sels->ss;
+}
+
+static unsigned short *csptr(ucontext_t *ctx)
+{
+	struct selectors *sels = (void *)&ctx->uc_mcontext.gregs[REG_CSGSFS];
+	return &sels->cs;
+}
+#else
+# define REG_IP REG_EIP
+# define REG_SP REG_ESP
+# define REG_CX REG_ECX
+
+static greg_t *ssptr(ucontext_t *ctx)
+{
+	return &ctx->uc_mcontext.gregs[REG_SS];
+}
+
+static greg_t *csptr(ucontext_t *ctx)
+{
+	return &ctx->uc_mcontext.gregs[REG_CS];
+}
+#endif
+
+/*
+ * Checks a given selector for its code bitness or returns -1 if it's not
+ * a usable code segment selector.
+ */
+int cs_bitness(unsigned short cs)
+{
+	uint32_t valid = 0, ar;
+	asm ("lar %[cs], %[ar]\n\t"
+	     "jnz 1f\n\t"
+	     "mov $1, %[valid]\n\t"
+	     "1:"
+	     : [ar] "=r" (ar), [valid] "+rm" (valid)
+	     : [cs] "r" (cs));
+
+	if (!valid)
+		return -1;
+
+	bool db = (ar & (1 << 22));
+	bool l = (ar & (1 << 21));
+
+	if (!(ar & (1<<11)))
+	    return -1;	/* Not code. */
+
+	if (l && !db)
+		return 64;
+	else if (!l && db)
+		return 32;
+	else if (!l && !db)
+		return 16;
+	else
+		return -1;	/* Unknown bitness. */
+}
+
+/*
+ * Checks a given selector for its code bitness or returns -1 if it's not
+ * a usable code segment selector.
+ */
+bool is_valid_ss(unsigned short cs)
+{
+	uint32_t valid = 0, ar;
+	asm ("lar %[cs], %[ar]\n\t"
+	     "jnz 1f\n\t"
+	     "mov $1, %[valid]\n\t"
+	     "1:"
+	     : [ar] "=r" (ar), [valid] "+rm" (valid)
+	     : [cs] "r" (cs));
+
+	if (!valid)
+		return false;
+
+	if ((ar & AR_TYPE_MASK) != AR_TYPE_RWDATA &&
+	    (ar & AR_TYPE_MASK) != AR_TYPE_RWDATA_EXPDOWN)
+		return false;
+
+	return (ar & AR_P);
+}
+
+/* Number of errors in the current test case. */
+static volatile sig_atomic_t nerrs;
+
+static void validate_signal_ss(int sig, ucontext_t *ctx)
+{
+#ifdef __x86_64__
+	bool was_64bit = (cs_bitness(*csptr(ctx)) == 64);
+
+	if (!(ctx->uc_flags & UC_SIGCONTEXT_SS)) {
+		printf("[FAIL]\tUC_SIGCONTEXT_SS was not set\n");
+		nerrs++;
+
+		/*
+		 * This happens on Linux 4.1.  The rest will fail, too, so
+		 * return now to reduce the noise.
+		 */
+		return;
+	}
+
+	/* UC_STRICT_RESTORE_SS is set iff we came from 64-bit mode. */
+	if (!!(ctx->uc_flags & UC_STRICT_RESTORE_SS) != was_64bit) {
+		printf("[FAIL]\tUC_STRICT_RESTORE_SS was wrong in signal %d\n",
+		       sig);
+		nerrs++;
+	}
+
+	if (is_valid_ss(*ssptr(ctx))) {
+		/*
+		 * DOSEMU was written before 64-bit sigcontext had SS, and
+		 * it tries to figure out the signal source SS by looking at
+		 * the physical register.  Make sure that keeps working.
+		 */
+		unsigned short hw_ss;
+		asm ("mov %%ss, %0" : "=rm" (hw_ss));
+		if (hw_ss != *ssptr(ctx)) {
+			printf("[FAIL]\tHW SS didn't match saved SS\n");
+			nerrs++;
+		}
+	}
+#endif
+}
+
+/*
+ * SIGUSR1 handler.  Sets CS and SS as requested and points IP to the
+ * int3 trampoline.  Sets SP to a large known value so that we can see
+ * whether the value round-trips back to user mode correctly.
+ */
+static void sigusr1(int sig, siginfo_t *info, void *ctx_void)
+{
+	ucontext_t *ctx = (ucontext_t*)ctx_void;
+
+	validate_signal_ss(sig, ctx);
+
+	memcpy(&initial_regs, &ctx->uc_mcontext.gregs, sizeof(gregset_t));
+
+	*csptr(ctx) = sig_cs;
+	*ssptr(ctx) = sig_ss;
+
+	ctx->uc_mcontext.gregs[REG_IP] =
+		sig_cs == code16_sel ? 0 : (unsigned long)&int3;
+	ctx->uc_mcontext.gregs[REG_SP] = (unsigned long)0x8badf00d5aadc0deULL;
+	ctx->uc_mcontext.gregs[REG_CX] = 0;
+
+	memcpy(&requested_regs, &ctx->uc_mcontext.gregs, sizeof(gregset_t));
+	requested_regs[REG_CX] = *ssptr(ctx);	/* The asm code does this. */
+
+	return;
+}
+
+/*
+ * Called after a successful sigreturn (via int3) or from a failed
+ * sigreturn (directly by kernel).  Restores our state so that the
+ * original raise(SIGUSR1) returns.
+ */
+static void sigtrap(int sig, siginfo_t *info, void *ctx_void)
+{
+	ucontext_t *ctx = (ucontext_t*)ctx_void;
+
+	validate_signal_ss(sig, ctx);
+
+	sig_err = ctx->uc_mcontext.gregs[REG_ERR];
+	sig_trapno = ctx->uc_mcontext.gregs[REG_TRAPNO];
+
+	unsigned short ss;
+	asm ("mov %%ss,%0" : "=r" (ss));
+
+	greg_t asm_ss = ctx->uc_mcontext.gregs[REG_CX];
+	if (asm_ss != sig_ss && sig == SIGTRAP) {
+		/* Sanity check failure. */
+		printf("[FAIL]\tSIGTRAP: ss = %hx, frame ss = %hx, ax = %llx\n",
+		       ss, *ssptr(ctx), (unsigned long long)asm_ss);
+		nerrs++;
+	}
+
+	memcpy(&resulting_regs, &ctx->uc_mcontext.gregs, sizeof(gregset_t));
+	memcpy(&ctx->uc_mcontext.gregs, &initial_regs, sizeof(gregset_t));
+
+#ifdef __x86_64__
+	if (sig_corrupt_final_ss) {
+		if (ctx->uc_flags & UC_STRICT_RESTORE_SS) {
+			printf("[FAIL]\tUC_STRICT_RESTORE_SS was set inappropriately\n");
+			nerrs++;
+		} else {
+			/*
+			 * DOSEMU transitions from 32-bit to 64-bit mode by
+			 * adjusting sigcontext, and it requires that this work
+			 * even if the saved SS is bogus.
+			 */
+			printf("\tCorrupting SS on return to 64-bit mode\n");
+			*ssptr(ctx) = 0;
+		}
+	}
+#endif
+
+	sig_trapped = sig;
+}
+
+#ifdef __x86_64__
+/* Tests recovery if !UC_STRICT_RESTORE_SS */
+static void sigusr2(int sig, siginfo_t *info, void *ctx_void)
+{
+	ucontext_t *ctx = (ucontext_t*)ctx_void;
+
+	if (!(ctx->uc_flags & UC_STRICT_RESTORE_SS)) {
+		printf("[FAIL]\traise(2) didn't set UC_STRICT_RESTORE_SS\n");
+		nerrs++;
+		return;  /* We can't do the rest. */
+	}
+
+	ctx->uc_flags &= ~UC_STRICT_RESTORE_SS;
+	*ssptr(ctx) = 0;
+
+	/* Return.  The kernel should recover without sending another signal. */
+}
+
+static int test_nonstrict_ss(void)
+{
+	clearhandler(SIGUSR1);
+	clearhandler(SIGTRAP);
+	clearhandler(SIGSEGV);
+	clearhandler(SIGILL);
+	sethandler(SIGUSR2, sigusr2, 0);
+
+	nerrs = 0;
+
+	printf("[RUN]\tClear UC_STRICT_RESTORE_SS and corrupt SS\n");
+	raise(SIGUSR2);
+	if (!nerrs)
+		printf("[OK]\tIt worked\n");
+
+	return nerrs;
+}
+#endif
+
+/* Finds a usable code segment of the requested bitness. */
+int find_cs(int bitness)
+{
+	unsigned short my_cs;
+
+	asm ("mov %%cs,%0" :  "=r" (my_cs));
+
+	if (cs_bitness(my_cs) == bitness)
+		return my_cs;
+	if (cs_bitness(my_cs + (2 << 3)) == bitness)
+		return my_cs + (2 << 3);
+	if (my_cs > (2<<3) && cs_bitness(my_cs - (2 << 3)) == bitness)
+	    return my_cs - (2 << 3);
+	if (cs_bitness(code16_sel) == bitness)
+		return code16_sel;
+
+	printf("[WARN]\tCould not find %d-bit CS\n", bitness);
+	return -1;
+}
+
+static int test_valid_sigreturn(int cs_bits, bool use_16bit_ss, int force_ss)
+{
+	int cs = find_cs(cs_bits);
+	if (cs == -1) {
+		printf("[SKIP]\tCode segment unavailable for %d-bit CS, %d-bit SS\n",
+		       cs_bits, use_16bit_ss ? 16 : 32);
+		return 0;
+	}
+
+	if (force_ss != -1) {
+		sig_ss = force_ss;
+	} else {
+		if (use_16bit_ss) {
+			if (!data16_sel) {
+				printf("[SKIP]\tData segment unavailable for %d-bit CS, 16-bit SS\n",
+				       cs_bits);
+				return 0;
+			}
+			sig_ss = data16_sel;
+		} else {
+			asm volatile ("mov %%ss,%0" : "=r" (sig_ss));
+		}
+	}
+
+	sig_cs = cs;
+
+	printf("[RUN]\tValid sigreturn: %d-bit CS (%hx), %d-bit SS (%hx%s)\n",
+	       cs_bits, sig_cs, use_16bit_ss ? 16 : 32, sig_ss,
+	       (sig_ss & 4) ? "" : ", GDT");
+
+	raise(SIGUSR1);
+
+	nerrs = 0;
+
+	/*
+	 * Check that each register had an acceptable value when the
+	 * int3 trampoline was invoked.
+	 */
+	for (int i = 0; i < NGREG; i++) {
+		greg_t req = requested_regs[i], res = resulting_regs[i];
+
+		if (i == REG_TRAPNO || i == REG_IP)
+			continue;	/* don't care */
+
+		if (i == REG_SP) {
+			/*
+			 * If we were using a 16-bit stack segment, then
+			 * the kernel is a bit stuck: IRET only restores
+			 * the low 16 bits of ESP/RSP if SS is 16-bit.
+			 * The kernel uses a hack to restore bits 31:16,
+			 * but that hack doesn't help with bits 63:32.
+			 * On Intel CPUs, bits 63:32 end up zeroed, and, on
+			 * AMD CPUs, they leak the high bits of the kernel
+			 * espfix64 stack pointer.  There's very little that
+			 * the kernel can do about it.
+			 *
+			 * Similarly, if we are returning to a 32-bit context,
+			 * the CPU will often lose the high 32 bits of RSP.
+			 */
+
+			if (res == req)
+				continue;
+
+			if (cs_bits != 64 && ((res ^ req) & 0xFFFFFFFF) == 0) {
+				printf("[NOTE]\tSP: %llx -> %llx\n",
+				       (unsigned long long)req,
+				       (unsigned long long)res);
+				continue;
+			}
+
+			printf("[FAIL]\tSP mismatch: requested 0x%llx; got 0x%llx\n",
+			       (unsigned long long)requested_regs[i],
+			       (unsigned long long)resulting_regs[i]);
+			nerrs++;
+			continue;
+		}
+
+		bool ignore_reg = false;
+#if __i386__
+		if (i == REG_UESP)
+			ignore_reg = true;
+#else
+		if (i == REG_CSGSFS) {
+			struct selectors *req_sels =
+				(void *)&requested_regs[REG_CSGSFS];
+			struct selectors *res_sels =
+				(void *)&resulting_regs[REG_CSGSFS];
+			if (req_sels->cs != res_sels->cs) {
+				printf("[FAIL]\tCS mismatch: requested 0x%hx; got 0x%hx\n",
+				       req_sels->cs, res_sels->cs);
+				nerrs++;
+			}
+
+			if (req_sels->ss != res_sels->ss) {
+				printf("[FAIL]\tSS mismatch: requested 0x%hx; got 0x%hx\n",
+				       req_sels->ss, res_sels->ss);
+				nerrs++;
+			}
+
+			continue;
+		}
+#endif
+
+		/* Sanity check on the kernel */
+		if (i == REG_CX && req != res) {
+			printf("[FAIL]\tCX (saved SP) mismatch: requested 0x%llx; got 0x%llx\n",
+			       (unsigned long long)req,
+			       (unsigned long long)res);
+			nerrs++;
+			continue;
+		}
+
+		if (req != res && !ignore_reg) {
+			printf("[FAIL]\tReg %d mismatch: requested 0x%llx; got 0x%llx\n",
+			       i, (unsigned long long)req,
+			       (unsigned long long)res);
+			nerrs++;
+		}
+	}
+
+	if (nerrs == 0)
+		printf("[OK]\tall registers okay\n");
+
+	return nerrs;
+}
+
+static int test_bad_iret(int cs_bits, unsigned short ss, int force_cs)
+{
+	int cs = force_cs == -1 ? find_cs(cs_bits) : force_cs;
+	if (cs == -1)
+		return 0;
+
+	sig_cs = cs;
+	sig_ss = ss;
+
+	printf("[RUN]\t%d-bit CS (%hx), bogus SS (%hx)\n",
+	       cs_bits, sig_cs, sig_ss);
+
+	sig_trapped = 0;
+	raise(SIGUSR1);
+	if (sig_trapped) {
+		char errdesc[32] = "";
+		if (sig_err) {
+			const char *src = (sig_err & 1) ? " EXT" : "";
+			const char *table;
+			if ((sig_err & 0x6) == 0x0)
+				table = "GDT";
+			else if ((sig_err & 0x6) == 0x4)
+				table = "LDT";
+			else if ((sig_err & 0x6) == 0x2)
+				table = "IDT";
+			else
+				table = "???";
+
+			sprintf(errdesc, "%s%s index %d, ",
+				table, src, sig_err >> 3);
+		}
+
+		char trapname[32];
+		if (sig_trapno == 13)
+			strcpy(trapname, "GP");
+		else if (sig_trapno == 11)
+			strcpy(trapname, "NP");
+		else if (sig_trapno == 12)
+			strcpy(trapname, "SS");
+		else if (sig_trapno == 32)
+			strcpy(trapname, "IRET");  /* X86_TRAP_IRET */
+		else
+			sprintf(trapname, "%d", sig_trapno);
+
+		printf("[OK]\tGot #%s(0x%lx) (i.e. %s%s)\n",
+		       trapname, (unsigned long)sig_err,
+		       errdesc, strsignal(sig_trapped));
+		return 0;
+	} else {
+		/*
+		 * This also implicitly tests UC_STRICT_RESTORE_SS:
+		 * We check that these signals set UC_STRICT_RESTORE_SS and,
+		 * if UC_STRICT_RESTORE_SS doesn't cause strict behavior,
+		 * then we won't get SIGSEGV.
+		 */
+		printf("[FAIL]\tDid not get SIGSEGV\n");
+		return 1;
+	}
+}
+
+int main()
+{
+	int total_nerrs = 0;
+	unsigned short my_cs, my_ss;
+
+	asm volatile ("mov %%cs,%0" : "=r" (my_cs));
+	asm volatile ("mov %%ss,%0" : "=r" (my_ss));
+	setup_ldt();
+
+	stack_t stack = {
+		.ss_sp = altstack_data,
+		.ss_size = SIGSTKSZ,
+	};
+	if (sigaltstack(&stack, NULL) != 0)
+		err(1, "sigaltstack");
+
+	sethandler(SIGUSR1, sigusr1, 0);
+	sethandler(SIGTRAP, sigtrap, SA_ONSTACK);
+
+	/* Easy cases: return to a 32-bit SS in each possible CS bitness. */
+	total_nerrs += test_valid_sigreturn(64, false, -1);
+	total_nerrs += test_valid_sigreturn(32, false, -1);
+	total_nerrs += test_valid_sigreturn(16, false, -1);
+
+	/*
+	 * Test easy espfix cases: return to a 16-bit LDT SS in each possible
+	 * CS bitness.  NB: with a long mode CS, the SS bitness is irrelevant.
+	 *
+	 * This catches the original missing-espfix-on-64-bit-kernels issue
+	 * as well as CVE-2014-8134.
+	 */
+	total_nerrs += test_valid_sigreturn(64, true, -1);
+	total_nerrs += test_valid_sigreturn(32, true, -1);
+	total_nerrs += test_valid_sigreturn(16, true, -1);
+
+	if (gdt_data16_idx) {
+		/*
+		 * For performance reasons, Linux skips espfix if SS points
+		 * to the GDT.  If we were able to allocate a 16-bit SS in
+		 * the GDT, see if it leaks parts of the kernel stack pointer.
+		 *
+		 * This tests for CVE-2014-8133.
+		 */
+		total_nerrs += test_valid_sigreturn(64, true,
+						    GDT3(gdt_data16_idx));
+		total_nerrs += test_valid_sigreturn(32, true,
+						    GDT3(gdt_data16_idx));
+		total_nerrs += test_valid_sigreturn(16, true,
+						    GDT3(gdt_data16_idx));
+	}
+
+#ifdef __x86_64__
+	/* Nasty ABI case: check SS corruption handling. */
+	sig_corrupt_final_ss = 1;
+	total_nerrs += test_valid_sigreturn(32, false, -1);
+	total_nerrs += test_valid_sigreturn(32, true, -1);
+	sig_corrupt_final_ss = 0;
+#endif
+
+	/*
+	 * We're done testing valid sigreturn cases.  Now we test states
+	 * for which sigreturn itself will succeed but the subsequent
+	 * entry to user mode will fail.
+	 *
+	 * Depending on the failure mode and the kernel bitness, these
+	 * entry failures can generate SIGSEGV, SIGBUS, or SIGILL.
+	 */
+	clearhandler(SIGTRAP);
+	sethandler(SIGSEGV, sigtrap, SA_ONSTACK);
+	sethandler(SIGBUS, sigtrap, SA_ONSTACK);
+	sethandler(SIGILL, sigtrap, SA_ONSTACK);  /* 32-bit kernels do this */
+
+	/* Easy failures: invalid SS, resulting in #GP(0) */
+	test_bad_iret(64, ldt_nonexistent_sel, -1);
+	test_bad_iret(32, ldt_nonexistent_sel, -1);
+	test_bad_iret(16, ldt_nonexistent_sel, -1);
+
+	/* These fail because SS isn't a data segment, resulting in #GP(SS) */
+	test_bad_iret(64, my_cs, -1);
+	test_bad_iret(32, my_cs, -1);
+	test_bad_iret(16, my_cs, -1);
+
+	/* Try to return to a not-present code segment, triggering #NP(SS). */
+	test_bad_iret(32, my_ss, npcode32_sel);
+
+	/*
+	 * Try to return to a not-present but otherwise valid data segment.
+	 * This will cause IRET to fail with #SS on the espfix stack.  This
+	 * exercises CVE-2014-9322.
+	 *
+	 * Note that, if espfix is enabled, 64-bit Linux will lose track
+	 * of the actual cause of failure and report #GP(0) instead.
+	 * This would be very difficult for Linux to avoid, because
+	 * espfix64 causes IRET failures to be promoted to #DF, so the
+	 * original exception frame is never pushed onto the stack.
+	 */
+	test_bad_iret(32, npdata32_sel, -1);
+
+	/*
+	 * Try to return to a not-present but otherwise valid data
+	 * segment without invoking espfix.  Newer kernels don't allow
+	 * this to happen in the first place.  On older kernels, though,
+	 * this can trigger CVE-2014-9322.
+	 */
+	if (gdt_npdata32_idx)
+		test_bad_iret(32, GDT3(gdt_npdata32_idx), -1);
+
+#ifdef __x86_64__
+	total_nerrs += test_nonstrict_ss();
+#endif
+
+	return total_nerrs ? 1 : 0;
+}
diff --git a/tools/testing/selftests/x86/single_step_syscall.c b/tools/testing/selftests/x86/single_step_syscall.c
new file mode 100644
index 000000000..ddfdd635d
--- /dev/null
+++ b/tools/testing/selftests/x86/single_step_syscall.c
@@ -0,0 +1,187 @@
+/*
+ * single_step_syscall.c - single-steps various x86 syscalls
+ * Copyright (c) 2014-2015 Andrew Lutomirski
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * This is a very simple series of tests that makes system calls with
+ * the TF flag set.  This exercises some nasty kernel code in the
+ * SYSENTER case: SYSENTER does not clear TF, so SYSENTER with TF set
+ * immediately issues #DB from CPL 0.  This requires special handling in
+ * the kernel.
+ */
+
+#define _GNU_SOURCE
+
+#include <sys/time.h>
+#include <time.h>
+#include <stdlib.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <string.h>
+#include <inttypes.h>
+#include <sys/mman.h>
+#include <sys/signal.h>
+#include <sys/ucontext.h>
+#include <asm/ldt.h>
+#include <err.h>
+#include <setjmp.h>
+#include <stddef.h>
+#include <stdbool.h>
+#include <sys/ptrace.h>
+#include <sys/user.h>
+
+static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
+		       int flags)
+{
+	struct sigaction sa;
+	memset(&sa, 0, sizeof(sa));
+	sa.sa_sigaction = handler;
+	sa.sa_flags = SA_SIGINFO | flags;
+	sigemptyset(&sa.sa_mask);
+	if (sigaction(sig, &sa, 0))
+		err(1, "sigaction");
+}
+
+static volatile sig_atomic_t sig_traps;
+
+#ifdef __x86_64__
+# define REG_IP REG_RIP
+# define WIDTH "q"
+# define INT80_CLOBBERS "r8", "r9", "r10", "r11"
+#else
+# define REG_IP REG_EIP
+# define WIDTH "l"
+# define INT80_CLOBBERS
+#endif
+
+static unsigned long get_eflags(void)
+{
+	unsigned long eflags;
+	asm volatile ("pushf" WIDTH "\n\tpop" WIDTH " %0" : "=rm" (eflags));
+	return eflags;
+}
+
+static void set_eflags(unsigned long eflags)
+{
+	asm volatile ("push" WIDTH " %0\n\tpopf" WIDTH
+		      : : "rm" (eflags) : "flags");
+}
+
+#define X86_EFLAGS_TF (1UL << 8)
+
+static void sigtrap(int sig, siginfo_t *info, void *ctx_void)
+{
+	ucontext_t *ctx = (ucontext_t*)ctx_void;
+
+	if (get_eflags() & X86_EFLAGS_TF) {
+		set_eflags(get_eflags() & ~X86_EFLAGS_TF);
+		printf("[WARN]\tSIGTRAP handler had TF set\n");
+		_exit(1);
+	}
+
+	sig_traps++;
+
+	if (sig_traps == 10000 || sig_traps == 10001) {
+		printf("[WARN]\tHit %d SIGTRAPs with si_addr 0x%lx, ip 0x%lx\n",
+		       (int)sig_traps,
+		       (unsigned long)info->si_addr,
+		       (unsigned long)ctx->uc_mcontext.gregs[REG_IP]);
+	}
+}
+
+static void check_result(void)
+{
+	unsigned long new_eflags = get_eflags();
+	set_eflags(new_eflags & ~X86_EFLAGS_TF);
+
+	if (!sig_traps) {
+		printf("[FAIL]\tNo SIGTRAP\n");
+		exit(1);
+	}
+
+	if (!(new_eflags & X86_EFLAGS_TF)) {
+		printf("[FAIL]\tTF was cleared\n");
+		exit(1);
+	}
+
+	printf("[OK]\tSurvived with TF set and %d traps\n", (int)sig_traps);
+	sig_traps = 0;
+}
+
+int main()
+{
+#ifdef CAN_BUILD_32
+	int tmp;
+#endif
+
+	sethandler(SIGTRAP, sigtrap, 0);
+
+	printf("[RUN]\tSet TF and check nop\n");
+	set_eflags(get_eflags() | X86_EFLAGS_TF);
+	asm volatile ("nop");
+	check_result();
+
+#ifdef __x86_64__
+	printf("[RUN]\tSet TF and check syscall-less opportunistic sysret\n");
+	set_eflags(get_eflags() | X86_EFLAGS_TF);
+	extern unsigned char post_nop[];
+	asm volatile ("pushf" WIDTH "\n\t"
+		      "pop" WIDTH " %%r11\n\t"
+		      "nop\n\t"
+		      "post_nop:"
+		      : : "c" (post_nop) : "r11");
+	check_result();
+#endif
+#ifdef CAN_BUILD_32
+	printf("[RUN]\tSet TF and check int80\n");
+	set_eflags(get_eflags() | X86_EFLAGS_TF);
+	asm volatile ("int $0x80" : "=a" (tmp) : "a" (SYS_getpid)
+			: INT80_CLOBBERS);
+	check_result();
+#endif
+
+	/*
+	 * This test is particularly interesting if fast syscalls use
+	 * SYSENTER: it triggers a nasty design flaw in SYSENTER.
+	 * Specifically, SYSENTER does not clear TF, so either SYSENTER
+	 * or the next instruction traps at CPL0.  (Of course, Intel
+	 * mostly forgot to document exactly what happens here.)  So we
+	 * get a CPL0 fault with usergs (on 64-bit kernels) and possibly
+	 * no stack.  The only sane way the kernel can possibly handle
+	 * it is to clear TF on return from the #DB handler, but this
+	 * happens way too early to set TF in the saved pt_regs, so the
+	 * kernel has to do something clever to avoid losing track of
+	 * the TF bit.
+	 *
+	 * Needless to say, we've had bugs in this area.
+	 */
+	syscall(SYS_getpid);  /* Force symbol binding without TF set. */
+	printf("[RUN]\tSet TF and check a fast syscall\n");
+	set_eflags(get_eflags() | X86_EFLAGS_TF);
+	syscall(SYS_getpid);
+	check_result();
+
+	/* Now make sure that another fast syscall doesn't set TF again. */
+	printf("[RUN]\tFast syscall with TF cleared\n");
+	fflush(stdout);  /* Force a syscall */
+	if (get_eflags() & X86_EFLAGS_TF) {
+		printf("[FAIL]\tTF is now set\n");
+		exit(1);
+	}
+	if (sig_traps) {
+		printf("[FAIL]\tGot SIGTRAP\n");
+		exit(1);
+	}
+	printf("[OK]\tNothing unexpected happened\n");
+
+	return 0;
+}
diff --git a/tools/testing/selftests/x86/syscall_arg_fault.c b/tools/testing/selftests/x86/syscall_arg_fault.c
new file mode 100644
index 000000000..7db4fc9fa
--- /dev/null
+++ b/tools/testing/selftests/x86/syscall_arg_fault.c
@@ -0,0 +1,130 @@
+/*
+ * syscall_arg_fault.c - tests faults 32-bit fast syscall stack args
+ * Copyright (c) 2015 Andrew Lutomirski
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+
+#define _GNU_SOURCE
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/signal.h>
+#include <sys/ucontext.h>
+#include <err.h>
+#include <setjmp.h>
+#include <errno.h>
+
+/* Our sigaltstack scratch space. */
+static unsigned char altstack_data[SIGSTKSZ];
+
+static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
+		       int flags)
+{
+	struct sigaction sa;
+	memset(&sa, 0, sizeof(sa));
+	sa.sa_sigaction = handler;
+	sa.sa_flags = SA_SIGINFO | flags;
+	sigemptyset(&sa.sa_mask);
+	if (sigaction(sig, &sa, 0))
+		err(1, "sigaction");
+}
+
+static volatile sig_atomic_t sig_traps;
+static sigjmp_buf jmpbuf;
+
+static volatile sig_atomic_t n_errs;
+
+static void sigsegv(int sig, siginfo_t *info, void *ctx_void)
+{
+	ucontext_t *ctx = (ucontext_t*)ctx_void;
+
+	if (ctx->uc_mcontext.gregs[REG_EAX] != -EFAULT) {
+		printf("[FAIL]\tAX had the wrong value: 0x%x\n",
+		       ctx->uc_mcontext.gregs[REG_EAX]);
+		n_errs++;
+	} else {
+		printf("[OK]\tSeems okay\n");
+	}
+
+	siglongjmp(jmpbuf, 1);
+}
+
+static void sigill(int sig, siginfo_t *info, void *ctx_void)
+{
+	printf("[SKIP]\tIllegal instruction\n");
+	siglongjmp(jmpbuf, 1);
+}
+
+int main()
+{
+	stack_t stack = {
+		.ss_sp = altstack_data,
+		.ss_size = SIGSTKSZ,
+	};
+	if (sigaltstack(&stack, NULL) != 0)
+		err(1, "sigaltstack");
+
+	sethandler(SIGSEGV, sigsegv, SA_ONSTACK);
+	sethandler(SIGILL, sigill, SA_ONSTACK);
+
+	/*
+	 * Exercise another nasty special case.  The 32-bit SYSCALL
+	 * and SYSENTER instructions (even in compat mode) each
+	 * clobber one register.  A Linux system call has a syscall
+	 * number and six arguments, and the user stack pointer
+	 * needs to live in some register on return.  That means
+	 * that we need eight registers, but SYSCALL and SYSENTER
+	 * only preserve seven registers.  As a result, one argument
+	 * ends up on the stack.  The stack is user memory, which
+	 * means that the kernel can fail to read it.
+	 *
+	 * The 32-bit fast system calls don't have a defined ABI:
+	 * we're supposed to invoke them through the vDSO.  So we'll
+	 * fudge it: we set all regs to invalid pointer values and
+	 * invoke the entry instruction.  The return will fail no
+	 * matter what, and we completely lose our program state,
+	 * but we can fix it up with a signal handler.
+	 */
+
+	printf("[RUN]\tSYSENTER with invalid state\n");
+	if (sigsetjmp(jmpbuf, 1) == 0) {
+		asm volatile (
+			"movl $-1, %%eax\n\t"
+			"movl $-1, %%ebx\n\t"
+			"movl $-1, %%ecx\n\t"
+			"movl $-1, %%edx\n\t"
+			"movl $-1, %%esi\n\t"
+			"movl $-1, %%edi\n\t"
+			"movl $-1, %%ebp\n\t"
+			"movl $-1, %%esp\n\t"
+			"sysenter"
+			: : : "memory", "flags");
+	}
+
+	printf("[RUN]\tSYSCALL with invalid state\n");
+	if (sigsetjmp(jmpbuf, 1) == 0) {
+		asm volatile (
+			"movl $-1, %%eax\n\t"
+			"movl $-1, %%ebx\n\t"
+			"movl $-1, %%ecx\n\t"
+			"movl $-1, %%edx\n\t"
+			"movl $-1, %%esi\n\t"
+			"movl $-1, %%edi\n\t"
+			"movl $-1, %%ebp\n\t"
+			"movl $-1, %%esp\n\t"
+			"syscall\n\t"
+			"pushl $0"	/* make sure we segfault cleanly */
+			: : : "memory", "flags");
+	}
+
+	return 0;
+}
diff --git a/tools/testing/selftests/x86/syscall_nt.c b/tools/testing/selftests/x86/syscall_nt.c
new file mode 100644
index 000000000..74e6b3fc2
--- /dev/null
+++ b/tools/testing/selftests/x86/syscall_nt.c
@@ -0,0 +1,96 @@
+/*
+ * syscall_nt.c - checks syscalls with NT set
+ * Copyright (c) 2014-2015 Andrew Lutomirski
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * Some obscure user-space code requires the ability to make system calls
+ * with FLAGS.NT set.  Make sure it works.
+ */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <string.h>
+#include <signal.h>
+#include <err.h>
+#include <sys/syscall.h>
+#include <asm/processor-flags.h>
+
+#ifdef __x86_64__
+# define WIDTH "q"
+#else
+# define WIDTH "l"
+#endif
+
+static unsigned int nerrs;
+
+static unsigned long get_eflags(void)
+{
+	unsigned long eflags;
+	asm volatile ("pushf" WIDTH "\n\tpop" WIDTH " %0" : "=rm" (eflags));
+	return eflags;
+}
+
+static void set_eflags(unsigned long eflags)
+{
+	asm volatile ("push" WIDTH " %0\n\tpopf" WIDTH
+		      : : "rm" (eflags) : "flags");
+}
+
+static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
+		       int flags)
+{
+	struct sigaction sa;
+	memset(&sa, 0, sizeof(sa));
+	sa.sa_sigaction = handler;
+	sa.sa_flags = SA_SIGINFO | flags;
+	sigemptyset(&sa.sa_mask);
+	if (sigaction(sig, &sa, 0))
+		err(1, "sigaction");
+}
+
+static void sigtrap(int sig, siginfo_t *si, void *ctx_void)
+{
+}
+
+static void do_it(unsigned long extraflags)
+{
+	unsigned long flags;
+
+	set_eflags(get_eflags() | extraflags);
+	syscall(SYS_getpid);
+	flags = get_eflags();
+	set_eflags(X86_EFLAGS_IF | X86_EFLAGS_FIXED);
+	if ((flags & extraflags) == extraflags) {
+		printf("[OK]\tThe syscall worked and flags are still set\n");
+	} else {
+		printf("[FAIL]\tThe syscall worked but flags were cleared (flags = 0x%lx but expected 0x%lx set)\n",
+		       flags, extraflags);
+		nerrs++;
+	}
+}
+
+int main(void)
+{
+	printf("[RUN]\tSet NT and issue a syscall\n");
+	do_it(X86_EFLAGS_NT);
+
+	/*
+	 * Now try it again with TF set -- TF forces returns via IRET in all
+	 * cases except non-ptregs-using 64-bit full fast path syscalls.
+	 */
+
+	sethandler(SIGTRAP, sigtrap, 0);
+
+	printf("[RUN]\tSet NT|TF and issue a syscall\n");
+	do_it(X86_EFLAGS_NT | X86_EFLAGS_TF);
+
+	return nerrs == 0 ? 0 : 1;
+}
diff --git a/tools/testing/selftests/x86/sysret_rip.c b/tools/testing/selftests/x86/sysret_rip.c
new file mode 100644
index 000000000..d85ec5b36
--- /dev/null
+++ b/tools/testing/selftests/x86/sysret_rip.c
@@ -0,0 +1,195 @@
+/*
+ * sigreturn.c - tests that x86 avoids Intel SYSRET pitfalls
+ * Copyright (c) 2014-2016 Andrew Lutomirski
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+
+#define _GNU_SOURCE
+
+#include <stdlib.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <string.h>
+#include <inttypes.h>
+#include <sys/signal.h>
+#include <sys/ucontext.h>
+#include <sys/syscall.h>
+#include <err.h>
+#include <stddef.h>
+#include <stdbool.h>
+#include <setjmp.h>
+#include <sys/user.h>
+#include <sys/mman.h>
+#include <assert.h>
+
+
+asm (
+	".pushsection \".text\", \"ax\"\n\t"
+	".balign 4096\n\t"
+	"test_page: .globl test_page\n\t"
+	".fill 4094,1,0xcc\n\t"
+	"test_syscall_insn:\n\t"
+	"syscall\n\t"
+	".ifne . - test_page - 4096\n\t"
+	".error \"test page is not one page long\"\n\t"
+	".endif\n\t"
+	".popsection"
+    );
+
+extern const char test_page[];
+static void const *current_test_page_addr = test_page;
+
+static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
+		       int flags)
+{
+	struct sigaction sa;
+	memset(&sa, 0, sizeof(sa));
+	sa.sa_sigaction = handler;
+	sa.sa_flags = SA_SIGINFO | flags;
+	sigemptyset(&sa.sa_mask);
+	if (sigaction(sig, &sa, 0))
+		err(1, "sigaction");
+}
+
+static void clearhandler(int sig)
+{
+	struct sigaction sa;
+	memset(&sa, 0, sizeof(sa));
+	sa.sa_handler = SIG_DFL;
+	sigemptyset(&sa.sa_mask);
+	if (sigaction(sig, &sa, 0))
+		err(1, "sigaction");
+}
+
+/* State used by our signal handlers. */
+static gregset_t initial_regs;
+
+static volatile unsigned long rip;
+
+static void sigsegv_for_sigreturn_test(int sig, siginfo_t *info, void *ctx_void)
+{
+	ucontext_t *ctx = (ucontext_t*)ctx_void;
+
+	if (rip != ctx->uc_mcontext.gregs[REG_RIP]) {
+		printf("[FAIL]\tRequested RIP=0x%lx but got RIP=0x%lx\n",
+		       rip, (unsigned long)ctx->uc_mcontext.gregs[REG_RIP]);
+		fflush(stdout);
+		_exit(1);
+	}
+
+	memcpy(&ctx->uc_mcontext.gregs, &initial_regs, sizeof(gregset_t));
+
+	printf("[OK]\tGot SIGSEGV at RIP=0x%lx\n", rip);
+}
+
+static void sigusr1(int sig, siginfo_t *info, void *ctx_void)
+{
+	ucontext_t *ctx = (ucontext_t*)ctx_void;
+
+	memcpy(&initial_regs, &ctx->uc_mcontext.gregs, sizeof(gregset_t));
+
+	/* Set IP and CX to match so that SYSRET can happen. */
+	ctx->uc_mcontext.gregs[REG_RIP] = rip;
+	ctx->uc_mcontext.gregs[REG_RCX] = rip;
+
+	/* R11 and EFLAGS should already match. */
+	assert(ctx->uc_mcontext.gregs[REG_EFL] ==
+	       ctx->uc_mcontext.gregs[REG_R11]);
+
+	sethandler(SIGSEGV, sigsegv_for_sigreturn_test, SA_RESETHAND);
+
+	return;
+}
+
+static void test_sigreturn_to(unsigned long ip)
+{
+	rip = ip;
+	printf("[RUN]\tsigreturn to 0x%lx\n", ip);
+	raise(SIGUSR1);
+}
+
+static jmp_buf jmpbuf;
+
+static void sigsegv_for_fallthrough(int sig, siginfo_t *info, void *ctx_void)
+{
+	ucontext_t *ctx = (ucontext_t*)ctx_void;
+
+	if (rip != ctx->uc_mcontext.gregs[REG_RIP]) {
+		printf("[FAIL]\tExpected SIGSEGV at 0x%lx but got RIP=0x%lx\n",
+		       rip, (unsigned long)ctx->uc_mcontext.gregs[REG_RIP]);
+		fflush(stdout);
+		_exit(1);
+	}
+
+	siglongjmp(jmpbuf, 1);
+}
+
+static void test_syscall_fallthrough_to(unsigned long ip)
+{
+	void *new_address = (void *)(ip - 4096);
+	void *ret;
+
+	printf("[RUN]\tTrying a SYSCALL that falls through to 0x%lx\n", ip);
+
+	ret = mremap((void *)current_test_page_addr, 4096, 4096,
+		     MREMAP_MAYMOVE | MREMAP_FIXED, new_address);
+	if (ret == MAP_FAILED) {
+		if (ip <= (1UL << 47) - PAGE_SIZE) {
+			err(1, "mremap to %p", new_address);
+		} else {
+			printf("[OK]\tmremap to %p failed\n", new_address);
+			return;
+		}
+	}
+
+	if (ret != new_address)
+		errx(1, "mremap malfunctioned: asked for %p but got %p\n",
+		     new_address, ret);
+
+	current_test_page_addr = new_address;
+	rip = ip;
+
+	if (sigsetjmp(jmpbuf, 1) == 0) {
+		asm volatile ("call *%[syscall_insn]" :: "a" (SYS_getpid),
+			      [syscall_insn] "rm" (ip - 2));
+		errx(1, "[FAIL]\tSyscall trampoline returned");
+	}
+
+	printf("[OK]\tWe survived\n");
+}
+
+int main()
+{
+	/*
+	 * When the kernel returns from a slow-path syscall, it will
+	 * detect whether SYSRET is appropriate.  If it incorrectly
+	 * thinks that SYSRET is appropriate when RIP is noncanonical,
+	 * it'll crash on Intel CPUs.
+	 */
+	sethandler(SIGUSR1, sigusr1, 0);
+	for (int i = 47; i < 64; i++)
+		test_sigreturn_to(1UL<<i);
+
+	clearhandler(SIGUSR1);
+
+	sethandler(SIGSEGV, sigsegv_for_fallthrough, 0);
+
+	/* One extra test to check that we didn't screw up the mremap logic. */
+	test_syscall_fallthrough_to((1UL << 47) - 2*PAGE_SIZE);
+
+	/* These are the interesting cases. */
+	for (int i = 47; i < 64; i++) {
+		test_syscall_fallthrough_to((1UL<<i) - PAGE_SIZE);
+		test_syscall_fallthrough_to(1UL<<i);
+	}
+
+	return 0;
+}
diff --git a/tools/testing/selftests/x86/sysret_ss_attrs.c b/tools/testing/selftests/x86/sysret_ss_attrs.c
new file mode 100644
index 000000000..ce42d5a64
--- /dev/null
+++ b/tools/testing/selftests/x86/sysret_ss_attrs.c
@@ -0,0 +1,112 @@
+/*
+ * sysret_ss_attrs.c - test that syscalls return valid hidden SS attributes
+ * Copyright (c) 2015 Andrew Lutomirski
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * On AMD CPUs, SYSRET can return with a valid SS descriptor with with
+ * the hidden attributes set to an unusable state.  Make sure the kernel
+ * doesn't let this happen.
+ */
+
+#define _GNU_SOURCE
+
+#include <stdlib.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <err.h>
+#include <stddef.h>
+#include <stdbool.h>
+#include <pthread.h>
+
+static void *threadproc(void *ctx)
+{
+	/*
+	 * Do our best to cause sleeps on this CPU to exit the kernel and
+	 * re-enter with SS = 0.
+	 */
+	while (true)
+		;
+
+	return NULL;
+}
+
+#ifdef __x86_64__
+extern unsigned long call32_from_64(void *stack, void (*function)(void));
+
+asm (".pushsection .text\n\t"
+     ".code32\n\t"
+     "test_ss:\n\t"
+     "pushl $0\n\t"
+     "popl %eax\n\t"
+     "ret\n\t"
+     ".code64");
+extern void test_ss(void);
+#endif
+
+int main()
+{
+	/*
+	 * Start a busy-looping thread on the same CPU we're on.
+	 * For simplicity, just stick everything to CPU 0.  This will
+	 * fail in some containers, but that's probably okay.
+	 */
+	cpu_set_t cpuset;
+	CPU_ZERO(&cpuset);
+	CPU_SET(0, &cpuset);
+	if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0)
+		printf("[WARN]\tsched_setaffinity failed\n");
+
+	pthread_t thread;
+	if (pthread_create(&thread, 0, threadproc, 0) != 0)
+		err(1, "pthread_create");
+
+#ifdef __x86_64__
+	unsigned char *stack32 = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
+				      MAP_32BIT | MAP_ANONYMOUS | MAP_PRIVATE,
+				      -1, 0);
+	if (stack32 == MAP_FAILED)
+		err(1, "mmap");
+#endif
+
+	printf("[RUN]\tSyscalls followed by SS validation\n");
+
+	for (int i = 0; i < 1000; i++) {
+		/*
+		 * Go to sleep and return using sysret (if we're 64-bit
+		 * or we're 32-bit on AMD on a 64-bit kernel).  On AMD CPUs,
+		 * SYSRET doesn't fix up the cached SS descriptor, so the
+		 * kernel needs some kind of workaround to make sure that we
+		 * end the system call with a valid stack segment.  This
+		 * can be a confusing failure because the SS *selector*
+		 * is the same regardless.
+		 */
+		usleep(2);
+
+#ifdef __x86_64__
+		/*
+		 * On 32-bit, just doing a syscall through glibc is enough
+		 * to cause a crash if our cached SS descriptor is invalid.
+		 * On 64-bit, it's not, so try extra hard.
+		 */
+		call32_from_64(stack32 + 4088, test_ss);
+#endif
+	}
+
+	printf("[OK]\tWe survived\n");
+
+#ifdef __x86_64__
+	munmap(stack32, 4096);
+#endif
+
+	return 0;
+}
diff --git a/tools/testing/selftests/x86/test_FCMOV.c b/tools/testing/selftests/x86/test_FCMOV.c
new file mode 100644
index 000000000..6b5036fbb
--- /dev/null
+++ b/tools/testing/selftests/x86/test_FCMOV.c
@@ -0,0 +1,94 @@
+// SPDX-License-Identifier: GPL-2.0
+#undef _GNU_SOURCE
+#define _GNU_SOURCE 1
+#undef __USE_GNU
+#define __USE_GNU 1
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <signal.h>
+#include <sys/types.h>
+#include <sys/select.h>
+#include <sys/time.h>
+#include <sys/wait.h>
+
+#define TEST(insn) \
+long double __attribute__((noinline)) insn(long flags) \
+{						\
+	long double out;			\
+	asm ("\n"				\
+	"	push	%1""\n"			\
+	"	popf""\n"			\
+	"	fldpi""\n"			\
+	"	fld1""\n"			\
+	"	" #insn " %%st(1), %%st" "\n"	\
+	"	ffree	%%st(1)" "\n"		\
+	: "=t" (out)				\
+	: "r" (flags)				\
+	);					\
+	return out;				\
+}
+
+TEST(fcmovb)
+TEST(fcmove)
+TEST(fcmovbe)
+TEST(fcmovu)
+TEST(fcmovnb)
+TEST(fcmovne)
+TEST(fcmovnbe)
+TEST(fcmovnu)
+
+enum {
+	CF = 1 << 0,
+	PF = 1 << 2,
+	ZF = 1 << 6,
+};
+
+void sighandler(int sig)
+{
+	printf("[FAIL]\tGot signal %d, exiting\n", sig);
+	exit(1);
+}
+
+int main(int argc, char **argv, char **envp)
+{
+	int err = 0;
+
+	/* SIGILL triggers on 32-bit kernels w/o fcomi emulation
+	 * when run with "no387 nofxsr". Other signals are caught
+	 * just in case.
+	 */
+	signal(SIGILL, sighandler);
+	signal(SIGFPE, sighandler);
+	signal(SIGSEGV, sighandler);
+
+	printf("[RUN]\tTesting fcmovCC instructions\n");
+	/* If fcmovCC() returns 1.0, the move wasn't done */
+	err |= !(fcmovb(0)   == 1.0); err |= !(fcmovnb(0)  != 1.0);
+	err |= !(fcmove(0)   == 1.0); err |= !(fcmovne(0)  != 1.0);
+	err |= !(fcmovbe(0)  == 1.0); err |= !(fcmovnbe(0) != 1.0);
+	err |= !(fcmovu(0)   == 1.0); err |= !(fcmovnu(0)  != 1.0);
+
+	err |= !(fcmovb(CF)  != 1.0); err |= !(fcmovnb(CF)  == 1.0);
+	err |= !(fcmove(CF)  == 1.0); err |= !(fcmovne(CF)  != 1.0);
+	err |= !(fcmovbe(CF) != 1.0); err |= !(fcmovnbe(CF) == 1.0);
+	err |= !(fcmovu(CF)  == 1.0); err |= !(fcmovnu(CF)  != 1.0);
+
+	err |= !(fcmovb(ZF)  == 1.0); err |= !(fcmovnb(ZF)  != 1.0);
+	err |= !(fcmove(ZF)  != 1.0); err |= !(fcmovne(ZF)  == 1.0);
+	err |= !(fcmovbe(ZF) != 1.0); err |= !(fcmovnbe(ZF) == 1.0);
+	err |= !(fcmovu(ZF)  == 1.0); err |= !(fcmovnu(ZF)  != 1.0);
+
+	err |= !(fcmovb(PF)  == 1.0); err |= !(fcmovnb(PF)  != 1.0);
+	err |= !(fcmove(PF)  == 1.0); err |= !(fcmovne(PF)  != 1.0);
+	err |= !(fcmovbe(PF) == 1.0); err |= !(fcmovnbe(PF) != 1.0);
+	err |= !(fcmovu(PF)  != 1.0); err |= !(fcmovnu(PF)  == 1.0);
+
+        if (!err)
+                printf("[OK]\tfcmovCC\n");
+	else
+		printf("[FAIL]\tfcmovCC errors: %d\n", err);
+
+	return err;
+}
diff --git a/tools/testing/selftests/x86/test_FCOMI.c b/tools/testing/selftests/x86/test_FCOMI.c
new file mode 100644
index 000000000..aec6692c6
--- /dev/null
+++ b/tools/testing/selftests/x86/test_FCOMI.c
@@ -0,0 +1,332 @@
+// SPDX-License-Identifier: GPL-2.0
+#undef _GNU_SOURCE
+#define _GNU_SOURCE 1
+#undef __USE_GNU
+#define __USE_GNU 1
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <signal.h>
+#include <sys/types.h>
+#include <sys/select.h>
+#include <sys/time.h>
+#include <sys/wait.h>
+#include <fenv.h>
+
+enum {
+	CF = 1 << 0,
+	PF = 1 << 2,
+	ZF = 1 << 6,
+	ARITH = CF | PF | ZF,
+};
+
+long res_fcomi_pi_1;
+long res_fcomi_1_pi;
+long res_fcomi_1_1;
+long res_fcomi_nan_1;
+/* sNaN is s|111 1111 1|1xx xxxx xxxx xxxx xxxx xxxx */
+/* qNaN is s|111 1111 1|0xx xxxx xxxx xxxx xxxx xxxx (some x must be nonzero) */
+int snan = 0x7fc11111;
+int qnan = 0x7f811111;
+unsigned short snan1[5];
+/* sNaN80 is s|111 1111 1111 1111 |10xx xx...xx (some x must be nonzero) */
+unsigned short snan80[5] = { 0x1111, 0x1111, 0x1111, 0x8111, 0x7fff };
+
+int test(long flags)
+{
+	feclearexcept(FE_DIVBYZERO|FE_INEXACT|FE_INVALID|FE_OVERFLOW|FE_UNDERFLOW);
+
+	asm ("\n"
+
+	"	push	%0""\n"
+	"	popf""\n"
+	"	fld1""\n"
+	"	fldpi""\n"
+	"	fcomi	%%st(1), %%st" "\n"
+	"	ffree	%%st(0)" "\n"
+	"	ffree	%%st(1)" "\n"
+	"	pushf""\n"
+	"	pop	res_fcomi_1_pi""\n"
+
+	"	push	%0""\n"
+	"	popf""\n"
+	"	fldpi""\n"
+	"	fld1""\n"
+	"	fcomi	%%st(1), %%st" "\n"
+	"	ffree	%%st(0)" "\n"
+	"	ffree	%%st(1)" "\n"
+	"	pushf""\n"
+	"	pop	res_fcomi_pi_1""\n"
+
+	"	push	%0""\n"
+	"	popf""\n"
+	"	fld1""\n"
+	"	fld1""\n"
+	"	fcomi	%%st(1), %%st" "\n"
+	"	ffree	%%st(0)" "\n"
+	"	ffree	%%st(1)" "\n"
+	"	pushf""\n"
+	"	pop	res_fcomi_1_1""\n"
+	:
+	: "r" (flags)
+	);
+	if ((res_fcomi_1_pi & ARITH) != (0)) {
+		printf("[BAD]\tfcomi_1_pi with flags:%lx\n", flags);
+		return 1;
+	}
+	if ((res_fcomi_pi_1 & ARITH) != (CF)) {
+		printf("[BAD]\tfcomi_pi_1 with flags:%lx->%lx\n", flags, res_fcomi_pi_1 & ARITH);
+		return 1;
+	}
+	if ((res_fcomi_1_1 & ARITH) != (ZF)) {
+		printf("[BAD]\tfcomi_1_1 with flags:%lx\n", flags);
+		return 1;
+	}
+	if (fetestexcept(FE_INVALID) != 0) {
+		printf("[BAD]\tFE_INVALID is set in %s\n", __func__);
+		return 1;
+	}
+	return 0;
+}
+
+int test_qnan(long flags)
+{
+	feclearexcept(FE_DIVBYZERO|FE_INEXACT|FE_INVALID|FE_OVERFLOW|FE_UNDERFLOW);
+
+	asm ("\n"
+	"	push	%0""\n"
+	"	popf""\n"
+	"	flds	qnan""\n"
+	"	fld1""\n"
+	"	fnclex""\n"		// fld of a qnan raised FE_INVALID, clear it
+	"	fcomi	%%st(1), %%st" "\n"
+	"	ffree	%%st(0)" "\n"
+	"	ffree	%%st(1)" "\n"
+	"	pushf""\n"
+	"	pop	res_fcomi_nan_1""\n"
+	:
+	: "r" (flags)
+	);
+	if ((res_fcomi_nan_1 & ARITH) != (ZF|CF|PF)) {
+		printf("[BAD]\tfcomi_qnan_1 with flags:%lx\n", flags);
+		return 1;
+	}
+	if (fetestexcept(FE_INVALID) != FE_INVALID) {
+		printf("[BAD]\tFE_INVALID is not set in %s\n", __func__);
+		return 1;
+	}
+	return 0;
+}
+
+int testu_qnan(long flags)
+{
+	feclearexcept(FE_DIVBYZERO|FE_INEXACT|FE_INVALID|FE_OVERFLOW|FE_UNDERFLOW);
+
+	asm ("\n"
+	"	push	%0""\n"
+	"	popf""\n"
+	"	flds	qnan""\n"
+	"	fld1""\n"
+	"	fnclex""\n"		// fld of a qnan raised FE_INVALID, clear it
+	"	fucomi	%%st(1), %%st" "\n"
+	"	ffree	%%st(0)" "\n"
+	"	ffree	%%st(1)" "\n"
+	"	pushf""\n"
+	"	pop	res_fcomi_nan_1""\n"
+	:
+	: "r" (flags)
+	);
+	if ((res_fcomi_nan_1 & ARITH) != (ZF|CF|PF)) {
+		printf("[BAD]\tfcomi_qnan_1 with flags:%lx\n", flags);
+		return 1;
+	}
+	if (fetestexcept(FE_INVALID) != 0) {
+		printf("[BAD]\tFE_INVALID is set in %s\n", __func__);
+		return 1;
+	}
+	return 0;
+}
+
+int testu_snan(long flags)
+{
+	feclearexcept(FE_DIVBYZERO|FE_INEXACT|FE_INVALID|FE_OVERFLOW|FE_UNDERFLOW);
+
+	asm ("\n"
+	"	push	%0""\n"
+	"	popf""\n"
+//	"	flds	snan""\n"	// WRONG, this will convert 32-bit fp snan to a *qnan* in 80-bit fp register!
+//	"	fstpt	snan1""\n"	// if uncommented, it prints "snan1:7fff c111 1100 0000 0000" - c111, not 8111!
+//	"	fnclex""\n"		// flds of a snan raised FE_INVALID, clear it
+	"	fldt	snan80""\n"	// fldt never raise FE_INVALID
+	"	fld1""\n"
+	"	fucomi	%%st(1), %%st" "\n"
+	"	ffree	%%st(0)" "\n"
+	"	ffree	%%st(1)" "\n"
+	"	pushf""\n"
+	"	pop	res_fcomi_nan_1""\n"
+	:
+	: "r" (flags)
+	);
+	if ((res_fcomi_nan_1 & ARITH) != (ZF|CF|PF)) {
+		printf("[BAD]\tfcomi_qnan_1 with flags:%lx\n", flags);
+		return 1;
+	}
+//	printf("snan:%x snan1:%04x %04x %04x %04x %04x\n", snan, snan1[4], snan1[3], snan1[2], snan1[1], snan1[0]);
+	if (fetestexcept(FE_INVALID) != FE_INVALID) {
+		printf("[BAD]\tFE_INVALID is not set in %s\n", __func__);
+		return 1;
+	}
+	return 0;
+}
+
+int testp(long flags)
+{
+	feclearexcept(FE_DIVBYZERO|FE_INEXACT|FE_INVALID|FE_OVERFLOW|FE_UNDERFLOW);
+
+	asm ("\n"
+
+	"	push	%0""\n"
+	"	popf""\n"
+	"	fld1""\n"
+	"	fldpi""\n"
+	"	fcomip	%%st(1), %%st" "\n"
+	"	ffree	%%st(0)" "\n"
+	"	pushf""\n"
+	"	pop	res_fcomi_1_pi""\n"
+
+	"	push	%0""\n"
+	"	popf""\n"
+	"	fldpi""\n"
+	"	fld1""\n"
+	"	fcomip	%%st(1), %%st" "\n"
+	"	ffree	%%st(0)" "\n"
+	"	pushf""\n"
+	"	pop	res_fcomi_pi_1""\n"
+
+	"	push	%0""\n"
+	"	popf""\n"
+	"	fld1""\n"
+	"	fld1""\n"
+	"	fcomip	%%st(1), %%st" "\n"
+	"	ffree	%%st(0)" "\n"
+	"	pushf""\n"
+	"	pop	res_fcomi_1_1""\n"
+	:
+	: "r" (flags)
+	);
+	if ((res_fcomi_1_pi & ARITH) != (0)) {
+		printf("[BAD]\tfcomi_1_pi with flags:%lx\n", flags);
+		return 1;
+	}
+	if ((res_fcomi_pi_1 & ARITH) != (CF)) {
+		printf("[BAD]\tfcomi_pi_1 with flags:%lx->%lx\n", flags, res_fcomi_pi_1 & ARITH);
+		return 1;
+	}
+	if ((res_fcomi_1_1 & ARITH) != (ZF)) {
+		printf("[BAD]\tfcomi_1_1 with flags:%lx\n", flags);
+		return 1;
+	}
+	if (fetestexcept(FE_INVALID) != 0) {
+		printf("[BAD]\tFE_INVALID is set in %s\n", __func__);
+		return 1;
+	}
+	return 0;
+}
+
+int testp_qnan(long flags)
+{
+	feclearexcept(FE_DIVBYZERO|FE_INEXACT|FE_INVALID|FE_OVERFLOW|FE_UNDERFLOW);
+
+	asm ("\n"
+	"	push	%0""\n"
+	"	popf""\n"
+	"	flds	qnan""\n"
+	"	fld1""\n"
+	"	fnclex""\n"		// fld of a qnan raised FE_INVALID, clear it
+	"	fcomip	%%st(1), %%st" "\n"
+	"	ffree	%%st(0)" "\n"
+	"	pushf""\n"
+	"	pop	res_fcomi_nan_1""\n"
+	:
+	: "r" (flags)
+	);
+	if ((res_fcomi_nan_1 & ARITH) != (ZF|CF|PF)) {
+		printf("[BAD]\tfcomi_qnan_1 with flags:%lx\n", flags);
+		return 1;
+	}
+	if (fetestexcept(FE_INVALID) != FE_INVALID) {
+		printf("[BAD]\tFE_INVALID is not set in %s\n", __func__);
+		return 1;
+	}
+	return 0;
+}
+
+int testup_qnan(long flags)
+{
+	feclearexcept(FE_DIVBYZERO|FE_INEXACT|FE_INVALID|FE_OVERFLOW|FE_UNDERFLOW);
+
+	asm ("\n"
+	"	push	%0""\n"
+	"	popf""\n"
+	"	flds	qnan""\n"
+	"	fld1""\n"
+	"	fnclex""\n"		// fld of a qnan raised FE_INVALID, clear it
+	"	fucomip	%%st(1), %%st" "\n"
+	"	ffree	%%st(0)" "\n"
+	"	pushf""\n"
+	"	pop	res_fcomi_nan_1""\n"
+	:
+	: "r" (flags)
+	);
+	if ((res_fcomi_nan_1 & ARITH) != (ZF|CF|PF)) {
+		printf("[BAD]\tfcomi_qnan_1 with flags:%lx\n", flags);
+		return 1;
+	}
+	if (fetestexcept(FE_INVALID) != 0) {
+		printf("[BAD]\tFE_INVALID is set in %s\n", __func__);
+		return 1;
+	}
+	return 0;
+}
+
+void sighandler(int sig)
+{
+	printf("[FAIL]\tGot signal %d, exiting\n", sig);
+	exit(1);
+}
+
+int main(int argc, char **argv, char **envp)
+{
+	int err = 0;
+
+	/* SIGILL triggers on 32-bit kernels w/o fcomi emulation
+	 * when run with "no387 nofxsr". Other signals are caught
+	 * just in case.
+	 */
+	signal(SIGILL, sighandler);
+	signal(SIGFPE, sighandler);
+	signal(SIGSEGV, sighandler);
+
+	printf("[RUN]\tTesting f[u]comi[p] instructions\n");
+	err |= test(0);
+	err |= test_qnan(0);
+	err |= testu_qnan(0);
+	err |= testu_snan(0);
+	err |= test(CF|ZF|PF);
+	err |= test_qnan(CF|ZF|PF);
+	err |= testu_qnan(CF|ZF|PF);
+	err |= testu_snan(CF|ZF|PF);
+	err |= testp(0);
+	err |= testp_qnan(0);
+	err |= testup_qnan(0);
+	err |= testp(CF|ZF|PF);
+	err |= testp_qnan(CF|ZF|PF);
+	err |= testup_qnan(CF|ZF|PF);
+	if (!err)
+		printf("[OK]\tf[u]comi[p]\n");
+	else
+		printf("[FAIL]\tf[u]comi[p] errors: %d\n", err);
+
+	return err;
+}
diff --git a/tools/testing/selftests/x86/test_FISTTP.c b/tools/testing/selftests/x86/test_FISTTP.c
new file mode 100644
index 000000000..09789c0ce
--- /dev/null
+++ b/tools/testing/selftests/x86/test_FISTTP.c
@@ -0,0 +1,138 @@
+// SPDX-License-Identifier: GPL-2.0
+#undef _GNU_SOURCE
+#define _GNU_SOURCE 1
+#undef __USE_GNU
+#define __USE_GNU 1
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <signal.h>
+#include <sys/types.h>
+#include <sys/select.h>
+#include <sys/time.h>
+#include <sys/wait.h>
+#include <fenv.h>
+
+unsigned long long res64 = -1;
+unsigned int res32 = -1;
+unsigned short res16 = -1;
+
+int test(void)
+{
+	int ex;
+
+	feclearexcept(FE_DIVBYZERO|FE_INEXACT|FE_INVALID|FE_OVERFLOW|FE_UNDERFLOW);
+	asm volatile ("\n"
+	"	fld1""\n"
+	"	fisttp	res16""\n"
+	"	fld1""\n"
+	"	fisttpl	res32""\n"
+	"	fld1""\n"
+	"	fisttpll res64""\n"
+	: : : "memory"
+	);
+	if (res16 != 1 || res32 != 1 || res64 != 1) {
+		printf("[BAD]\tfisttp 1\n");
+		return 1;
+	}
+	ex = fetestexcept(FE_DIVBYZERO|FE_INEXACT|FE_INVALID|FE_OVERFLOW|FE_UNDERFLOW);
+	if (ex != 0) {
+		printf("[BAD]\tfisttp 1: wrong exception state\n");
+		return 1;
+	}
+
+	feclearexcept(FE_DIVBYZERO|FE_INEXACT|FE_INVALID|FE_OVERFLOW|FE_UNDERFLOW);
+	asm volatile ("\n"
+	"	fldpi""\n"
+	"	fisttp	res16""\n"
+	"	fldpi""\n"
+	"	fisttpl	res32""\n"
+	"	fldpi""\n"
+	"	fisttpll res64""\n"
+	: : : "memory"
+	);
+	if (res16 != 3 || res32 != 3 || res64 != 3) {
+		printf("[BAD]\tfisttp pi\n");
+		return 1;
+	}
+	ex = fetestexcept(FE_DIVBYZERO|FE_INEXACT|FE_INVALID|FE_OVERFLOW|FE_UNDERFLOW);
+	if (ex != FE_INEXACT) {
+		printf("[BAD]\tfisttp pi: wrong exception state\n");
+		return 1;
+	}
+
+	feclearexcept(FE_DIVBYZERO|FE_INEXACT|FE_INVALID|FE_OVERFLOW|FE_UNDERFLOW);
+	asm volatile ("\n"
+	"	fldpi""\n"
+	"	fchs""\n"
+	"	fisttp	res16""\n"
+	"	fldpi""\n"
+	"	fchs""\n"
+	"	fisttpl	res32""\n"
+	"	fldpi""\n"
+	"	fchs""\n"
+	"	fisttpll res64""\n"
+	: : : "memory"
+	);
+	if (res16 != 0xfffd || res32 != 0xfffffffd || res64 != 0xfffffffffffffffdULL) {
+		printf("[BAD]\tfisttp -pi\n");
+		return 1;
+	}
+	ex = fetestexcept(FE_DIVBYZERO|FE_INEXACT|FE_INVALID|FE_OVERFLOW|FE_UNDERFLOW);
+	if (ex != FE_INEXACT) {
+		printf("[BAD]\tfisttp -pi: wrong exception state\n");
+		return 1;
+	}
+
+	feclearexcept(FE_DIVBYZERO|FE_INEXACT|FE_INVALID|FE_OVERFLOW|FE_UNDERFLOW);
+	asm volatile ("\n"
+	"	fldln2""\n"
+	"	fisttp	res16""\n"
+	"	fldln2""\n"
+	"	fisttpl	res32""\n"
+	"	fldln2""\n"
+	"	fisttpll res64""\n"
+	: : : "memory"
+	);
+	/* Test truncation to zero (round-to-nearest would give 1 here) */
+	if (res16 != 0 || res32 != 0 || res64 != 0) {
+		printf("[BAD]\tfisttp ln2\n");
+		return 1;
+	}
+	ex = fetestexcept(FE_DIVBYZERO|FE_INEXACT|FE_INVALID|FE_OVERFLOW|FE_UNDERFLOW);
+	if (ex != FE_INEXACT) {
+		printf("[BAD]\tfisttp ln2: wrong exception state\n");
+		return 1;
+	}
+
+	return 0;
+}
+
+void sighandler(int sig)
+{
+	printf("[FAIL]\tGot signal %d, exiting\n", sig);
+	exit(1);
+}
+
+int main(int argc, char **argv, char **envp)
+{
+	int err = 0;
+
+	/* SIGILL triggers on 32-bit kernels w/o fisttp emulation
+	 * when run with "no387 nofxsr". Other signals are caught
+	 * just in case.
+	 */
+	signal(SIGILL, sighandler);
+	signal(SIGFPE, sighandler);
+	signal(SIGSEGV, sighandler);
+
+	printf("[RUN]\tTesting fisttp instructions\n");
+	err |= test();
+	if (!err)
+		printf("[OK]\tfisttp\n");
+	else
+		printf("[FAIL]\tfisttp errors: %d\n", err);
+
+	return err;
+}
diff --git a/tools/testing/selftests/x86/test_mremap_vdso.c b/tools/testing/selftests/x86/test_mremap_vdso.c
new file mode 100644
index 000000000..64f11c8d9
--- /dev/null
+++ b/tools/testing/selftests/x86/test_mremap_vdso.c
@@ -0,0 +1,115 @@
+/*
+ * 32-bit test to check vDSO mremap.
+ *
+ * Copyright (c) 2016 Dmitry Safonov
+ * Suggested-by: Andrew Lutomirski
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+/*
+ * Can be built statically:
+ * gcc -Os -Wall -static -m32 test_mremap_vdso.c
+ */
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <errno.h>
+#include <unistd.h>
+#include <string.h>
+
+#include <sys/mman.h>
+#include <sys/auxv.h>
+#include <sys/syscall.h>
+#include <sys/wait.h>
+
+#define PAGE_SIZE	4096
+
+static int try_to_remap(void *vdso_addr, unsigned long size)
+{
+	void *dest_addr, *new_addr;
+
+	/* Searching for memory location where to remap */
+	dest_addr = mmap(0, size, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
+	if (dest_addr == MAP_FAILED) {
+		printf("[WARN]\tmmap failed (%d): %m\n", errno);
+		return 0;
+	}
+
+	printf("[NOTE]\tMoving vDSO: [%p, %#lx] -> [%p, %#lx]\n",
+		vdso_addr, (unsigned long)vdso_addr + size,
+		dest_addr, (unsigned long)dest_addr + size);
+	fflush(stdout);
+
+	new_addr = mremap(vdso_addr, size, size,
+			MREMAP_FIXED|MREMAP_MAYMOVE, dest_addr);
+	if ((unsigned long)new_addr == (unsigned long)-1) {
+		munmap(dest_addr, size);
+		if (errno == EINVAL) {
+			printf("[NOTE]\tvDSO partial move failed, will try with bigger size\n");
+			return -1; /* Retry with larger */
+		}
+		printf("[FAIL]\tmremap failed (%d): %m\n", errno);
+		return 1;
+	}
+
+	return 0;
+
+}
+
+int main(int argc, char **argv, char **envp)
+{
+	pid_t child;
+
+	child = fork();
+	if (child == -1) {
+		printf("[WARN]\tfailed to fork (%d): %m\n", errno);
+		return 1;
+	}
+
+	if (child == 0) {
+		unsigned long vdso_size = PAGE_SIZE;
+		unsigned long auxval;
+		int ret = -1;
+
+		auxval = getauxval(AT_SYSINFO_EHDR);
+		printf("\tAT_SYSINFO_EHDR is %#lx\n", auxval);
+		if (!auxval || auxval == -ENOENT) {
+			printf("[WARN]\tgetauxval failed\n");
+			return 0;
+		}
+
+		/* Simpler than parsing ELF header */
+		while (ret < 0) {
+			ret = try_to_remap((void *)auxval, vdso_size);
+			vdso_size += PAGE_SIZE;
+		}
+
+#ifdef __i386__
+		/* Glibc is likely to explode now - exit with raw syscall */
+		asm volatile ("int $0x80" : : "a" (__NR_exit), "b" (!!ret));
+#else /* __x86_64__ */
+		syscall(SYS_exit, ret);
+#endif
+	} else {
+		int status;
+
+		if (waitpid(child, &status, 0) != child ||
+			!WIFEXITED(status)) {
+			printf("[FAIL]\tmremap() of the vDSO does not work on this kernel!\n");
+			return 1;
+		} else if (WEXITSTATUS(status) != 0) {
+			printf("[FAIL]\tChild failed with %d\n",
+					WEXITSTATUS(status));
+			return 1;
+		}
+		printf("[OK]\n");
+	}
+
+	return 0;
+}
diff --git a/tools/testing/selftests/x86/test_syscall_vdso.c b/tools/testing/selftests/x86/test_syscall_vdso.c
new file mode 100644
index 000000000..c9c328107
--- /dev/null
+++ b/tools/testing/selftests/x86/test_syscall_vdso.c
@@ -0,0 +1,408 @@
+/*
+ * 32-bit syscall ABI conformance test.
+ *
+ * Copyright (c) 2015 Denys Vlasenko
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+/*
+ * Can be built statically:
+ * gcc -Os -Wall -static -m32 test_syscall_vdso.c thunks_32.S
+ */
+#undef _GNU_SOURCE
+#define _GNU_SOURCE 1
+#undef __USE_GNU
+#define __USE_GNU 1
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <signal.h>
+#include <sys/types.h>
+#include <sys/select.h>
+#include <sys/time.h>
+#include <elf.h>
+#include <sys/ptrace.h>
+#include <sys/wait.h>
+
+#if !defined(__i386__)
+int main(int argc, char **argv, char **envp)
+{
+	printf("[SKIP]\tNot a 32-bit x86 userspace\n");
+	return 0;
+}
+#else
+
+long syscall_addr;
+long get_syscall(char **envp)
+{
+	Elf32_auxv_t *auxv;
+	while (*envp++ != NULL)
+		continue;
+	for (auxv = (void *)envp; auxv->a_type != AT_NULL; auxv++)
+		if (auxv->a_type == AT_SYSINFO)
+			return auxv->a_un.a_val;
+	printf("[WARN]\tAT_SYSINFO not supplied\n");
+	return 0;
+}
+
+asm (
+	"	.pushsection .text\n"
+	"	.global	int80\n"
+	"int80:\n"
+	"	int	$0x80\n"
+	"	ret\n"
+	"	.popsection\n"
+);
+extern char int80;
+
+struct regs64 {
+	uint64_t rax, rbx, rcx, rdx;
+	uint64_t rsi, rdi, rbp, rsp;
+	uint64_t r8,  r9,  r10, r11;
+	uint64_t r12, r13, r14, r15;
+};
+struct regs64 regs64;
+int kernel_is_64bit;
+
+asm (
+	"	.pushsection .text\n"
+	"	.code64\n"
+	"get_regs64:\n"
+	"	push	%rax\n"
+	"	mov	$regs64, %eax\n"
+	"	pop	0*8(%rax)\n"
+	"	movq	%rbx, 1*8(%rax)\n"
+	"	movq	%rcx, 2*8(%rax)\n"
+	"	movq	%rdx, 3*8(%rax)\n"
+	"	movq	%rsi, 4*8(%rax)\n"
+	"	movq	%rdi, 5*8(%rax)\n"
+	"	movq	%rbp, 6*8(%rax)\n"
+	"	movq	%rsp, 7*8(%rax)\n"
+	"	movq	%r8,  8*8(%rax)\n"
+	"	movq	%r9,  9*8(%rax)\n"
+	"	movq	%r10, 10*8(%rax)\n"
+	"	movq	%r11, 11*8(%rax)\n"
+	"	movq	%r12, 12*8(%rax)\n"
+	"	movq	%r13, 13*8(%rax)\n"
+	"	movq	%r14, 14*8(%rax)\n"
+	"	movq	%r15, 15*8(%rax)\n"
+	"	ret\n"
+	"poison_regs64:\n"
+	"	movq	$0x7f7f7f7f, %r8\n"
+	"	shl	$32, %r8\n"
+	"	orq	$0x7f7f7f7f, %r8\n"
+	"	movq	%r8, %r9\n"
+	"	incq	%r9\n"
+	"	movq	%r9, %r10\n"
+	"	incq	%r10\n"
+	"	movq	%r10, %r11\n"
+	"	incq	%r11\n"
+	"	movq	%r11, %r12\n"
+	"	incq	%r12\n"
+	"	movq	%r12, %r13\n"
+	"	incq	%r13\n"
+	"	movq	%r13, %r14\n"
+	"	incq	%r14\n"
+	"	movq	%r14, %r15\n"
+	"	incq	%r15\n"
+	"	ret\n"
+	"	.code32\n"
+	"	.popsection\n"
+);
+extern void get_regs64(void);
+extern void poison_regs64(void);
+extern unsigned long call64_from_32(void (*function)(void));
+void print_regs64(void)
+{
+	if (!kernel_is_64bit)
+		return;
+	printf("ax:%016llx bx:%016llx cx:%016llx dx:%016llx\n", regs64.rax,  regs64.rbx,  regs64.rcx,  regs64.rdx);
+	printf("si:%016llx di:%016llx bp:%016llx sp:%016llx\n", regs64.rsi,  regs64.rdi,  regs64.rbp,  regs64.rsp);
+	printf(" 8:%016llx  9:%016llx 10:%016llx 11:%016llx\n", regs64.r8 ,  regs64.r9 ,  regs64.r10,  regs64.r11);
+	printf("12:%016llx 13:%016llx 14:%016llx 15:%016llx\n", regs64.r12,  regs64.r13,  regs64.r14,  regs64.r15);
+}
+
+int check_regs64(void)
+{
+	int err = 0;
+	int num = 8;
+	uint64_t *r64 = &regs64.r8;
+	uint64_t expected = 0x7f7f7f7f7f7f7f7fULL;
+
+	if (!kernel_is_64bit)
+		return 0;
+
+	do {
+		if (*r64 == expected++)
+			continue; /* register did not change */
+		if (syscall_addr != (long)&int80) {
+			/*
+			 * Non-INT80 syscall entrypoints are allowed to clobber R8+ regs:
+			 * either clear them to 0, or for R11, load EFLAGS.
+			 */
+			if (*r64 == 0)
+				continue;
+			if (num == 11) {
+				printf("[NOTE]\tR11 has changed:%016llx - assuming clobbered by SYSRET insn\n", *r64);
+				continue;
+			}
+		} else {
+			/*
+			 * INT80 syscall entrypoint can be used by
+			 * 64-bit programs too, unlike SYSCALL/SYSENTER.
+			 * Therefore it must preserve R12+
+			 * (they are callee-saved registers in 64-bit C ABI).
+			 *
+			 * Starting in Linux 4.17 (and any kernel that
+			 * backports the change), R8..11 are preserved.
+			 * Historically (and probably unintentionally), they
+			 * were clobbered or zeroed.
+			 */
+		}
+		printf("[FAIL]\tR%d has changed:%016llx\n", num, *r64);
+		err++;
+	} while (r64++, ++num < 16);
+
+	if (!err)
+		printf("[OK]\tR8..R15 did not leak kernel data\n");
+	return err;
+}
+
+int nfds;
+fd_set rfds;
+fd_set wfds;
+fd_set efds;
+struct timespec timeout;
+sigset_t sigmask;
+struct {
+	sigset_t *sp;
+	int sz;
+} sigmask_desc;
+
+void prep_args()
+{
+	nfds = 42;
+	FD_ZERO(&rfds);
+	FD_ZERO(&wfds);
+	FD_ZERO(&efds);
+	FD_SET(0, &rfds);
+	FD_SET(1, &wfds);
+	FD_SET(2, &efds);
+	timeout.tv_sec = 0;
+	timeout.tv_nsec = 123;
+	sigemptyset(&sigmask);
+	sigaddset(&sigmask, SIGINT);
+	sigaddset(&sigmask, SIGUSR2);
+	sigaddset(&sigmask, SIGRTMAX);
+	sigmask_desc.sp = &sigmask;
+	sigmask_desc.sz = 8; /* bytes */
+}
+
+static void print_flags(const char *name, unsigned long r)
+{
+	static const char *bitarray[] = {
+	"\n" ,"c\n" ,/* Carry Flag */
+	"0 " ,"1 "  ,/* Bit 1 - always on */
+	""   ,"p "  ,/* Parity Flag */
+	"0 " ,"3? " ,
+	""   ,"a "  ,/* Auxiliary carry Flag */
+	"0 " ,"5? " ,
+	""   ,"z "  ,/* Zero Flag */
+	""   ,"s "  ,/* Sign Flag */
+	""   ,"t "  ,/* Trap Flag */
+	""   ,"i "  ,/* Interrupt Flag */
+	""   ,"d "  ,/* Direction Flag */
+	""   ,"o "  ,/* Overflow Flag */
+	"0 " ,"1 "  ,/* I/O Privilege Level (2 bits) */
+	"0"  ,"1"   ,/* I/O Privilege Level (2 bits) */
+	""   ,"n "  ,/* Nested Task */
+	"0 " ,"15? ",
+	""   ,"r "  ,/* Resume Flag */
+	""   ,"v "  ,/* Virtual Mode */
+	""   ,"ac " ,/* Alignment Check/Access Control */
+	""   ,"vif ",/* Virtual Interrupt Flag */
+	""   ,"vip ",/* Virtual Interrupt Pending */
+	""   ,"id " ,/* CPUID detection */
+	NULL
+	};
+	const char **bitstr;
+	int bit;
+
+	printf("%s=%016lx ", name, r);
+	bitstr = bitarray + 42;
+	bit = 21;
+	if ((r >> 22) != 0)
+		printf("(extra bits are set) ");
+	do {
+		if (bitstr[(r >> bit) & 1][0])
+			fputs(bitstr[(r >> bit) & 1], stdout);
+		bitstr -= 2;
+		bit--;
+	} while (bit >= 0);
+}
+
+int run_syscall(void)
+{
+	long flags, bad_arg;
+
+	prep_args();
+
+	if (kernel_is_64bit)
+		call64_from_32(poison_regs64);
+	/*print_regs64();*/
+
+	asm("\n"
+	/* Try 6-arg syscall: pselect. It should return quickly */
+	"	push	%%ebp\n"
+	"	mov	$308, %%eax\n"     /* PSELECT */
+	"	mov	nfds, %%ebx\n"     /* ebx  arg1 */
+	"	mov	$rfds, %%ecx\n"    /* ecx  arg2 */
+	"	mov	$wfds, %%edx\n"    /* edx  arg3 */
+	"	mov	$efds, %%esi\n"    /* esi  arg4 */
+	"	mov	$timeout, %%edi\n" /* edi  arg5 */
+	"	mov	$sigmask_desc, %%ebp\n" /* %ebp arg6 */
+	"	push	$0x200ed7\n"      /* set almost all flags */
+	"	popf\n"		/* except TF, IOPL, NT, RF, VM, AC, VIF, VIP */
+	"	call	*syscall_addr\n"
+	/* Check that registers are not clobbered */
+	"	pushf\n"
+	"	pop	%%eax\n"
+	"	cld\n"
+	"	cmp	nfds, %%ebx\n"     /* ebx  arg1 */
+	"	mov	$1, %%ebx\n"
+	"	jne	1f\n"
+	"	cmp	$rfds, %%ecx\n"    /* ecx  arg2 */
+	"	mov	$2, %%ebx\n"
+	"	jne	1f\n"
+	"	cmp	$wfds, %%edx\n"    /* edx  arg3 */
+	"	mov	$3, %%ebx\n"
+	"	jne	1f\n"
+	"	cmp	$efds, %%esi\n"    /* esi  arg4 */
+	"	mov	$4, %%ebx\n"
+	"	jne	1f\n"
+	"	cmp	$timeout, %%edi\n" /* edi  arg5 */
+	"	mov	$5, %%ebx\n"
+	"	jne	1f\n"
+	"	cmpl	$sigmask_desc, %%ebp\n" /* %ebp arg6 */
+	"	mov	$6, %%ebx\n"
+	"	jne	1f\n"
+	"	mov	$0, %%ebx\n"
+	"1:\n"
+	"	pop	%%ebp\n"
+	: "=a" (flags), "=b" (bad_arg)
+	:
+	: "cx", "dx", "si", "di"
+	);
+
+	if (kernel_is_64bit) {
+		memset(&regs64, 0x77, sizeof(regs64));
+		call64_from_32(get_regs64);
+		/*print_regs64();*/
+	}
+
+	/*
+	 * On paravirt kernels, flags are not preserved across syscalls.
+	 * Thus, we do not consider it a bug if some are changed.
+	 * We just show ones which do.
+	 */
+	if ((0x200ed7 ^ flags) != 0) {
+		print_flags("[WARN]\tFlags before", 0x200ed7);
+		print_flags("[WARN]\tFlags  after", flags);
+		print_flags("[WARN]\tFlags change", (0x200ed7 ^ flags));
+	}
+
+	if (bad_arg) {
+		printf("[FAIL]\targ#%ld clobbered\n", bad_arg);
+		return 1;
+	}
+	printf("[OK]\tArguments are preserved across syscall\n");
+
+	return check_regs64();
+}
+
+int run_syscall_twice()
+{
+	int exitcode = 0;
+	long sv;
+
+	if (syscall_addr) {
+		printf("[RUN]\tExecuting 6-argument 32-bit syscall via VDSO\n");
+		exitcode = run_syscall();
+	}
+	sv = syscall_addr;
+	syscall_addr = (long)&int80;
+	printf("[RUN]\tExecuting 6-argument 32-bit syscall via INT 80\n");
+	exitcode += run_syscall();
+	syscall_addr = sv;
+	return exitcode;
+}
+
+void ptrace_me()
+{
+	pid_t pid;
+
+	fflush(NULL);
+	pid = fork();
+	if (pid < 0)
+		exit(1);
+	if (pid == 0) {
+		/* child */
+		if (ptrace(PTRACE_TRACEME, 0L, 0L, 0L) != 0)
+			exit(0);
+		raise(SIGSTOP);
+		return;
+	}
+	/* parent */
+	printf("[RUN]\tRunning tests under ptrace\n");
+	while (1) {
+		int status;
+		pid = waitpid(-1, &status, __WALL);
+		if (WIFEXITED(status))
+			exit(WEXITSTATUS(status));
+		if (WIFSIGNALED(status))
+			exit(WTERMSIG(status));
+		if (pid <= 0 || !WIFSTOPPED(status)) /* paranoia */
+			exit(255);
+		/*
+		 * Note: we do not inject sig = WSTOPSIG(status).
+		 * We probably should, but careful: do not inject SIGTRAP
+		 * generated by syscall entry/exit stops.
+		 * That kills the child.
+		 */
+		ptrace(PTRACE_SYSCALL, pid, 0L, 0L /*sig*/);
+	}
+}
+
+int main(int argc, char **argv, char **envp)
+{
+	int exitcode = 0;
+	int cs;
+
+	asm("\n"
+	"	movl	%%cs, %%eax\n"
+	: "=a" (cs)
+	);
+	kernel_is_64bit = (cs == 0x23);
+	if (!kernel_is_64bit)
+		printf("[NOTE]\tNot a 64-bit kernel, won't test R8..R15 leaks\n");
+
+	/* This only works for non-static builds:
+	 * syscall_addr = dlsym(dlopen("linux-gate.so.1", RTLD_NOW), "__kernel_vsyscall");
+	 */
+	syscall_addr = get_syscall(envp);
+
+	exitcode += run_syscall_twice();
+	ptrace_me();
+	exitcode += run_syscall_twice();
+
+	return exitcode;
+}
+#endif
diff --git a/tools/testing/selftests/x86/test_vdso.c b/tools/testing/selftests/x86/test_vdso.c
new file mode 100644
index 000000000..35edd61d1
--- /dev/null
+++ b/tools/testing/selftests/x86/test_vdso.c
@@ -0,0 +1,337 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ldt_gdt.c - Test cases for LDT and GDT access
+ * Copyright (c) 2011-2015 Andrew Lutomirski
+ */
+
+#define _GNU_SOURCE
+
+#include <stdio.h>
+#include <sys/time.h>
+#include <time.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <dlfcn.h>
+#include <string.h>
+#include <errno.h>
+#include <sched.h>
+#include <stdbool.h>
+#include <limits.h>
+
+#ifndef SYS_getcpu
+# ifdef __x86_64__
+#  define SYS_getcpu 309
+# else
+#  define SYS_getcpu 318
+# endif
+#endif
+
+/* max length of lines in /proc/self/maps - anything longer is skipped here */
+#define MAPS_LINE_LEN 128
+
+int nerrs = 0;
+
+typedef int (*vgettime_t)(clockid_t, struct timespec *);
+
+vgettime_t vdso_clock_gettime;
+
+typedef long (*vgtod_t)(struct timeval *tv, struct timezone *tz);
+
+vgtod_t vdso_gettimeofday;
+
+typedef long (*getcpu_t)(unsigned *, unsigned *, void *);
+
+getcpu_t vgetcpu;
+getcpu_t vdso_getcpu;
+
+static void *vsyscall_getcpu(void)
+{
+#ifdef __x86_64__
+	FILE *maps;
+	char line[MAPS_LINE_LEN];
+	bool found = false;
+
+	maps = fopen("/proc/self/maps", "r");
+	if (!maps) /* might still be present, but ignore it here, as we test vDSO not vsyscall */
+		return NULL;
+
+	while (fgets(line, MAPS_LINE_LEN, maps)) {
+		char r, x;
+		void *start, *end;
+		char name[MAPS_LINE_LEN];
+
+		/* sscanf() is safe here as strlen(name) >= strlen(line) */
+		if (sscanf(line, "%p-%p %c-%cp %*x %*x:%*x %*u %s",
+			   &start, &end, &r, &x, name) != 5)
+			continue;
+
+		if (strcmp(name, "[vsyscall]"))
+			continue;
+
+		/* assume entries are OK, as we test vDSO here not vsyscall */
+		found = true;
+		break;
+	}
+
+	fclose(maps);
+
+	if (!found) {
+		printf("Warning: failed to find vsyscall getcpu\n");
+		return NULL;
+	}
+	return (void *) (0xffffffffff600800);
+#else
+	return NULL;
+#endif
+}
+
+
+static void fill_function_pointers()
+{
+	void *vdso = dlopen("linux-vdso.so.1",
+			    RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD);
+	if (!vdso)
+		vdso = dlopen("linux-gate.so.1",
+			      RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD);
+	if (!vdso) {
+		printf("[WARN]\tfailed to find vDSO\n");
+		return;
+	}
+
+	vdso_getcpu = (getcpu_t)dlsym(vdso, "__vdso_getcpu");
+	if (!vdso_getcpu)
+		printf("Warning: failed to find getcpu in vDSO\n");
+
+	vgetcpu = (getcpu_t) vsyscall_getcpu();
+
+	vdso_clock_gettime = (vgettime_t)dlsym(vdso, "__vdso_clock_gettime");
+	if (!vdso_clock_gettime)
+		printf("Warning: failed to find clock_gettime in vDSO\n");
+
+	vdso_gettimeofday = (vgtod_t)dlsym(vdso, "__vdso_gettimeofday");
+	if (!vdso_gettimeofday)
+		printf("Warning: failed to find gettimeofday in vDSO\n");
+
+}
+
+static long sys_getcpu(unsigned * cpu, unsigned * node,
+		       void* cache)
+{
+	return syscall(__NR_getcpu, cpu, node, cache);
+}
+
+static inline int sys_clock_gettime(clockid_t id, struct timespec *ts)
+{
+	return syscall(__NR_clock_gettime, id, ts);
+}
+
+static inline int sys_gettimeofday(struct timeval *tv, struct timezone *tz)
+{
+	return syscall(__NR_gettimeofday, tv, tz);
+}
+
+static void test_getcpu(void)
+{
+	printf("[RUN]\tTesting getcpu...\n");
+
+	for (int cpu = 0; ; cpu++) {
+		cpu_set_t cpuset;
+		CPU_ZERO(&cpuset);
+		CPU_SET(cpu, &cpuset);
+		if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0)
+			return;
+
+		unsigned cpu_sys, cpu_vdso, cpu_vsys,
+			node_sys, node_vdso, node_vsys;
+		long ret_sys, ret_vdso = 1, ret_vsys = 1;
+		unsigned node;
+
+		ret_sys = sys_getcpu(&cpu_sys, &node_sys, 0);
+		if (vdso_getcpu)
+			ret_vdso = vdso_getcpu(&cpu_vdso, &node_vdso, 0);
+		if (vgetcpu)
+			ret_vsys = vgetcpu(&cpu_vsys, &node_vsys, 0);
+
+		if (!ret_sys)
+			node = node_sys;
+		else if (!ret_vdso)
+			node = node_vdso;
+		else if (!ret_vsys)
+			node = node_vsys;
+
+		bool ok = true;
+		if (!ret_sys && (cpu_sys != cpu || node_sys != node))
+			ok = false;
+		if (!ret_vdso && (cpu_vdso != cpu || node_vdso != node))
+			ok = false;
+		if (!ret_vsys && (cpu_vsys != cpu || node_vsys != node))
+			ok = false;
+
+		printf("[%s]\tCPU %u:", ok ? "OK" : "FAIL", cpu);
+		if (!ret_sys)
+			printf(" syscall: cpu %u, node %u", cpu_sys, node_sys);
+		if (!ret_vdso)
+			printf(" vdso: cpu %u, node %u", cpu_vdso, node_vdso);
+		if (!ret_vsys)
+			printf(" vsyscall: cpu %u, node %u", cpu_vsys,
+			       node_vsys);
+		printf("\n");
+
+		if (!ok)
+			nerrs++;
+	}
+}
+
+static bool ts_leq(const struct timespec *a, const struct timespec *b)
+{
+	if (a->tv_sec != b->tv_sec)
+		return a->tv_sec < b->tv_sec;
+	else
+		return a->tv_nsec <= b->tv_nsec;
+}
+
+static bool tv_leq(const struct timeval *a, const struct timeval *b)
+{
+	if (a->tv_sec != b->tv_sec)
+		return a->tv_sec < b->tv_sec;
+	else
+		return a->tv_usec <= b->tv_usec;
+}
+
+static char const * const clocknames[] = {
+	[0] = "CLOCK_REALTIME",
+	[1] = "CLOCK_MONOTONIC",
+	[2] = "CLOCK_PROCESS_CPUTIME_ID",
+	[3] = "CLOCK_THREAD_CPUTIME_ID",
+	[4] = "CLOCK_MONOTONIC_RAW",
+	[5] = "CLOCK_REALTIME_COARSE",
+	[6] = "CLOCK_MONOTONIC_COARSE",
+	[7] = "CLOCK_BOOTTIME",
+	[8] = "CLOCK_REALTIME_ALARM",
+	[9] = "CLOCK_BOOTTIME_ALARM",
+	[10] = "CLOCK_SGI_CYCLE",
+	[11] = "CLOCK_TAI",
+};
+
+static void test_one_clock_gettime(int clock, const char *name)
+{
+	struct timespec start, vdso, end;
+	int vdso_ret, end_ret;
+
+	printf("[RUN]\tTesting clock_gettime for clock %s (%d)...\n", name, clock);
+
+	if (sys_clock_gettime(clock, &start) < 0) {
+		if (errno == EINVAL) {
+			vdso_ret = vdso_clock_gettime(clock, &vdso);
+			if (vdso_ret == -EINVAL) {
+				printf("[OK]\tNo such clock.\n");
+			} else {
+				printf("[FAIL]\tNo such clock, but __vdso_clock_gettime returned %d\n", vdso_ret);
+				nerrs++;
+			}
+		} else {
+			printf("[WARN]\t clock_gettime(%d) syscall returned error %d\n", clock, errno);
+		}
+		return;
+	}
+
+	vdso_ret = vdso_clock_gettime(clock, &vdso);
+	end_ret = sys_clock_gettime(clock, &end);
+
+	if (vdso_ret != 0 || end_ret != 0) {
+		printf("[FAIL]\tvDSO returned %d, syscall errno=%d\n",
+		       vdso_ret, errno);
+		nerrs++;
+		return;
+	}
+
+	printf("\t%llu.%09ld %llu.%09ld %llu.%09ld\n",
+	       (unsigned long long)start.tv_sec, start.tv_nsec,
+	       (unsigned long long)vdso.tv_sec, vdso.tv_nsec,
+	       (unsigned long long)end.tv_sec, end.tv_nsec);
+
+	if (!ts_leq(&start, &vdso) || !ts_leq(&vdso, &end)) {
+		printf("[FAIL]\tTimes are out of sequence\n");
+		nerrs++;
+	}
+}
+
+static void test_clock_gettime(void)
+{
+	for (int clock = 0; clock < sizeof(clocknames) / sizeof(clocknames[0]);
+	     clock++) {
+		test_one_clock_gettime(clock, clocknames[clock]);
+	}
+
+	/* Also test some invalid clock ids */
+	test_one_clock_gettime(-1, "invalid");
+	test_one_clock_gettime(INT_MIN, "invalid");
+	test_one_clock_gettime(INT_MAX, "invalid");
+}
+
+static void test_gettimeofday(void)
+{
+	struct timeval start, vdso, end;
+	struct timezone sys_tz, vdso_tz;
+	int vdso_ret, end_ret;
+
+	if (!vdso_gettimeofday)
+		return;
+
+	printf("[RUN]\tTesting gettimeofday...\n");
+
+	if (sys_gettimeofday(&start, &sys_tz) < 0) {
+		printf("[FAIL]\tsys_gettimeofday failed (%d)\n", errno);
+		nerrs++;
+		return;
+	}
+
+	vdso_ret = vdso_gettimeofday(&vdso, &vdso_tz);
+	end_ret = sys_gettimeofday(&end, NULL);
+
+	if (vdso_ret != 0 || end_ret != 0) {
+		printf("[FAIL]\tvDSO returned %d, syscall errno=%d\n",
+		       vdso_ret, errno);
+		nerrs++;
+		return;
+	}
+
+	printf("\t%llu.%06ld %llu.%06ld %llu.%06ld\n",
+	       (unsigned long long)start.tv_sec, start.tv_usec,
+	       (unsigned long long)vdso.tv_sec, vdso.tv_usec,
+	       (unsigned long long)end.tv_sec, end.tv_usec);
+
+	if (!tv_leq(&start, &vdso) || !tv_leq(&vdso, &end)) {
+		printf("[FAIL]\tTimes are out of sequence\n");
+		nerrs++;
+	}
+
+	if (sys_tz.tz_minuteswest == vdso_tz.tz_minuteswest &&
+	    sys_tz.tz_dsttime == vdso_tz.tz_dsttime) {
+		printf("[OK]\ttimezones match: minuteswest=%d, dsttime=%d\n",
+		       sys_tz.tz_minuteswest, sys_tz.tz_dsttime);
+	} else {
+		printf("[FAIL]\ttimezones do not match\n");
+		nerrs++;
+	}
+
+	/* And make sure that passing NULL for tz doesn't crash. */
+	vdso_gettimeofday(&vdso, NULL);
+}
+
+int main(int argc, char **argv)
+{
+	fill_function_pointers();
+
+	test_clock_gettime();
+	test_gettimeofday();
+
+	/*
+	 * Test getcpu() last so that, if something goes wrong setting affinity,
+	 * we still run the other tests.
+	 */
+	test_getcpu();
+
+	return nerrs ? 1 : 0;
+}
diff --git a/tools/testing/selftests/x86/test_vsyscall.c b/tools/testing/selftests/x86/test_vsyscall.c
new file mode 100644
index 000000000..0b4f1cc22
--- /dev/null
+++ b/tools/testing/selftests/x86/test_vsyscall.c
@@ -0,0 +1,506 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#define _GNU_SOURCE
+
+#include <stdio.h>
+#include <sys/time.h>
+#include <time.h>
+#include <stdlib.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+#include <dlfcn.h>
+#include <string.h>
+#include <inttypes.h>
+#include <signal.h>
+#include <sys/ucontext.h>
+#include <errno.h>
+#include <err.h>
+#include <sched.h>
+#include <stdbool.h>
+#include <setjmp.h>
+
+#ifdef __x86_64__
+# define VSYS(x) (x)
+#else
+# define VSYS(x) 0
+#endif
+
+#ifndef SYS_getcpu
+# ifdef __x86_64__
+#  define SYS_getcpu 309
+# else
+#  define SYS_getcpu 318
+# endif
+#endif
+
+/* max length of lines in /proc/self/maps - anything longer is skipped here */
+#define MAPS_LINE_LEN 128
+
+static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
+		       int flags)
+{
+	struct sigaction sa;
+	memset(&sa, 0, sizeof(sa));
+	sa.sa_sigaction = handler;
+	sa.sa_flags = SA_SIGINFO | flags;
+	sigemptyset(&sa.sa_mask);
+	if (sigaction(sig, &sa, 0))
+		err(1, "sigaction");
+}
+
+/* vsyscalls and vDSO */
+bool should_read_vsyscall = false;
+
+typedef long (*gtod_t)(struct timeval *tv, struct timezone *tz);
+gtod_t vgtod = (gtod_t)VSYS(0xffffffffff600000);
+gtod_t vdso_gtod;
+
+typedef int (*vgettime_t)(clockid_t, struct timespec *);
+vgettime_t vdso_gettime;
+
+typedef long (*time_func_t)(time_t *t);
+time_func_t vtime = (time_func_t)VSYS(0xffffffffff600400);
+time_func_t vdso_time;
+
+typedef long (*getcpu_t)(unsigned *, unsigned *, void *);
+getcpu_t vgetcpu = (getcpu_t)VSYS(0xffffffffff600800);
+getcpu_t vdso_getcpu;
+
+static void init_vdso(void)
+{
+	void *vdso = dlopen("linux-vdso.so.1", RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD);
+	if (!vdso)
+		vdso = dlopen("linux-gate.so.1", RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD);
+	if (!vdso) {
+		printf("[WARN]\tfailed to find vDSO\n");
+		return;
+	}
+
+	vdso_gtod = (gtod_t)dlsym(vdso, "__vdso_gettimeofday");
+	if (!vdso_gtod)
+		printf("[WARN]\tfailed to find gettimeofday in vDSO\n");
+
+	vdso_gettime = (vgettime_t)dlsym(vdso, "__vdso_clock_gettime");
+	if (!vdso_gettime)
+		printf("[WARN]\tfailed to find clock_gettime in vDSO\n");
+
+	vdso_time = (time_func_t)dlsym(vdso, "__vdso_time");
+	if (!vdso_time)
+		printf("[WARN]\tfailed to find time in vDSO\n");
+
+	vdso_getcpu = (getcpu_t)dlsym(vdso, "__vdso_getcpu");
+	if (!vdso_getcpu) {
+		/* getcpu() was never wired up in the 32-bit vDSO. */
+		printf("[%s]\tfailed to find getcpu in vDSO\n",
+		       sizeof(long) == 8 ? "WARN" : "NOTE");
+	}
+}
+
+static int init_vsys(void)
+{
+#ifdef __x86_64__
+	int nerrs = 0;
+	FILE *maps;
+	char line[MAPS_LINE_LEN];
+	bool found = false;
+
+	maps = fopen("/proc/self/maps", "r");
+	if (!maps) {
+		printf("[WARN]\tCould not open /proc/self/maps -- assuming vsyscall is r-x\n");
+		should_read_vsyscall = true;
+		return 0;
+	}
+
+	while (fgets(line, MAPS_LINE_LEN, maps)) {
+		char r, x;
+		void *start, *end;
+		char name[MAPS_LINE_LEN];
+
+		/* sscanf() is safe here as strlen(name) >= strlen(line) */
+		if (sscanf(line, "%p-%p %c-%cp %*x %*x:%*x %*u %s",
+			   &start, &end, &r, &x, name) != 5)
+			continue;
+
+		if (strcmp(name, "[vsyscall]"))
+			continue;
+
+		printf("\tvsyscall map: %s", line);
+
+		if (start != (void *)0xffffffffff600000 ||
+		    end != (void *)0xffffffffff601000) {
+			printf("[FAIL]\taddress range is nonsense\n");
+			nerrs++;
+		}
+
+		printf("\tvsyscall permissions are %c-%c\n", r, x);
+		should_read_vsyscall = (r == 'r');
+		if (x != 'x') {
+			vgtod = NULL;
+			vtime = NULL;
+			vgetcpu = NULL;
+		}
+
+		found = true;
+		break;
+	}
+
+	fclose(maps);
+
+	if (!found) {
+		printf("\tno vsyscall map in /proc/self/maps\n");
+		should_read_vsyscall = false;
+		vgtod = NULL;
+		vtime = NULL;
+		vgetcpu = NULL;
+	}
+
+	return nerrs;
+#else
+	return 0;
+#endif
+}
+
+/* syscalls */
+static inline long sys_gtod(struct timeval *tv, struct timezone *tz)
+{
+	return syscall(SYS_gettimeofday, tv, tz);
+}
+
+static inline int sys_clock_gettime(clockid_t id, struct timespec *ts)
+{
+	return syscall(SYS_clock_gettime, id, ts);
+}
+
+static inline long sys_time(time_t *t)
+{
+	return syscall(SYS_time, t);
+}
+
+static inline long sys_getcpu(unsigned * cpu, unsigned * node,
+			      void* cache)
+{
+	return syscall(SYS_getcpu, cpu, node, cache);
+}
+
+static jmp_buf jmpbuf;
+
+static void sigsegv(int sig, siginfo_t *info, void *ctx_void)
+{
+	siglongjmp(jmpbuf, 1);
+}
+
+static double tv_diff(const struct timeval *a, const struct timeval *b)
+{
+	return (double)(a->tv_sec - b->tv_sec) +
+		(double)((int)a->tv_usec - (int)b->tv_usec) * 1e-6;
+}
+
+static int check_gtod(const struct timeval *tv_sys1,
+		      const struct timeval *tv_sys2,
+		      const struct timezone *tz_sys,
+		      const char *which,
+		      const struct timeval *tv_other,
+		      const struct timezone *tz_other)
+{
+	int nerrs = 0;
+	double d1, d2;
+
+	if (tz_other && (tz_sys->tz_minuteswest != tz_other->tz_minuteswest || tz_sys->tz_dsttime != tz_other->tz_dsttime)) {
+		printf("[FAIL] %s tz mismatch\n", which);
+		nerrs++;
+	}
+
+	d1 = tv_diff(tv_other, tv_sys1);
+	d2 = tv_diff(tv_sys2, tv_other); 
+	printf("\t%s time offsets: %lf %lf\n", which, d1, d2);
+
+	if (d1 < 0 || d2 < 0) {
+		printf("[FAIL]\t%s time was inconsistent with the syscall\n", which);
+		nerrs++;
+	} else {
+		printf("[OK]\t%s gettimeofday()'s timeval was okay\n", which);
+	}
+
+	return nerrs;
+}
+
+static int test_gtod(void)
+{
+	struct timeval tv_sys1, tv_sys2, tv_vdso, tv_vsys;
+	struct timezone tz_sys, tz_vdso, tz_vsys;
+	long ret_vdso = -1;
+	long ret_vsys = -1;
+	int nerrs = 0;
+
+	printf("[RUN]\ttest gettimeofday()\n");
+
+	if (sys_gtod(&tv_sys1, &tz_sys) != 0)
+		err(1, "syscall gettimeofday");
+	if (vdso_gtod)
+		ret_vdso = vdso_gtod(&tv_vdso, &tz_vdso);
+	if (vgtod)
+		ret_vsys = vgtod(&tv_vsys, &tz_vsys);
+	if (sys_gtod(&tv_sys2, &tz_sys) != 0)
+		err(1, "syscall gettimeofday");
+
+	if (vdso_gtod) {
+		if (ret_vdso == 0) {
+			nerrs += check_gtod(&tv_sys1, &tv_sys2, &tz_sys, "vDSO", &tv_vdso, &tz_vdso);
+		} else {
+			printf("[FAIL]\tvDSO gettimeofday() failed: %ld\n", ret_vdso);
+			nerrs++;
+		}
+	}
+
+	if (vgtod) {
+		if (ret_vsys == 0) {
+			nerrs += check_gtod(&tv_sys1, &tv_sys2, &tz_sys, "vsyscall", &tv_vsys, &tz_vsys);
+		} else {
+			printf("[FAIL]\tvsys gettimeofday() failed: %ld\n", ret_vsys);
+			nerrs++;
+		}
+	}
+
+	return nerrs;
+}
+
+static int test_time(void) {
+	int nerrs = 0;
+
+	printf("[RUN]\ttest time()\n");
+	long t_sys1, t_sys2, t_vdso = 0, t_vsys = 0;
+	long t2_sys1 = -1, t2_sys2 = -1, t2_vdso = -1, t2_vsys = -1;
+	t_sys1 = sys_time(&t2_sys1);
+	if (vdso_time)
+		t_vdso = vdso_time(&t2_vdso);
+	if (vtime)
+		t_vsys = vtime(&t2_vsys);
+	t_sys2 = sys_time(&t2_sys2);
+	if (t_sys1 < 0 || t_sys1 != t2_sys1 || t_sys2 < 0 || t_sys2 != t2_sys2) {
+		printf("[FAIL]\tsyscall failed (ret1:%ld output1:%ld ret2:%ld output2:%ld)\n", t_sys1, t2_sys1, t_sys2, t2_sys2);
+		nerrs++;
+		return nerrs;
+	}
+
+	if (vdso_time) {
+		if (t_vdso < 0 || t_vdso != t2_vdso) {
+			printf("[FAIL]\tvDSO failed (ret:%ld output:%ld)\n", t_vdso, t2_vdso);
+			nerrs++;
+		} else if (t_vdso < t_sys1 || t_vdso > t_sys2) {
+			printf("[FAIL]\tvDSO returned the wrong time (%ld %ld %ld)\n", t_sys1, t_vdso, t_sys2);
+			nerrs++;
+		} else {
+			printf("[OK]\tvDSO time() is okay\n");
+		}
+	}
+
+	if (vtime) {
+		if (t_vsys < 0 || t_vsys != t2_vsys) {
+			printf("[FAIL]\tvsyscall failed (ret:%ld output:%ld)\n", t_vsys, t2_vsys);
+			nerrs++;
+		} else if (t_vsys < t_sys1 || t_vsys > t_sys2) {
+			printf("[FAIL]\tvsyscall returned the wrong time (%ld %ld %ld)\n", t_sys1, t_vsys, t_sys2);
+			nerrs++;
+		} else {
+			printf("[OK]\tvsyscall time() is okay\n");
+		}
+	}
+
+	return nerrs;
+}
+
+static int test_getcpu(int cpu)
+{
+	int nerrs = 0;
+	long ret_sys, ret_vdso = -1, ret_vsys = -1;
+
+	printf("[RUN]\tgetcpu() on CPU %d\n", cpu);
+
+	cpu_set_t cpuset;
+	CPU_ZERO(&cpuset);
+	CPU_SET(cpu, &cpuset);
+	if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0) {
+		printf("[SKIP]\tfailed to force CPU %d\n", cpu);
+		return nerrs;
+	}
+
+	unsigned cpu_sys, cpu_vdso, cpu_vsys, node_sys, node_vdso, node_vsys;
+	unsigned node = 0;
+	bool have_node = false;
+	ret_sys = sys_getcpu(&cpu_sys, &node_sys, 0);
+	if (vdso_getcpu)
+		ret_vdso = vdso_getcpu(&cpu_vdso, &node_vdso, 0);
+	if (vgetcpu)
+		ret_vsys = vgetcpu(&cpu_vsys, &node_vsys, 0);
+
+	if (ret_sys == 0) {
+		if (cpu_sys != cpu) {
+			printf("[FAIL]\tsyscall reported CPU %hu but should be %d\n", cpu_sys, cpu);
+			nerrs++;
+		}
+
+		have_node = true;
+		node = node_sys;
+	}
+
+	if (vdso_getcpu) {
+		if (ret_vdso) {
+			printf("[FAIL]\tvDSO getcpu() failed\n");
+			nerrs++;
+		} else {
+			if (!have_node) {
+				have_node = true;
+				node = node_vdso;
+			}
+
+			if (cpu_vdso != cpu) {
+				printf("[FAIL]\tvDSO reported CPU %hu but should be %d\n", cpu_vdso, cpu);
+				nerrs++;
+			} else {
+				printf("[OK]\tvDSO reported correct CPU\n");
+			}
+
+			if (node_vdso != node) {
+				printf("[FAIL]\tvDSO reported node %hu but should be %hu\n", node_vdso, node);
+				nerrs++;
+			} else {
+				printf("[OK]\tvDSO reported correct node\n");
+			}
+		}
+	}
+
+	if (vgetcpu) {
+		if (ret_vsys) {
+			printf("[FAIL]\tvsyscall getcpu() failed\n");
+			nerrs++;
+		} else {
+			if (!have_node) {
+				have_node = true;
+				node = node_vsys;
+			}
+
+			if (cpu_vsys != cpu) {
+				printf("[FAIL]\tvsyscall reported CPU %hu but should be %d\n", cpu_vsys, cpu);
+				nerrs++;
+			} else {
+				printf("[OK]\tvsyscall reported correct CPU\n");
+			}
+
+			if (node_vsys != node) {
+				printf("[FAIL]\tvsyscall reported node %hu but should be %hu\n", node_vsys, node);
+				nerrs++;
+			} else {
+				printf("[OK]\tvsyscall reported correct node\n");
+			}
+		}
+	}
+
+	return nerrs;
+}
+
+static int test_vsys_r(void)
+{
+#ifdef __x86_64__
+	printf("[RUN]\tChecking read access to the vsyscall page\n");
+	bool can_read;
+	if (sigsetjmp(jmpbuf, 1) == 0) {
+		*(volatile int *)0xffffffffff600000;
+		can_read = true;
+	} else {
+		can_read = false;
+	}
+
+	if (can_read && !should_read_vsyscall) {
+		printf("[FAIL]\tWe have read access, but we shouldn't\n");
+		return 1;
+	} else if (!can_read && should_read_vsyscall) {
+		printf("[FAIL]\tWe don't have read access, but we should\n");
+		return 1;
+	} else {
+		printf("[OK]\tgot expected result\n");
+	}
+#endif
+
+	return 0;
+}
+
+
+#ifdef __x86_64__
+#define X86_EFLAGS_TF (1UL << 8)
+static volatile sig_atomic_t num_vsyscall_traps;
+
+static unsigned long get_eflags(void)
+{
+	unsigned long eflags;
+	asm volatile ("pushfq\n\tpopq %0" : "=rm" (eflags));
+	return eflags;
+}
+
+static void set_eflags(unsigned long eflags)
+{
+	asm volatile ("pushq %0\n\tpopfq" : : "rm" (eflags) : "flags");
+}
+
+static void sigtrap(int sig, siginfo_t *info, void *ctx_void)
+{
+	ucontext_t *ctx = (ucontext_t *)ctx_void;
+	unsigned long ip = ctx->uc_mcontext.gregs[REG_RIP];
+
+	if (((ip ^ 0xffffffffff600000UL) & ~0xfffUL) == 0)
+		num_vsyscall_traps++;
+}
+
+static int test_emulation(void)
+{
+	time_t tmp;
+	bool is_native;
+
+	if (!vtime)
+		return 0;
+
+	printf("[RUN]\tchecking that vsyscalls are emulated\n");
+	sethandler(SIGTRAP, sigtrap, 0);
+	set_eflags(get_eflags() | X86_EFLAGS_TF);
+	vtime(&tmp);
+	set_eflags(get_eflags() & ~X86_EFLAGS_TF);
+
+	/*
+	 * If vsyscalls are emulated, we expect a single trap in the
+	 * vsyscall page -- the call instruction will trap with RIP
+	 * pointing to the entry point before emulation takes over.
+	 * In native mode, we expect two traps, since whatever code
+	 * the vsyscall page contains will be more than just a ret
+	 * instruction.
+	 */
+	is_native = (num_vsyscall_traps > 1);
+
+	printf("[%s]\tvsyscalls are %s (%d instructions in vsyscall page)\n",
+	       (is_native ? "FAIL" : "OK"),
+	       (is_native ? "native" : "emulated"),
+	       (int)num_vsyscall_traps);
+
+	return is_native;
+}
+#endif
+
+int main(int argc, char **argv)
+{
+	int nerrs = 0;
+
+	init_vdso();
+	nerrs += init_vsys();
+
+	nerrs += test_gtod();
+	nerrs += test_time();
+	nerrs += test_getcpu(0);
+	nerrs += test_getcpu(1);
+
+	sethandler(SIGSEGV, sigsegv, 0);
+	nerrs += test_vsys_r();
+
+#ifdef __x86_64__
+	nerrs += test_emulation();
+#endif
+
+	return nerrs ? 1 : 0;
+}
diff --git a/tools/testing/selftests/x86/thunks.S b/tools/testing/selftests/x86/thunks.S
new file mode 100644
index 000000000..ce8a995bb
--- /dev/null
+++ b/tools/testing/selftests/x86/thunks.S
@@ -0,0 +1,67 @@
+/*
+ * thunks.S - assembly helpers for mixed-bitness code
+ * Copyright (c) 2015 Andrew Lutomirski
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * These are little helpers that make it easier to switch bitness on
+ * the fly.
+ */
+
+	.text
+
+	.global call32_from_64
+	.type call32_from_64, @function
+call32_from_64:
+	// rdi: stack to use
+	// esi: function to call
+
+	// Save registers
+	pushq %rbx
+	pushq %rbp
+	pushq %r12
+	pushq %r13
+	pushq %r14
+	pushq %r15
+	pushfq
+
+	// Switch stacks
+	mov %rsp,(%rdi)
+	mov %rdi,%rsp
+
+	// Switch to compatibility mode
+	pushq $0x23  /* USER32_CS */
+	pushq $1f
+	lretq
+
+1:
+	.code32
+	// Call the function
+	call *%esi
+	// Switch back to long mode
+	jmp $0x33,$1f
+	.code64
+
+1:
+	// Restore the stack
+	mov (%rsp),%rsp
+
+	// Restore registers
+	popfq
+	popq %r15
+	popq %r14
+	popq %r13
+	popq %r12
+	popq %rbp
+	popq %rbx
+
+	ret
+
+.size call32_from_64, .-call32_from_64
diff --git a/tools/testing/selftests/x86/thunks_32.S b/tools/testing/selftests/x86/thunks_32.S
new file mode 100644
index 000000000..29b644bb9
--- /dev/null
+++ b/tools/testing/selftests/x86/thunks_32.S
@@ -0,0 +1,55 @@
+/*
+ * thunks_32.S - assembly helpers for mixed-bitness code
+ * Copyright (c) 2015 Denys Vlasenko
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * These are little helpers that make it easier to switch bitness on
+ * the fly.
+ */
+
+	.text
+	.code32
+
+	.global call64_from_32
+	.type call32_from_64, @function
+
+	// 4(%esp): function to call
+call64_from_32:
+	// Fetch function address
+	mov	4(%esp), %eax
+
+	// Save registers which are callee-clobbered by 64-bit ABI
+	push	%ecx
+	push	%edx
+	push	%esi
+	push	%edi
+
+	// Switch to long mode
+	jmp	$0x33,$1f
+1:	.code64
+
+	// Call the function
+	call	*%rax
+
+	// Switch to compatibility mode
+	push	$0x23  /* USER32_CS */
+	.code32; push $1f; .code64 /* hack: can't have X86_64_32S relocation in 32-bit ELF */
+	lretq
+1:	.code32
+
+	pop	%edi
+	pop	%esi
+	pop	%edx
+	pop	%ecx
+
+	ret
+
+.size call64_from_32, .-call64_from_32
diff --git a/tools/testing/selftests/x86/trivial_32bit_program.c b/tools/testing/selftests/x86/trivial_32bit_program.c
new file mode 100644
index 000000000..fabdf0f51
--- /dev/null
+++ b/tools/testing/selftests/x86/trivial_32bit_program.c
@@ -0,0 +1,18 @@
+/*
+ * Trivial program to check that we have a valid 32-bit build environment.
+ * Copyright (c) 2015 Andy Lutomirski
+ * GPL v2
+ */
+
+#ifndef __i386__
+# error wrong architecture
+#endif
+
+#include <stdio.h>
+
+int main()
+{
+	printf("\n");
+
+	return 0;
+}
diff --git a/tools/testing/selftests/x86/trivial_64bit_program.c b/tools/testing/selftests/x86/trivial_64bit_program.c
new file mode 100644
index 000000000..05c6a41b3
--- /dev/null
+++ b/tools/testing/selftests/x86/trivial_64bit_program.c
@@ -0,0 +1,18 @@
+/*
+ * Trivial program to check that we have a valid 64-bit build environment.
+ * Copyright (c) 2015 Andy Lutomirski
+ * GPL v2
+ */
+
+#ifndef __x86_64__
+# error wrong architecture
+#endif
+
+#include <stdio.h>
+
+int main()
+{
+	printf("\n");
+
+	return 0;
+}
diff --git a/tools/testing/selftests/x86/trivial_program.c b/tools/testing/selftests/x86/trivial_program.c
new file mode 100644
index 000000000..46a447163
--- /dev/null
+++ b/tools/testing/selftests/x86/trivial_program.c
@@ -0,0 +1,10 @@
+/* Trivial program to check that compilation with certain flags is working. */
+
+#include <stdio.h>
+
+int
+main(void)
+{
+	puts("");
+	return 0;
+}
diff --git a/tools/testing/selftests/x86/unwind_vdso.c b/tools/testing/selftests/x86/unwind_vdso.c
new file mode 100644
index 000000000..00a26a82f
--- /dev/null
+++ b/tools/testing/selftests/x86/unwind_vdso.c
@@ -0,0 +1,211 @@
+/*
+ * unwind_vdso.c - tests unwind info for AT_SYSINFO in the vDSO
+ * Copyright (c) 2014-2015 Andrew Lutomirski
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * This tests __kernel_vsyscall's unwind info.
+ */
+
+#define _GNU_SOURCE
+
+#include <features.h>
+#include <stdio.h>
+
+#if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ < 16
+
+int main()
+{
+	/* We need getauxval(). */
+	printf("[SKIP]\tGLIBC before 2.16 cannot compile this test\n");
+	return 0;
+}
+
+#else
+
+#include <sys/time.h>
+#include <stdlib.h>
+#include <syscall.h>
+#include <unistd.h>
+#include <string.h>
+#include <inttypes.h>
+#include <sys/mman.h>
+#include <signal.h>
+#include <sys/ucontext.h>
+#include <err.h>
+#include <stddef.h>
+#include <stdbool.h>
+#include <sys/ptrace.h>
+#include <sys/user.h>
+#include <sys/ucontext.h>
+#include <link.h>
+#include <sys/auxv.h>
+#include <dlfcn.h>
+#include <unwind.h>
+
+static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
+		       int flags)
+{
+	struct sigaction sa;
+	memset(&sa, 0, sizeof(sa));
+	sa.sa_sigaction = handler;
+	sa.sa_flags = SA_SIGINFO | flags;
+	sigemptyset(&sa.sa_mask);
+	if (sigaction(sig, &sa, 0))
+		err(1, "sigaction");
+}
+
+#ifdef __x86_64__
+# define WIDTH "q"
+#else
+# define WIDTH "l"
+#endif
+
+static unsigned long get_eflags(void)
+{
+	unsigned long eflags;
+	asm volatile ("pushf" WIDTH "\n\tpop" WIDTH " %0" : "=rm" (eflags));
+	return eflags;
+}
+
+static void set_eflags(unsigned long eflags)
+{
+	asm volatile ("push" WIDTH " %0\n\tpopf" WIDTH
+		      : : "rm" (eflags) : "flags");
+}
+
+#define X86_EFLAGS_TF (1UL << 8)
+
+static volatile sig_atomic_t nerrs;
+static unsigned long sysinfo;
+static bool got_sysinfo = false;
+static unsigned long return_address;
+
+struct unwind_state {
+	unsigned long ip;	/* trap source */
+	int depth;		/* -1 until we hit the trap source */
+};
+
+_Unwind_Reason_Code trace_fn(struct _Unwind_Context * ctx, void *opaque)
+{
+	struct unwind_state *state = opaque;
+	unsigned long ip = _Unwind_GetIP(ctx);
+
+	if (state->depth == -1) {
+		if (ip == state->ip)
+			state->depth = 0;
+		else
+			return _URC_NO_REASON;	/* Not there yet */
+	}
+	printf("\t  0x%lx\n", ip);
+
+	if (ip == return_address) {
+		/* Here we are. */
+		unsigned long eax = _Unwind_GetGR(ctx, 0);
+		unsigned long ecx = _Unwind_GetGR(ctx, 1);
+		unsigned long edx = _Unwind_GetGR(ctx, 2);
+		unsigned long ebx = _Unwind_GetGR(ctx, 3);
+		unsigned long ebp = _Unwind_GetGR(ctx, 5);
+		unsigned long esi = _Unwind_GetGR(ctx, 6);
+		unsigned long edi = _Unwind_GetGR(ctx, 7);
+		bool ok = (eax == SYS_getpid || eax == getpid()) &&
+			ebx == 1 && ecx == 2 && edx == 3 &&
+			esi == 4 && edi == 5 && ebp == 6;
+
+		if (!ok)
+			nerrs++;
+		printf("[%s]\t  NR = %ld, args = %ld, %ld, %ld, %ld, %ld, %ld\n",
+		       (ok ? "OK" : "FAIL"),
+		       eax, ebx, ecx, edx, esi, edi, ebp);
+
+		return _URC_NORMAL_STOP;
+	} else {
+		state->depth++;
+		return _URC_NO_REASON;
+	}
+}
+
+static void sigtrap(int sig, siginfo_t *info, void *ctx_void)
+{
+	ucontext_t *ctx = (ucontext_t *)ctx_void;
+	struct unwind_state state;
+	unsigned long ip = ctx->uc_mcontext.gregs[REG_EIP];
+
+	if (!got_sysinfo && ip == sysinfo) {
+		got_sysinfo = true;
+
+		/* Find the return address. */
+		return_address = *(unsigned long *)(unsigned long)ctx->uc_mcontext.gregs[REG_ESP];
+
+		printf("\tIn vsyscall at 0x%lx, returning to 0x%lx\n",
+		       ip, return_address);
+	}
+
+	if (!got_sysinfo)
+		return;		/* Not there yet */
+
+	if (ip == return_address) {
+		ctx->uc_mcontext.gregs[REG_EFL] &= ~X86_EFLAGS_TF;
+		printf("\tVsyscall is done\n");
+		return;
+	}
+
+	printf("\tSIGTRAP at 0x%lx\n", ip);
+
+	state.ip = ip;
+	state.depth = -1;
+	_Unwind_Backtrace(trace_fn, &state);
+}
+
+int main()
+{
+	sysinfo = getauxval(AT_SYSINFO);
+	printf("\tAT_SYSINFO is 0x%lx\n", sysinfo);
+
+	Dl_info info;
+	if (!dladdr((void *)sysinfo, &info)) {
+		printf("[WARN]\tdladdr failed on AT_SYSINFO\n");
+	} else {
+		printf("[OK]\tAT_SYSINFO maps to %s, loaded at 0x%p\n",
+		       info.dli_fname, info.dli_fbase);
+	}
+
+	sethandler(SIGTRAP, sigtrap, 0);
+
+	syscall(SYS_getpid);  /* Force symbol binding without TF set. */
+	printf("[RUN]\tSet TF and check a fast syscall\n");
+	set_eflags(get_eflags() | X86_EFLAGS_TF);
+	syscall(SYS_getpid, 1, 2, 3, 4, 5, 6);
+	if (!got_sysinfo) {
+		set_eflags(get_eflags() & ~X86_EFLAGS_TF);
+
+		/*
+		 * The most likely cause of this is that you're on Debian or
+		 * a Debian-based distro, you're missing libc6-i686, and you're
+		 * affected by libc/19006 (https://sourceware.org/PR19006).
+		 */
+		printf("[WARN]\tsyscall(2) didn't enter AT_SYSINFO\n");
+	}
+
+	if (get_eflags() & X86_EFLAGS_TF) {
+		printf("[FAIL]\tTF is still set\n");
+		nerrs++;
+	}
+
+	if (nerrs) {
+		printf("[FAIL]\tThere were errors\n");
+		return 1;
+	} else {
+		printf("[OK]\tAll is well\n");
+		return 0;
+	}
+}
+
+#endif	/* New enough libc */
diff --git a/tools/testing/selftests/x86/vdso_restorer.c b/tools/testing/selftests/x86/vdso_restorer.c
new file mode 100644
index 000000000..cb038424a
--- /dev/null
+++ b/tools/testing/selftests/x86/vdso_restorer.c
@@ -0,0 +1,88 @@
+/*
+ * vdso_restorer.c - tests vDSO-based signal restore
+ * Copyright (c) 2015 Andrew Lutomirski
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * This makes sure that sa_restorer == NULL keeps working on 32-bit
+ * configurations.  Modern glibc doesn't use it under any circumstances,
+ * so it's easy to overlook breakage.
+ *
+ * 64-bit userspace has never supported sa_restorer == NULL, so this is
+ * 32-bit only.
+ */
+
+#define _GNU_SOURCE
+
+#include <err.h>
+#include <stdio.h>
+#include <string.h>
+#include <signal.h>
+#include <unistd.h>
+#include <syscall.h>
+#include <sys/syscall.h>
+
+/* Open-code this -- the headers are too messy to easily use them. */
+struct real_sigaction {
+	void *handler;
+	unsigned long flags;
+	void *restorer;
+	unsigned int mask[2];
+};
+
+static volatile sig_atomic_t handler_called;
+
+static void handler_with_siginfo(int sig, siginfo_t *info, void *ctx_void)
+{
+	handler_called = 1;
+}
+
+static void handler_without_siginfo(int sig)
+{
+	handler_called = 1;
+}
+
+int main()
+{
+	int nerrs = 0;
+	struct real_sigaction sa;
+
+	memset(&sa, 0, sizeof(sa));
+	sa.handler = handler_with_siginfo;
+	sa.flags = SA_SIGINFO;
+	sa.restorer = NULL;	/* request kernel-provided restorer */
+
+	if (syscall(SYS_rt_sigaction, SIGUSR1, &sa, NULL, 8) != 0)
+		err(1, "raw rt_sigaction syscall");
+
+	raise(SIGUSR1);
+
+	if (handler_called) {
+		printf("[OK]\tSA_SIGINFO handler returned successfully\n");
+	} else {
+		printf("[FAIL]\tSA_SIGINFO handler was not called\n");
+		nerrs++;
+	}
+
+	sa.flags = 0;
+	sa.handler = handler_without_siginfo;
+	if (syscall(SYS_sigaction, SIGUSR1, &sa, 0) != 0)
+		err(1, "raw sigaction syscall");
+	handler_called = 0;
+
+	raise(SIGUSR1);
+
+	if (handler_called) {
+		printf("[OK]\t!SA_SIGINFO handler returned successfully\n");
+	} else {
+		printf("[FAIL]\t!SA_SIGINFO handler was not called\n");
+		nerrs++;
+	}
+}
diff --git a/tools/testing/selftests/zram/Makefile b/tools/testing/selftests/zram/Makefile
new file mode 100644
index 000000000..7f78eb1b5
--- /dev/null
+++ b/tools/testing/selftests/zram/Makefile
@@ -0,0 +1,9 @@
+# SPDX-License-Identifier: GPL-2.0
+all:
+
+TEST_PROGS := zram.sh
+TEST_FILES := zram01.sh zram02.sh zram_lib.sh
+EXTRA_CLEAN := err.log
+
+include ../lib.mk
+
diff --git a/tools/testing/selftests/zram/README b/tools/testing/selftests/zram/README
new file mode 100644
index 000000000..7972cc512
--- /dev/null
+++ b/tools/testing/selftests/zram/README
@@ -0,0 +1,40 @@
+zram: Compressed RAM based block devices
+----------------------------------------
+* Introduction
+
+The zram module creates RAM based block devices named /dev/zram<id>
+(<id> = 0, 1, ...). Pages written to these disks are compressed and stored
+in memory itself. These disks allow very fast I/O and compression provides
+good amounts of memory savings. Some of the usecases include /tmp storage,
+use as swap disks, various caches under /var and maybe many more :)
+
+Statistics for individual zram devices are exported through sysfs nodes at
+/sys/block/zram<id>/
+
+Kconfig required:
+CONFIG_ZRAM=y
+CONFIG_CRYPTO_LZ4=y
+CONFIG_ZPOOL=y
+CONFIG_ZSMALLOC=y
+
+ZRAM Testcases
+--------------
+zram_lib.sh: create library with initialization/cleanup functions
+zram.sh: For sanity check of CONFIG_ZRAM and to run zram01 and zram02
+
+Two functional tests: zram01 and zram02:
+zram01.sh: creates general purpose ram disks with ext4 filesystems
+zram02.sh: creates block device for swap
+
+Commands required for testing:
+ - bc
+ - dd
+ - free
+ - awk
+ - mkswap
+ - swapon
+ - swapoff
+ - mkfs/ mkfs.ext4
+
+For more information please refer:
+kernel-source-tree/Documentation/blockdev/zram.txt
diff --git a/tools/testing/selftests/zram/config b/tools/testing/selftests/zram/config
new file mode 100644
index 000000000..e0cc47e2c
--- /dev/null
+++ b/tools/testing/selftests/zram/config
@@ -0,0 +1,2 @@
+CONFIG_ZSMALLOC=y
+CONFIG_ZRAM=m
diff --git a/tools/testing/selftests/zram/zram.sh b/tools/testing/selftests/zram/zram.sh
new file mode 100755
index 000000000..b0b91d9b0
--- /dev/null
+++ b/tools/testing/selftests/zram/zram.sh
@@ -0,0 +1,18 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+TCID="zram.sh"
+
+. ./zram_lib.sh
+
+run_zram () {
+echo "--------------------"
+echo "running zram tests"
+echo "--------------------"
+./zram01.sh
+echo ""
+./zram02.sh
+}
+
+check_prereqs
+
+run_zram
diff --git a/tools/testing/selftests/zram/zram01.sh b/tools/testing/selftests/zram/zram01.sh
new file mode 100755
index 000000000..8abc99650
--- /dev/null
+++ b/tools/testing/selftests/zram/zram01.sh
@@ -0,0 +1,84 @@
+#!/bin/bash
+# Copyright (c) 2015 Oracle and/or its affiliates. All Rights Reserved.
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation; either version 2 of
+# the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it would be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	See the
+# GNU General Public License for more details.
+#
+# Test creates several zram devices with different filesystems on them.
+# It fills each device with zeros and checks that compression works.
+#
+# Author: Alexey Kodanev <alexey.kodanev@oracle.com>
+# Modified: Naresh Kamboju <naresh.kamboju@linaro.org>
+
+TCID="zram01"
+ERR_CODE=0
+
+. ./zram_lib.sh
+
+# Test will create the following number of zram devices:
+dev_num=1
+# This is a list of parameters for zram devices.
+# Number of items must be equal to 'dev_num' parameter.
+zram_max_streams="2"
+
+# The zram sysfs node 'disksize' value can be either in bytes,
+# or you can use mem suffixes. But in some old kernels, mem
+# suffixes are not supported, for example, in RHEL6.6GA's kernel
+# layer, it uses strict_strtoull() to parse disksize which does
+# not support mem suffixes, in some newer kernels, they use
+# memparse() which supports mem suffixes. So here we just use
+# bytes to make sure everything works correctly.
+zram_sizes="2097152" # 2MB
+zram_mem_limits="2M"
+zram_filesystems="ext4"
+zram_algs="lzo"
+
+zram_fill_fs()
+{
+	for i in $(seq $dev_start $dev_end); do
+		echo "fill zram$i..."
+		local b=0
+		while [ true ]; do
+			dd conv=notrunc if=/dev/zero of=zram${i}/file \
+				oflag=append count=1 bs=1024 status=none \
+				> /dev/null 2>&1 || break
+			b=$(($b + 1))
+		done
+		echo "zram$i can be filled with '$b' KB"
+
+		local mem_used_total=`awk '{print $3}' "/sys/block/zram$i/mm_stat"`
+		local v=$((100 * 1024 * $b / $mem_used_total))
+		if [ "$v" -lt 100 ]; then
+			 echo "FAIL compression ratio: 0.$v:1"
+			 ERR_CODE=-1
+			 return
+		fi
+
+		echo "zram compression ratio: $(echo "scale=2; $v / 100 " | bc):1: OK"
+	done
+}
+
+check_prereqs
+zram_load
+zram_max_streams
+zram_compress_alg
+zram_set_disksizes
+zram_set_memlimit
+zram_makefs
+zram_mount
+
+zram_fill_fs
+zram_cleanup
+
+if [ $ERR_CODE -ne 0 ]; then
+	echo "$TCID : [FAIL]"
+else
+	echo "$TCID : [PASS]"
+fi
diff --git a/tools/testing/selftests/zram/zram02.sh b/tools/testing/selftests/zram/zram02.sh
new file mode 100755
index 000000000..3768cfd2e
--- /dev/null
+++ b/tools/testing/selftests/zram/zram02.sh
@@ -0,0 +1,53 @@
+#!/bin/bash
+# Copyright (c) 2015 Oracle and/or its affiliates. All Rights Reserved.
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation; either version 2 of
+# the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it would be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	See the
+# GNU General Public License for more details.
+#
+# Test checks that we can create swap zram device.
+#
+# Author: Alexey Kodanev <alexey.kodanev@oracle.com>
+# Modified: Naresh Kamboju <naresh.kamboju@linaro.org>
+
+TCID="zram02"
+ERR_CODE=0
+
+. ./zram_lib.sh
+
+# Test will create the following number of zram devices:
+dev_num=1
+# This is a list of parameters for zram devices.
+# Number of items must be equal to 'dev_num' parameter.
+zram_max_streams="2"
+
+# The zram sysfs node 'disksize' value can be either in bytes,
+# or you can use mem suffixes. But in some old kernels, mem
+# suffixes are not supported, for example, in RHEL6.6GA's kernel
+# layer, it uses strict_strtoull() to parse disksize which does
+# not support mem suffixes, in some newer kernels, they use
+# memparse() which supports mem suffixes. So here we just use
+# bytes to make sure everything works correctly.
+zram_sizes="1048576" # 1M
+zram_mem_limits="1M"
+
+check_prereqs
+zram_load
+zram_max_streams
+zram_set_disksizes
+zram_set_memlimit
+zram_makeswap
+zram_swapoff
+zram_cleanup
+
+if [ $ERR_CODE -ne 0 ]; then
+	echo "$TCID : [FAIL]"
+else
+	echo "$TCID : [PASS]"
+fi
diff --git a/tools/testing/selftests/zram/zram_lib.sh b/tools/testing/selftests/zram/zram_lib.sh
new file mode 100755
index 000000000..130d193cb
--- /dev/null
+++ b/tools/testing/selftests/zram/zram_lib.sh
@@ -0,0 +1,278 @@
+#!/bin/sh
+# Copyright (c) 2015 Oracle and/or its affiliates. All Rights Reserved.
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation; either version 2 of
+# the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it would be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	See the
+# GNU General Public License for more details.
+#
+# Author: Alexey Kodanev <alexey.kodanev@oracle.com>
+# Modified: Naresh Kamboju <naresh.kamboju@linaro.org>
+
+dev_makeswap=-1
+dev_mounted=-1
+dev_start=0
+dev_end=-1
+module_load=-1
+sys_control=-1
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+kernel_version=`uname -r | cut -d'.' -f1,2`
+kernel_major=${kernel_version%.*}
+kernel_minor=${kernel_version#*.}
+
+trap INT
+
+check_prereqs()
+{
+	local msg="skip all tests:"
+	local uid=$(id -u)
+
+	if [ $uid -ne 0 ]; then
+		echo $msg must be run as root >&2
+		exit $ksft_skip
+	fi
+}
+
+kernel_gte()
+{
+	major=${1%.*}
+	minor=${1#*.}
+
+	if [ $kernel_major -gt $major ]; then
+		return 0
+	elif [[ $kernel_major -eq $major && $kernel_minor -ge $minor ]]; then
+		return 0
+	fi
+
+	return 1
+}
+
+zram_cleanup()
+{
+	echo "zram cleanup"
+	local i=
+	for i in $(seq $dev_start $dev_makeswap); do
+		swapoff /dev/zram$i
+	done
+
+	for i in $(seq $dev_start $dev_mounted); do
+		umount /dev/zram$i
+	done
+
+	for i in $(seq $dev_start $dev_end); do
+		echo 1 > /sys/block/zram${i}/reset
+		rm -rf zram$i
+	done
+
+	if [ $sys_control -eq 1 ]; then
+		for i in $(seq $dev_start $dev_end); do
+			echo $i > /sys/class/zram-control/hot_remove
+		done
+	fi
+
+	if [ $module_load -eq 1 ]; then
+		rmmod zram > /dev/null 2>&1
+	fi
+}
+
+zram_load()
+{
+	echo "create '$dev_num' zram device(s)"
+
+	# zram module loaded, new kernel
+	if [ -d "/sys/class/zram-control" ]; then
+		echo "zram modules already loaded, kernel supports" \
+			"zram-control interface"
+		dev_start=$(ls /dev/zram* | wc -w)
+		dev_end=$(($dev_start + $dev_num - 1))
+		sys_control=1
+
+		for i in $(seq $dev_start $dev_end); do
+			cat /sys/class/zram-control/hot_add > /dev/null
+		done
+
+		echo "all zram devices (/dev/zram$dev_start~$dev_end" \
+			"successfully created"
+		return 0
+	fi
+
+	# detect old kernel or built-in
+	modprobe zram num_devices=$dev_num
+	if [ ! -d "/sys/class/zram-control" ]; then
+		if grep -q '^zram' /proc/modules; then
+			rmmod zram > /dev/null 2>&1
+			if [ $? -ne 0 ]; then
+				echo "zram module is being used on old kernel" \
+					"without zram-control interface"
+				exit $ksft_skip
+			fi
+		else
+			echo "test needs CONFIG_ZRAM=m on old kernel without" \
+				"zram-control interface"
+			exit $ksft_skip
+		fi
+		modprobe zram num_devices=$dev_num
+	fi
+
+	module_load=1
+	dev_end=$(($dev_num - 1))
+	echo "all zram devices (/dev/zram0~$dev_end) successfully created"
+}
+
+zram_max_streams()
+{
+	echo "set max_comp_streams to zram device(s)"
+
+	kernel_gte 4.7
+	if [ $? -eq 0 ]; then
+		echo "The device attribute max_comp_streams was"\
+		               "deprecated in 4.7"
+		return 0
+	fi
+
+	local i=$dev_start
+	for max_s in $zram_max_streams; do
+		local sys_path="/sys/block/zram${i}/max_comp_streams"
+		echo $max_s > $sys_path || \
+			echo "FAIL failed to set '$max_s' to $sys_path"
+		sleep 1
+		local max_streams=$(cat $sys_path)
+
+		[ "$max_s" -ne "$max_streams" ] && \
+			echo "FAIL can't set max_streams '$max_s', get $max_stream"
+
+		i=$(($i + 1))
+		echo "$sys_path = '$max_streams'"
+	done
+
+	echo "zram max streams: OK"
+}
+
+zram_compress_alg()
+{
+	echo "test that we can set compression algorithm"
+
+	local i=$dev_start
+	local algs=$(cat /sys/block/zram${i}/comp_algorithm)
+	echo "supported algs: $algs"
+
+	for alg in $zram_algs; do
+		local sys_path="/sys/block/zram${i}/comp_algorithm"
+		echo "$alg" >	$sys_path || \
+			echo "FAIL can't set '$alg' to $sys_path"
+		i=$(($i + 1))
+		echo "$sys_path = '$alg'"
+	done
+
+	echo "zram set compression algorithm: OK"
+}
+
+zram_set_disksizes()
+{
+	echo "set disk size to zram device(s)"
+	local i=$dev_start
+	for ds in $zram_sizes; do
+		local sys_path="/sys/block/zram${i}/disksize"
+		echo "$ds" >	$sys_path || \
+			echo "FAIL can't set '$ds' to $sys_path"
+
+		i=$(($i + 1))
+		echo "$sys_path = '$ds'"
+	done
+
+	echo "zram set disksizes: OK"
+}
+
+zram_set_memlimit()
+{
+	echo "set memory limit to zram device(s)"
+
+	local i=$dev_start
+	for ds in $zram_mem_limits; do
+		local sys_path="/sys/block/zram${i}/mem_limit"
+		echo "$ds" >	$sys_path || \
+			echo "FAIL can't set '$ds' to $sys_path"
+
+		i=$(($i + 1))
+		echo "$sys_path = '$ds'"
+	done
+
+	echo "zram set memory limit: OK"
+}
+
+zram_makeswap()
+{
+	echo "make swap with zram device(s)"
+	local i=$dev_start
+	for i in $(seq $dev_start $dev_end); do
+		mkswap /dev/zram$i > err.log 2>&1
+		if [ $? -ne 0 ]; then
+			cat err.log
+			echo "FAIL mkswap /dev/zram$1 failed"
+		fi
+
+		swapon /dev/zram$i > err.log 2>&1
+		if [ $? -ne 0 ]; then
+			cat err.log
+			echo "FAIL swapon /dev/zram$1 failed"
+		fi
+
+		echo "done with /dev/zram$i"
+		dev_makeswap=$i
+	done
+
+	echo "zram making zram mkswap and swapon: OK"
+}
+
+zram_swapoff()
+{
+	local i=
+	for i in $(seq $dev_start $dev_end); do
+		swapoff /dev/zram$i > err.log 2>&1
+		if [ $? -ne 0 ]; then
+			cat err.log
+			echo "FAIL swapoff /dev/zram$i failed"
+		fi
+	done
+	dev_makeswap=-1
+
+	echo "zram swapoff: OK"
+}
+
+zram_makefs()
+{
+	local i=$dev_start
+	for fs in $zram_filesystems; do
+		# if requested fs not supported default it to ext2
+		which mkfs.$fs > /dev/null 2>&1 || fs=ext2
+
+		echo "make $fs filesystem on /dev/zram$i"
+		mkfs.$fs /dev/zram$i > err.log 2>&1
+		if [ $? -ne 0 ]; then
+			cat err.log
+			echo "FAIL failed to make $fs on /dev/zram$i"
+		fi
+		i=$(($i + 1))
+		echo "zram mkfs.$fs: OK"
+	done
+}
+
+zram_mount()
+{
+	local i=0
+	for i in $(seq $dev_start $dev_end); do
+		echo "mount /dev/zram$i"
+		mkdir zram$i
+		mount /dev/zram$i zram$i > /dev/null || \
+			echo "FAIL mount /dev/zram$i failed"
+		dev_mounted=$i
+	done
+
+	echo "zram mount of zram device(s): OK"
+}
diff --git a/tools/testing/vsock/.gitignore b/tools/testing/vsock/.gitignore
new file mode 100644
index 000000000..dc5f11faf
--- /dev/null
+++ b/tools/testing/vsock/.gitignore
@@ -0,0 +1,2 @@
+*.d
+vsock_diag_test
diff --git a/tools/testing/vsock/Makefile b/tools/testing/vsock/Makefile
new file mode 100644
index 000000000..66ba09241
--- /dev/null
+++ b/tools/testing/vsock/Makefile
@@ -0,0 +1,9 @@
+all: test
+test: vsock_diag_test
+vsock_diag_test: vsock_diag_test.o timeout.o control.o
+
+CFLAGS += -g -O2 -Werror -Wall -I. -I../../include/uapi -I../../include -Wno-pointer-sign -fno-strict-overflow -fno-strict-aliasing -fno-common -MMD -U_FORTIFY_SOURCE -D_GNU_SOURCE
+.PHONY: all test clean
+clean:
+	${RM} *.o *.d vsock_diag_test
+-include *.d
diff --git a/tools/testing/vsock/README b/tools/testing/vsock/README
new file mode 100644
index 000000000..2cc6d7302
--- /dev/null
+++ b/tools/testing/vsock/README
@@ -0,0 +1,36 @@
+AF_VSOCK test suite
+-------------------
+These tests exercise net/vmw_vsock/ host<->guest sockets for VMware, KVM, and
+Hyper-V.
+
+The following tests are available:
+
+  * vsock_diag_test - vsock_diag.ko module for listing open sockets
+
+The following prerequisite steps are not automated and must be performed prior
+to running tests:
+
+1. Build the kernel and these tests.
+2. Install the kernel and tests on the host.
+3. Install the kernel and tests inside the guest.
+4. Boot the guest and ensure that the AF_VSOCK transport is enabled.
+
+Invoke test binaries in both directions as follows:
+
+  # host=server, guest=client
+  (host)# $TEST_BINARY --mode=server \
+                       --control-port=1234 \
+                       --peer-cid=3
+  (guest)# $TEST_BINARY --mode=client \
+                        --control-host=$HOST_IP \
+                        --control-port=1234 \
+                        --peer-cid=2
+
+  # host=client, guest=server
+  (guest)# $TEST_BINARY --mode=server \
+                        --control-port=1234 \
+                        --peer-cid=2
+  (host)# $TEST_BINARY --mode=client \
+                       --control-port=$GUEST_IP \
+                       --control-port=1234 \
+                       --peer-cid=3
diff --git a/tools/testing/vsock/control.c b/tools/testing/vsock/control.c
new file mode 100644
index 000000000..90fd47f0e
--- /dev/null
+++ b/tools/testing/vsock/control.c
@@ -0,0 +1,219 @@
+/* Control socket for client/server test execution
+ *
+ * Copyright (C) 2017 Red Hat, Inc.
+ *
+ * Author: Stefan Hajnoczi <stefanha@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+
+/* The client and server may need to coordinate to avoid race conditions like
+ * the client attempting to connect to a socket that the server is not
+ * listening on yet.  The control socket offers a communications channel for
+ * such coordination tasks.
+ *
+ * If the client calls control_expectln("LISTENING"), then it will block until
+ * the server calls control_writeln("LISTENING").  This provides a simple
+ * mechanism for coordinating between the client and the server.
+ */
+
+#include <errno.h>
+#include <netdb.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+
+#include "timeout.h"
+#include "control.h"
+
+static int control_fd = -1;
+
+/* Open the control socket, either in server or client mode */
+void control_init(const char *control_host,
+		  const char *control_port,
+		  bool server)
+{
+	struct addrinfo hints = {
+		.ai_socktype = SOCK_STREAM,
+	};
+	struct addrinfo *result = NULL;
+	struct addrinfo *ai;
+	int ret;
+
+	ret = getaddrinfo(control_host, control_port, &hints, &result);
+	if (ret != 0) {
+		fprintf(stderr, "%s\n", gai_strerror(ret));
+		exit(EXIT_FAILURE);
+	}
+
+	for (ai = result; ai; ai = ai->ai_next) {
+		int fd;
+		int val = 1;
+
+		fd = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol);
+		if (fd < 0)
+			continue;
+
+		if (!server) {
+			if (connect(fd, ai->ai_addr, ai->ai_addrlen) < 0)
+				goto next;
+			control_fd = fd;
+			printf("Control socket connected to %s:%s.\n",
+			       control_host, control_port);
+			break;
+		}
+
+		if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR,
+			       &val, sizeof(val)) < 0) {
+			perror("setsockopt");
+			exit(EXIT_FAILURE);
+		}
+
+		if (bind(fd, ai->ai_addr, ai->ai_addrlen) < 0)
+			goto next;
+		if (listen(fd, 1) < 0)
+			goto next;
+
+		printf("Control socket listening on %s:%s\n",
+		       control_host, control_port);
+		fflush(stdout);
+
+		control_fd = accept(fd, NULL, 0);
+		close(fd);
+
+		if (control_fd < 0) {
+			perror("accept");
+			exit(EXIT_FAILURE);
+		}
+		printf("Control socket connection accepted...\n");
+		break;
+
+next:
+		close(fd);
+	}
+
+	if (control_fd < 0) {
+		fprintf(stderr, "Control socket initialization failed.  Invalid address %s:%s?\n",
+			control_host, control_port);
+		exit(EXIT_FAILURE);
+	}
+
+	freeaddrinfo(result);
+}
+
+/* Free resources */
+void control_cleanup(void)
+{
+	close(control_fd);
+	control_fd = -1;
+}
+
+/* Write a line to the control socket */
+void control_writeln(const char *str)
+{
+	ssize_t len = strlen(str);
+	ssize_t ret;
+
+	timeout_begin(TIMEOUT);
+
+	do {
+		ret = send(control_fd, str, len, MSG_MORE);
+		timeout_check("send");
+	} while (ret < 0 && errno == EINTR);
+
+	if (ret != len) {
+		perror("send");
+		exit(EXIT_FAILURE);
+	}
+
+	do {
+		ret = send(control_fd, "\n", 1, 0);
+		timeout_check("send");
+	} while (ret < 0 && errno == EINTR);
+
+	if (ret != 1) {
+		perror("send");
+		exit(EXIT_FAILURE);
+	}
+
+	timeout_end();
+}
+
+/* Return the next line from the control socket (without the trailing newline).
+ *
+ * The program terminates if a timeout occurs.
+ *
+ * The caller must free() the returned string.
+ */
+char *control_readln(void)
+{
+	char *buf = NULL;
+	size_t idx = 0;
+	size_t buflen = 0;
+
+	timeout_begin(TIMEOUT);
+
+	for (;;) {
+		ssize_t ret;
+
+		if (idx >= buflen) {
+			char *new_buf;
+
+			new_buf = realloc(buf, buflen + 80);
+			if (!new_buf) {
+				perror("realloc");
+				exit(EXIT_FAILURE);
+			}
+
+			buf = new_buf;
+			buflen += 80;
+		}
+
+		do {
+			ret = recv(control_fd, &buf[idx], 1, 0);
+			timeout_check("recv");
+		} while (ret < 0 && errno == EINTR);
+
+		if (ret == 0) {
+			fprintf(stderr, "unexpected EOF on control socket\n");
+			exit(EXIT_FAILURE);
+		}
+
+		if (ret != 1) {
+			perror("recv");
+			exit(EXIT_FAILURE);
+		}
+
+		if (buf[idx] == '\n') {
+			buf[idx] = '\0';
+			break;
+		}
+
+		idx++;
+	}
+
+	timeout_end();
+
+	return buf;
+}
+
+/* Wait until a given line is received or a timeout occurs */
+void control_expectln(const char *str)
+{
+	char *line;
+
+	line = control_readln();
+	if (strcmp(str, line) != 0) {
+		fprintf(stderr, "expected \"%s\" on control socket, got \"%s\"\n",
+			str, line);
+		exit(EXIT_FAILURE);
+	}
+
+	free(line);
+}
diff --git a/tools/testing/vsock/control.h b/tools/testing/vsock/control.h
new file mode 100644
index 000000000..54a07efd2
--- /dev/null
+++ b/tools/testing/vsock/control.h
@@ -0,0 +1,13 @@
+#ifndef CONTROL_H
+#define CONTROL_H
+
+#include <stdbool.h>
+
+void control_init(const char *control_host, const char *control_port,
+		  bool server);
+void control_cleanup(void);
+void control_writeln(const char *str);
+char *control_readln(void);
+void control_expectln(const char *str);
+
+#endif /* CONTROL_H */
diff --git a/tools/testing/vsock/timeout.c b/tools/testing/vsock/timeout.c
new file mode 100644
index 000000000..c49b3003b
--- /dev/null
+++ b/tools/testing/vsock/timeout.c
@@ -0,0 +1,64 @@
+/* Timeout API for single-threaded programs that use blocking
+ * syscalls (read/write/send/recv/connect/accept).
+ *
+ * Copyright (C) 2017 Red Hat, Inc.
+ *
+ * Author: Stefan Hajnoczi <stefanha@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+
+/* Use the following pattern:
+ *
+ *   timeout_begin(TIMEOUT);
+ *   do {
+ *       ret = accept(...);
+ *       timeout_check("accept");
+ *   } while (ret < 0 && ret == EINTR);
+ *   timeout_end();
+ */
+
+#include <stdlib.h>
+#include <stdbool.h>
+#include <unistd.h>
+#include <stdio.h>
+#include "timeout.h"
+
+static volatile bool timeout;
+
+/* SIGALRM handler function.  Do not use sleep(2), alarm(2), or
+ * setitimer(2) while using this API - they may interfere with each
+ * other.
+ */
+void sigalrm(int signo)
+{
+	timeout = true;
+}
+
+/* Start a timeout.  Call timeout_check() to verify that the timeout hasn't
+ * expired.  timeout_end() must be called to stop the timeout.  Timeouts cannot
+ * be nested.
+ */
+void timeout_begin(unsigned int seconds)
+{
+	alarm(seconds);
+}
+
+/* Exit with an error message if the timeout has expired */
+void timeout_check(const char *operation)
+{
+	if (timeout) {
+		fprintf(stderr, "%s timed out\n", operation);
+		exit(EXIT_FAILURE);
+	}
+}
+
+/* Stop a timeout */
+void timeout_end(void)
+{
+	alarm(0);
+	timeout = false;
+}
diff --git a/tools/testing/vsock/timeout.h b/tools/testing/vsock/timeout.h
new file mode 100644
index 000000000..77db9ce98
--- /dev/null
+++ b/tools/testing/vsock/timeout.h
@@ -0,0 +1,14 @@
+#ifndef TIMEOUT_H
+#define TIMEOUT_H
+
+enum {
+	/* Default timeout */
+	TIMEOUT = 10 /* seconds */
+};
+
+void sigalrm(int signo);
+void timeout_begin(unsigned int seconds);
+void timeout_check(const char *operation);
+void timeout_end(void);
+
+#endif /* TIMEOUT_H */
diff --git a/tools/testing/vsock/vsock_diag_test.c b/tools/testing/vsock/vsock_diag_test.c
new file mode 100644
index 000000000..e896a4af5
--- /dev/null
+++ b/tools/testing/vsock/vsock_diag_test.c
@@ -0,0 +1,681 @@
+/*
+ * vsock_diag_test - vsock_diag.ko test suite
+ *
+ * Copyright (C) 2017 Red Hat, Inc.
+ *
+ * Author: Stefan Hajnoczi <stefanha@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+
+#include <getopt.h>
+#include <stdio.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <unistd.h>
+#include <signal.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <linux/list.h>
+#include <linux/net.h>
+#include <linux/netlink.h>
+#include <linux/sock_diag.h>
+#include <netinet/tcp.h>
+
+#include "../../../include/uapi/linux/vm_sockets.h"
+#include "../../../include/uapi/linux/vm_sockets_diag.h"
+
+#include "timeout.h"
+#include "control.h"
+
+enum test_mode {
+	TEST_MODE_UNSET,
+	TEST_MODE_CLIENT,
+	TEST_MODE_SERVER
+};
+
+/* Per-socket status */
+struct vsock_stat {
+	struct list_head list;
+	struct vsock_diag_msg msg;
+};
+
+static const char *sock_type_str(int type)
+{
+	switch (type) {
+	case SOCK_DGRAM:
+		return "DGRAM";
+	case SOCK_STREAM:
+		return "STREAM";
+	default:
+		return "INVALID TYPE";
+	}
+}
+
+static const char *sock_state_str(int state)
+{
+	switch (state) {
+	case TCP_CLOSE:
+		return "UNCONNECTED";
+	case TCP_SYN_SENT:
+		return "CONNECTING";
+	case TCP_ESTABLISHED:
+		return "CONNECTED";
+	case TCP_CLOSING:
+		return "DISCONNECTING";
+	case TCP_LISTEN:
+		return "LISTEN";
+	default:
+		return "INVALID STATE";
+	}
+}
+
+static const char *sock_shutdown_str(int shutdown)
+{
+	switch (shutdown) {
+	case 1:
+		return "RCV_SHUTDOWN";
+	case 2:
+		return "SEND_SHUTDOWN";
+	case 3:
+		return "RCV_SHUTDOWN | SEND_SHUTDOWN";
+	default:
+		return "0";
+	}
+}
+
+static void print_vsock_addr(FILE *fp, unsigned int cid, unsigned int port)
+{
+	if (cid == VMADDR_CID_ANY)
+		fprintf(fp, "*:");
+	else
+		fprintf(fp, "%u:", cid);
+
+	if (port == VMADDR_PORT_ANY)
+		fprintf(fp, "*");
+	else
+		fprintf(fp, "%u", port);
+}
+
+static void print_vsock_stat(FILE *fp, struct vsock_stat *st)
+{
+	print_vsock_addr(fp, st->msg.vdiag_src_cid, st->msg.vdiag_src_port);
+	fprintf(fp, " ");
+	print_vsock_addr(fp, st->msg.vdiag_dst_cid, st->msg.vdiag_dst_port);
+	fprintf(fp, " %s %s %s %u\n",
+		sock_type_str(st->msg.vdiag_type),
+		sock_state_str(st->msg.vdiag_state),
+		sock_shutdown_str(st->msg.vdiag_shutdown),
+		st->msg.vdiag_ino);
+}
+
+static void print_vsock_stats(FILE *fp, struct list_head *head)
+{
+	struct vsock_stat *st;
+
+	list_for_each_entry(st, head, list)
+		print_vsock_stat(fp, st);
+}
+
+static struct vsock_stat *find_vsock_stat(struct list_head *head, int fd)
+{
+	struct vsock_stat *st;
+	struct stat stat;
+
+	if (fstat(fd, &stat) < 0) {
+		perror("fstat");
+		exit(EXIT_FAILURE);
+	}
+
+	list_for_each_entry(st, head, list)
+		if (st->msg.vdiag_ino == stat.st_ino)
+			return st;
+
+	fprintf(stderr, "cannot find fd %d\n", fd);
+	exit(EXIT_FAILURE);
+}
+
+static void check_no_sockets(struct list_head *head)
+{
+	if (!list_empty(head)) {
+		fprintf(stderr, "expected no sockets\n");
+		print_vsock_stats(stderr, head);
+		exit(1);
+	}
+}
+
+static void check_num_sockets(struct list_head *head, int expected)
+{
+	struct list_head *node;
+	int n = 0;
+
+	list_for_each(node, head)
+		n++;
+
+	if (n != expected) {
+		fprintf(stderr, "expected %d sockets, found %d\n",
+			expected, n);
+		print_vsock_stats(stderr, head);
+		exit(EXIT_FAILURE);
+	}
+}
+
+static void check_socket_state(struct vsock_stat *st, __u8 state)
+{
+	if (st->msg.vdiag_state != state) {
+		fprintf(stderr, "expected socket state %#x, got %#x\n",
+			state, st->msg.vdiag_state);
+		exit(EXIT_FAILURE);
+	}
+}
+
+static void send_req(int fd)
+{
+	struct sockaddr_nl nladdr = {
+		.nl_family = AF_NETLINK,
+	};
+	struct {
+		struct nlmsghdr nlh;
+		struct vsock_diag_req vreq;
+	} req = {
+		.nlh = {
+			.nlmsg_len = sizeof(req),
+			.nlmsg_type = SOCK_DIAG_BY_FAMILY,
+			.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP,
+		},
+		.vreq = {
+			.sdiag_family = AF_VSOCK,
+			.vdiag_states = ~(__u32)0,
+		},
+	};
+	struct iovec iov = {
+		.iov_base = &req,
+		.iov_len = sizeof(req),
+	};
+	struct msghdr msg = {
+		.msg_name = &nladdr,
+		.msg_namelen = sizeof(nladdr),
+		.msg_iov = &iov,
+		.msg_iovlen = 1,
+	};
+
+	for (;;) {
+		if (sendmsg(fd, &msg, 0) < 0) {
+			if (errno == EINTR)
+				continue;
+
+			perror("sendmsg");
+			exit(EXIT_FAILURE);
+		}
+
+		return;
+	}
+}
+
+static ssize_t recv_resp(int fd, void *buf, size_t len)
+{
+	struct sockaddr_nl nladdr = {
+		.nl_family = AF_NETLINK,
+	};
+	struct iovec iov = {
+		.iov_base = buf,
+		.iov_len = len,
+	};
+	struct msghdr msg = {
+		.msg_name = &nladdr,
+		.msg_namelen = sizeof(nladdr),
+		.msg_iov = &iov,
+		.msg_iovlen = 1,
+	};
+	ssize_t ret;
+
+	do {
+		ret = recvmsg(fd, &msg, 0);
+	} while (ret < 0 && errno == EINTR);
+
+	if (ret < 0) {
+		perror("recvmsg");
+		exit(EXIT_FAILURE);
+	}
+
+	return ret;
+}
+
+static void add_vsock_stat(struct list_head *sockets,
+			   const struct vsock_diag_msg *resp)
+{
+	struct vsock_stat *st;
+
+	st = malloc(sizeof(*st));
+	if (!st) {
+		perror("malloc");
+		exit(EXIT_FAILURE);
+	}
+
+	st->msg = *resp;
+	list_add_tail(&st->list, sockets);
+}
+
+/*
+ * Read vsock stats into a list.
+ */
+static void read_vsock_stat(struct list_head *sockets)
+{
+	long buf[8192 / sizeof(long)];
+	int fd;
+
+	fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_SOCK_DIAG);
+	if (fd < 0) {
+		perror("socket");
+		exit(EXIT_FAILURE);
+	}
+
+	send_req(fd);
+
+	for (;;) {
+		const struct nlmsghdr *h;
+		ssize_t ret;
+
+		ret = recv_resp(fd, buf, sizeof(buf));
+		if (ret == 0)
+			goto done;
+		if (ret < sizeof(*h)) {
+			fprintf(stderr, "short read of %zd bytes\n", ret);
+			exit(EXIT_FAILURE);
+		}
+
+		h = (struct nlmsghdr *)buf;
+
+		while (NLMSG_OK(h, ret)) {
+			if (h->nlmsg_type == NLMSG_DONE)
+				goto done;
+
+			if (h->nlmsg_type == NLMSG_ERROR) {
+				const struct nlmsgerr *err = NLMSG_DATA(h);
+
+				if (h->nlmsg_len < NLMSG_LENGTH(sizeof(*err)))
+					fprintf(stderr, "NLMSG_ERROR\n");
+				else {
+					errno = -err->error;
+					perror("NLMSG_ERROR");
+				}
+
+				exit(EXIT_FAILURE);
+			}
+
+			if (h->nlmsg_type != SOCK_DIAG_BY_FAMILY) {
+				fprintf(stderr, "unexpected nlmsg_type %#x\n",
+					h->nlmsg_type);
+				exit(EXIT_FAILURE);
+			}
+			if (h->nlmsg_len <
+			    NLMSG_LENGTH(sizeof(struct vsock_diag_msg))) {
+				fprintf(stderr, "short vsock_diag_msg\n");
+				exit(EXIT_FAILURE);
+			}
+
+			add_vsock_stat(sockets, NLMSG_DATA(h));
+
+			h = NLMSG_NEXT(h, ret);
+		}
+	}
+
+done:
+	close(fd);
+}
+
+static void free_sock_stat(struct list_head *sockets)
+{
+	struct vsock_stat *st;
+	struct vsock_stat *next;
+
+	list_for_each_entry_safe(st, next, sockets, list)
+		free(st);
+}
+
+static void test_no_sockets(unsigned int peer_cid)
+{
+	LIST_HEAD(sockets);
+
+	read_vsock_stat(&sockets);
+
+	check_no_sockets(&sockets);
+
+	free_sock_stat(&sockets);
+}
+
+static void test_listen_socket_server(unsigned int peer_cid)
+{
+	union {
+		struct sockaddr sa;
+		struct sockaddr_vm svm;
+	} addr = {
+		.svm = {
+			.svm_family = AF_VSOCK,
+			.svm_port = 1234,
+			.svm_cid = VMADDR_CID_ANY,
+		},
+	};
+	LIST_HEAD(sockets);
+	struct vsock_stat *st;
+	int fd;
+
+	fd = socket(AF_VSOCK, SOCK_STREAM, 0);
+
+	if (bind(fd, &addr.sa, sizeof(addr.svm)) < 0) {
+		perror("bind");
+		exit(EXIT_FAILURE);
+	}
+
+	if (listen(fd, 1) < 0) {
+		perror("listen");
+		exit(EXIT_FAILURE);
+	}
+
+	read_vsock_stat(&sockets);
+
+	check_num_sockets(&sockets, 1);
+	st = find_vsock_stat(&sockets, fd);
+	check_socket_state(st, TCP_LISTEN);
+
+	close(fd);
+	free_sock_stat(&sockets);
+}
+
+static void test_connect_client(unsigned int peer_cid)
+{
+	union {
+		struct sockaddr sa;
+		struct sockaddr_vm svm;
+	} addr = {
+		.svm = {
+			.svm_family = AF_VSOCK,
+			.svm_port = 1234,
+			.svm_cid = peer_cid,
+		},
+	};
+	int fd;
+	int ret;
+	LIST_HEAD(sockets);
+	struct vsock_stat *st;
+
+	control_expectln("LISTENING");
+
+	fd = socket(AF_VSOCK, SOCK_STREAM, 0);
+
+	timeout_begin(TIMEOUT);
+	do {
+		ret = connect(fd, &addr.sa, sizeof(addr.svm));
+		timeout_check("connect");
+	} while (ret < 0 && errno == EINTR);
+	timeout_end();
+
+	if (ret < 0) {
+		perror("connect");
+		exit(EXIT_FAILURE);
+	}
+
+	read_vsock_stat(&sockets);
+
+	check_num_sockets(&sockets, 1);
+	st = find_vsock_stat(&sockets, fd);
+	check_socket_state(st, TCP_ESTABLISHED);
+
+	control_expectln("DONE");
+	control_writeln("DONE");
+
+	close(fd);
+	free_sock_stat(&sockets);
+}
+
+static void test_connect_server(unsigned int peer_cid)
+{
+	union {
+		struct sockaddr sa;
+		struct sockaddr_vm svm;
+	} addr = {
+		.svm = {
+			.svm_family = AF_VSOCK,
+			.svm_port = 1234,
+			.svm_cid = VMADDR_CID_ANY,
+		},
+	};
+	union {
+		struct sockaddr sa;
+		struct sockaddr_vm svm;
+	} clientaddr;
+	socklen_t clientaddr_len = sizeof(clientaddr.svm);
+	LIST_HEAD(sockets);
+	struct vsock_stat *st;
+	int fd;
+	int client_fd;
+
+	fd = socket(AF_VSOCK, SOCK_STREAM, 0);
+
+	if (bind(fd, &addr.sa, sizeof(addr.svm)) < 0) {
+		perror("bind");
+		exit(EXIT_FAILURE);
+	}
+
+	if (listen(fd, 1) < 0) {
+		perror("listen");
+		exit(EXIT_FAILURE);
+	}
+
+	control_writeln("LISTENING");
+
+	timeout_begin(TIMEOUT);
+	do {
+		client_fd = accept(fd, &clientaddr.sa, &clientaddr_len);
+		timeout_check("accept");
+	} while (client_fd < 0 && errno == EINTR);
+	timeout_end();
+
+	if (client_fd < 0) {
+		perror("accept");
+		exit(EXIT_FAILURE);
+	}
+	if (clientaddr.sa.sa_family != AF_VSOCK) {
+		fprintf(stderr, "expected AF_VSOCK from accept(2), got %d\n",
+			clientaddr.sa.sa_family);
+		exit(EXIT_FAILURE);
+	}
+	if (clientaddr.svm.svm_cid != peer_cid) {
+		fprintf(stderr, "expected peer CID %u from accept(2), got %u\n",
+			peer_cid, clientaddr.svm.svm_cid);
+		exit(EXIT_FAILURE);
+	}
+
+	read_vsock_stat(&sockets);
+
+	check_num_sockets(&sockets, 2);
+	find_vsock_stat(&sockets, fd);
+	st = find_vsock_stat(&sockets, client_fd);
+	check_socket_state(st, TCP_ESTABLISHED);
+
+	control_writeln("DONE");
+	control_expectln("DONE");
+
+	close(client_fd);
+	close(fd);
+	free_sock_stat(&sockets);
+}
+
+static struct {
+	const char *name;
+	void (*run_client)(unsigned int peer_cid);
+	void (*run_server)(unsigned int peer_cid);
+} test_cases[] = {
+	{
+		.name = "No sockets",
+		.run_server = test_no_sockets,
+	},
+	{
+		.name = "Listen socket",
+		.run_server = test_listen_socket_server,
+	},
+	{
+		.name = "Connect",
+		.run_client = test_connect_client,
+		.run_server = test_connect_server,
+	},
+	{},
+};
+
+static void init_signals(void)
+{
+	struct sigaction act = {
+		.sa_handler = sigalrm,
+	};
+
+	sigaction(SIGALRM, &act, NULL);
+	signal(SIGPIPE, SIG_IGN);
+}
+
+static unsigned int parse_cid(const char *str)
+{
+	char *endptr = NULL;
+	unsigned long int n;
+
+	errno = 0;
+	n = strtoul(str, &endptr, 10);
+	if (errno || *endptr != '\0') {
+		fprintf(stderr, "malformed CID \"%s\"\n", str);
+		exit(EXIT_FAILURE);
+	}
+	return n;
+}
+
+static const char optstring[] = "";
+static const struct option longopts[] = {
+	{
+		.name = "control-host",
+		.has_arg = required_argument,
+		.val = 'H',
+	},
+	{
+		.name = "control-port",
+		.has_arg = required_argument,
+		.val = 'P',
+	},
+	{
+		.name = "mode",
+		.has_arg = required_argument,
+		.val = 'm',
+	},
+	{
+		.name = "peer-cid",
+		.has_arg = required_argument,
+		.val = 'p',
+	},
+	{
+		.name = "help",
+		.has_arg = no_argument,
+		.val = '?',
+	},
+	{},
+};
+
+static void usage(void)
+{
+	fprintf(stderr, "Usage: vsock_diag_test [--help] [--control-host=<host>] --control-port=<port> --mode=client|server --peer-cid=<cid>\n"
+		"\n"
+		"  Server: vsock_diag_test --control-port=1234 --mode=server --peer-cid=3\n"
+		"  Client: vsock_diag_test --control-host=192.168.0.1 --control-port=1234 --mode=client --peer-cid=2\n"
+		"\n"
+		"Run vsock_diag.ko tests.  Must be launched in both\n"
+		"guest and host.  One side must use --mode=client and\n"
+		"the other side must use --mode=server.\n"
+		"\n"
+		"A TCP control socket connection is used to coordinate tests\n"
+		"between the client and the server.  The server requires a\n"
+		"listen address and the client requires an address to\n"
+		"connect to.\n"
+		"\n"
+		"The CID of the other side must be given with --peer-cid=<cid>.\n");
+	exit(EXIT_FAILURE);
+}
+
+int main(int argc, char **argv)
+{
+	const char *control_host = NULL;
+	const char *control_port = NULL;
+	int mode = TEST_MODE_UNSET;
+	unsigned int peer_cid = VMADDR_CID_ANY;
+	int i;
+
+	init_signals();
+
+	for (;;) {
+		int opt = getopt_long(argc, argv, optstring, longopts, NULL);
+
+		if (opt == -1)
+			break;
+
+		switch (opt) {
+		case 'H':
+			control_host = optarg;
+			break;
+		case 'm':
+			if (strcmp(optarg, "client") == 0)
+				mode = TEST_MODE_CLIENT;
+			else if (strcmp(optarg, "server") == 0)
+				mode = TEST_MODE_SERVER;
+			else {
+				fprintf(stderr, "--mode must be \"client\" or \"server\"\n");
+				return EXIT_FAILURE;
+			}
+			break;
+		case 'p':
+			peer_cid = parse_cid(optarg);
+			break;
+		case 'P':
+			control_port = optarg;
+			break;
+		case '?':
+		default:
+			usage();
+		}
+	}
+
+	if (!control_port)
+		usage();
+	if (mode == TEST_MODE_UNSET)
+		usage();
+	if (peer_cid == VMADDR_CID_ANY)
+		usage();
+
+	if (!control_host) {
+		if (mode != TEST_MODE_SERVER)
+			usage();
+		control_host = "0.0.0.0";
+	}
+
+	control_init(control_host, control_port, mode == TEST_MODE_SERVER);
+
+	for (i = 0; test_cases[i].name; i++) {
+		void (*run)(unsigned int peer_cid);
+
+		printf("%s...", test_cases[i].name);
+		fflush(stdout);
+
+		if (mode == TEST_MODE_CLIENT)
+			run = test_cases[i].run_client;
+		else
+			run = test_cases[i].run_server;
+
+		if (run)
+			run(peer_cid);
+
+		printf("ok\n");
+	}
+
+	control_cleanup();
+	return EXIT_SUCCESS;
+}