From 421d9d1bea6545543c00ffba4c83f369510de9a1 Mon Sep 17 00:00:00 2001 From: Xu Wang Date: Mon, 8 Mar 2021 02:24:59 +0000 Subject: tools/latency-collector: Remove unneeded semicolon Fix semicolon.cocci warning: tools/tracing/latency/latency-collector.c:1021:2-3: Unneeded semicolon Link: https://lkml.kernel.org/r/20210308022459.59881-1-vulab@iscas.ac.cn Reviewed-by: Viktor Rosendahl Signed-off-by: Xu Wang Signed-off-by: Steven Rostedt (VMware) --- tools/tracing/latency/latency-collector.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/tracing/latency/latency-collector.c b/tools/tracing/latency/latency-collector.c index b69de9263ee6..3a2e6bb781a8 100644 --- a/tools/tracing/latency/latency-collector.c +++ b/tools/tracing/latency/latency-collector.c @@ -1018,7 +1018,7 @@ static long go_to_sleep(const struct entry *req) cond_timedwait(&printstate.cond, &printstate.mutex, &future); if (time_has_passed(&future)) break; - }; + } if (printstate_has_new_req_arrived(req)) delay = -1; @@ -1941,7 +1941,7 @@ static void scan_arguments(int argc, char *argv[]) if (value < 0) { warnx("TIME must be >= 0\n"); show_usage(); - ; + exit(0); } trace_enable = true; use_random_sleep = true; -- cgit v1.2.3 From b0922c0732c10eabab7ef15c420b0ae6cf540564 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 25 Mar 2021 10:55:36 -0700 Subject: tools: gpio-utils: fix various kernel-doc warnings Fix several problems in kernel-doc notation in gpio-utils.c. gpio-utils.c:37: warning: Incorrect use of kernel-doc format: * gpiotools_request_line() - request gpio lines in a gpiochip gpio-utils.c:61: warning: expecting prototype for doc(). 
Prototype was for gpiotools_request_line() instead gpio-utils.c:265: warning: Excess function parameter 'value' description in 'gpiotools_sets' gpio-utils.c:1: warning: 'gpiotools_request_lines' not found Signed-off-by: Randy Dunlap Cc: Bartosz Golaszewski Cc: Linus Walleij Cc: linux-gpio@vger.kernel.org Reviewed-by: Linus Walleij Signed-off-by: Bartosz Golaszewski --- tools/gpio/gpio-utils.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'tools') diff --git a/tools/gpio/gpio-utils.c b/tools/gpio/gpio-utils.c index 1639b4d832cd..4096bcd511d1 100644 --- a/tools/gpio/gpio-utils.c +++ b/tools/gpio/gpio-utils.c @@ -20,7 +20,7 @@ #define CONSUMER "gpio-utils" /** - * doc: Operation of gpio + * DOC: Operation of gpio * * Provide the api of gpiochip for chardev interface. There are two * types of api. The first one provide as same function as each @@ -100,7 +100,7 @@ exit_free_name: } /** - * gpiotools_set_values(): Set the value of gpio(s) + * gpiotools_set_values() - Set the value of gpio(s) * @fd: The fd returned by * gpiotools_request_line(). * @values: The array of values want to set. @@ -124,7 +124,7 @@ int gpiotools_set_values(const int fd, struct gpio_v2_line_values *values) } /** - * gpiotools_get_values(): Get the value of gpio(s) + * gpiotools_get_values() - Get the value of gpio(s) * @fd: The fd returned by * gpiotools_request_line(). * @values: The array of values get from hardware. @@ -148,7 +148,7 @@ int gpiotools_get_values(const int fd, struct gpio_v2_line_values *values) } /** - * gpiotools_release_line(): Release the line(s) of gpiochip + * gpiotools_release_line() - Release the line(s) of gpiochip * @fd: The fd returned by * gpiotools_request_line(). 
* @@ -169,7 +169,7 @@ int gpiotools_release_line(const int fd) } /** - * gpiotools_get(): Get value from specific line + * gpiotools_get() - Get value from specific line * @device_name: The name of gpiochip without prefix "/dev/", * such as "gpiochip0" * @line: number of line, such as 2. @@ -191,7 +191,7 @@ int gpiotools_get(const char *device_name, unsigned int line) /** - * gpiotools_gets(): Get values from specific lines. + * gpiotools_gets() - Get values from specific lines. * @device_name: The name of gpiochip without prefix "/dev/", * such as "gpiochip0". * @lines: An array desired lines, specified by offset @@ -230,7 +230,7 @@ int gpiotools_gets(const char *device_name, unsigned int *lines, } /** - * gpiotools_set(): Set value to specific line + * gpiotools_set() - Set value to specific line * @device_name: The name of gpiochip without prefix "/dev/", * such as "gpiochip0" * @line: number of line, such as 2. @@ -248,13 +248,13 @@ int gpiotools_set(const char *device_name, unsigned int line, } /** - * gpiotools_sets(): Set values to specific lines. + * gpiotools_sets() - Set values to specific lines. * @device_name: The name of gpiochip without prefix "/dev/", * such as "gpiochip0". * @lines: An array desired lines, specified by offset * index for the associated GPIO device. * @num_lines: The number of lines to request. - * @value: The array of values set to gpiochip, must be + * @values: The array of values set to gpiochip, must be * 0(low) or 1(high). * * Return: On success return 0; -- cgit v1.2.3 From 42e4eefb089f12ea900062ecdcc7ca10c3423a05 Mon Sep 17 00:00:00 2001 From: Hao Fang Date: Tue, 30 Mar 2021 14:33:48 +0800 Subject: dma-mapping: benchmark: use the correct HiSilicon copyright s/Hisilicon/HiSilicon/g. It should use capital S, according to https://www.hisilicon.com/en/terms-of-use. 
Signed-off-by: Hao Fang Acked-by: Barry Song Signed-off-by: Christoph Hellwig --- tools/testing/selftests/dma/dma_map_benchmark.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/dma/dma_map_benchmark.c b/tools/testing/selftests/dma/dma_map_benchmark.c index fb23ce9617ea..b492bed0936d 100644 --- a/tools/testing/selftests/dma/dma_map_benchmark.c +++ b/tools/testing/selftests/dma/dma_map_benchmark.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * Copyright (C) 2020 Hisilicon Limited. + * Copyright (C) 2020 HiSilicon Limited. */ #include -- cgit v1.2.3 From ca947482b0b30443e6da1f0f5ba7244e34a4f65a Mon Sep 17 00:00:00 2001 From: Xiang Chen Date: Thu, 18 Mar 2021 17:29:30 +0800 Subject: dma-mapping: benchmark: Add support for multi-pages map/unmap Currently the dma-map benchmark only supports mapping/unmapping one page at a time, but there are other scenarios that need multi-page map/unmap: for multi-page interfaces such as dma_alloc_coherent() and dma_map_sg(), the time spent on a multi-page map/unmap is not the time of a single page * npages (it is not linear), as the mapping may use block descriptions instead of page descriptions when the size allows it (such as 2M/1G), and a single TLB invalidation command can invalidate multiple pages instead of multiple commands when RIL is enabled (which shortens the unmap time). So it is necessary to add support for multi-pages map/unmap. Add a parameter "-g" to support multi-pages map/unmap.
Signed-off-by: Xiang Chen Acked-by: Barry Song Signed-off-by: Christoph Hellwig --- tools/testing/selftests/dma/dma_map_benchmark.c | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/dma/dma_map_benchmark.c b/tools/testing/selftests/dma/dma_map_benchmark.c index b492bed0936d..485dff51bad2 100644 --- a/tools/testing/selftests/dma/dma_map_benchmark.c +++ b/tools/testing/selftests/dma/dma_map_benchmark.c @@ -40,7 +40,8 @@ struct map_benchmark { __u32 dma_bits; /* DMA addressing capability */ __u32 dma_dir; /* DMA data direction */ __u32 dma_trans_ns; /* time for DMA transmission in ns */ - __u8 expansion[80]; /* For future use */ + __u32 granule; /* how many PAGE_SIZE will do map/unmap once a time */ + __u8 expansion[76]; /* For future use */ }; int main(int argc, char **argv) @@ -51,11 +52,13 @@ int main(int argc, char **argv) int threads = 1, seconds = 20, node = -1; /* default dma mask 32bit, bidirectional DMA */ int bits = 32, xdelay = 0, dir = DMA_MAP_BIDIRECTIONAL; + /* default granule 1 PAGESIZE */ + int granule = 1; int cmd = DMA_MAP_BENCHMARK; char *p; - while ((opt = getopt(argc, argv, "t:s:n:b:d:x:")) != -1) { + while ((opt = getopt(argc, argv, "t:s:n:b:d:x:g:")) != -1) { switch (opt) { case 't': threads = atoi(optarg); @@ -75,6 +78,9 @@ int main(int argc, char **argv) case 'x': xdelay = atoi(optarg); break; + case 'g': + granule = atoi(optarg); + break; default: return -1; } @@ -110,6 +116,11 @@ int main(int argc, char **argv) exit(1); } + if (granule < 1 || granule > 1024) { + fprintf(stderr, "invalid granule size\n"); + exit(1); + } + fd = open("/sys/kernel/debug/dma_map_benchmark", O_RDWR); if (fd == -1) { perror("open"); @@ -123,14 +134,15 @@ int main(int argc, char **argv) map.dma_bits = bits; map.dma_dir = dir; map.dma_trans_ns = xdelay; + map.granule = granule; if (ioctl(fd, cmd, &map)) { perror("ioctl"); exit(1); } - printf("dma mapping benchmark: threads:%d 
seconds:%d node:%d dir:%s\n", - threads, seconds, node, dir[directions]); + printf("dma mapping benchmark: threads:%d seconds:%d node:%d dir:%s granule: %d\n", + threads, seconds, node, dir[directions], granule); printf("average map latency(us):%.1f standard deviation:%.1f\n", map.avg_map_100ns/10.0, map.map_stddev/10.0); printf("average unmap latency(us):%.1f standard deviation:%.1f\n", -- cgit v1.2.3 From da2e56634b262fddfa40b2cfedd24de841418cd3 Mon Sep 17 00:00:00 2001 From: "John 'Warthog9' Hawley (VMware)" Date: Mon, 19 Apr 2021 17:29:26 -0700 Subject: ktest: Minor cleanup with uninitialized variable $build_options Signed-off-by: John 'Warthog9' Hawley (VMware) Signed-off-by: Steven Rostedt (VMware) --- tools/testing/ktest/ktest.pl | 3 +++ 1 file changed, 3 insertions(+) (limited to 'tools') diff --git a/tools/testing/ktest/ktest.pl b/tools/testing/ktest/ktest.pl index 4e2450964517..18fd4fd117dd 100755 --- a/tools/testing/ktest/ktest.pl +++ b/tools/testing/ktest/ktest.pl @@ -2594,6 +2594,9 @@ sub build { # Run old config regardless, to enforce min configurations make_oldconfig; + if (not defined($build_options)){ + $build_options = ""; + } my $build_ret = run_command "$make $build_options", $buildlog; if (defined($post_build)) { -- cgit v1.2.3 From 2676eb4bfc546dc490d2abd155877a580c74c294 Mon Sep 17 00:00:00 2001 From: "John 'Warthog9' Hawley (VMware)" Date: Mon, 19 Apr 2021 17:29:27 -0700 Subject: ktest: Add example config for using VMware VMs This duplicates the KVM/Qemu config with specific notes for how to use it with VMware VMs on Workstation, Player, or Fusion. 
The main thing to be aware of is how the serial port is exposed: it is a Unix socket (named pipe), so something like ncat is needed to connect it to ktest's monitoring Signed-off-by: John 'Warthog9' Hawley (VMware) Signed-off-by: Steven Rostedt (VMware) --- tools/testing/ktest/examples/vmware.conf | 137 +++++++++++++++++++++++++++++++ 1 file changed, 137 insertions(+) create mode 100644 tools/testing/ktest/examples/vmware.conf (limited to 'tools') diff --git a/tools/testing/ktest/examples/vmware.conf b/tools/testing/ktest/examples/vmware.conf new file mode 100644 index 000000000000..61958163d242 --- /dev/null +++ b/tools/testing/ktest/examples/vmware.conf @@ -0,0 +1,137 @@ +# +# This config is an example usage of ktest.pl with a vmware guest +# +# VMware Setup: +# ------------- +# - Edit the Virtual Machine ("Edit virtual machine settings") +# - Add a Serial Port +# - You almost certainly want it set "Connect at power on" +# - Select "Use socket (named pipe)" +# - Select a name that you'll recognize, like 'ktestserialpipe' +# - From: Server +# - To: A Virtual Machine +# - Save +# - Make sure you note the name, it will be in the base directory of the +# virtual machine (where the "disks" are stored). The default +# is /var/lib/vmware/<VM directory>/ +# +# - Make note of the path to the VM +# +# +# The guest is called 'Guest' and this would be something that +# could be run on the host to test a virtual machine target. + +MACHINE = Guest + +# Name of the serial pipe you set in the VMware settings +VMWARE_SERIAL_NAME = + +# Define a variable of the name of the VM +# Note that this needs to be the name of the kmx file, and usually, the +# name of the directory that it's in. If the directory and name +# differ change the VMWARE_VM_DIR accordingly. +# Please omit the .kmx extension +VMWARE_VM_NAME = + +# VM dir name. This is usually the same as the virtual machine's name, +# but not always the case.
Change if they differ +VMWARE_VM_DIR = ${VMWARE_VM_NAME} + +# Base directory that the Virtual machine is contained in +# /var/lib/vmware is the default on Linux +VMWARE_VM_BASE_DIR = /var/lib/vmware/${VMWARE_VM_DIR} + +# Use ncat to read the Unix pipe. Anything that can read the Unix pipe +# and output its contents to stdout will work +CONSOLE = /usr/bin/ncat -U ${VMWARE_VM_BASE_DIR}/${VMWARE_SERIAL_NAME} + +# Define what version of Workstation you are using +# This is used by vmrun to use the appropriate pieces to +# test this. In all likelihood you want 'ws' or 'player' +# Valid options: +# ws - Workstation (Windows or Linux host) +# fusion - Fusion (Mac host) +# player - Using VMware Player (Windows or Linux host) +# Note: vmrun has to run directly on the host machine +VMWARE_HOST_TYPE = ws + +# VMware provides `vmrun` to allow you to do certain things to the virtual machine +# This should hard reset the VM and force a boot +VMWARE_POWER_CYCLE = /usr/bin/vmrun -T ${VMWARE_HOST_TYPE} reset ${VMWARE_VM_BASE_DIR}/${VMWARE_VM_NAME}.kmx nogui + +#*************************************# +# This part is the same as test.conf # +#*************************************# + +# The include files will set up the type of test to run. Just set TEST to +# which test you want to run. +# +# TESTS = patchcheck, randconfig, boot, test, config-bisect, bisect, min-config +# +# See the include/*.conf files that define these tests +# +TEST := patchcheck + +# Some tests may have more than one test to run. Define MULTI := 1 to run +# the extra tests. +MULTI := 0 + +# In case you want to differentiate which type of system you are testing +BITS := 64 + +# REBOOT = none, error, fail, empty +# See include/defaults.conf +REBOOT := empty + + +# The defaults file will set up various settings that can be used by all +# machine configs.
+INCLUDE include/defaults.conf + + +#*************************************# +# Now we are different from test.conf # +#*************************************# + + +# The example here assumes that Guest is running a Fedora release +# that uses dracut for its initfs. The POST_INSTALL will be executed +# after the install of the kernel and modules is complete. +# +POST_INSTALL = ${SSH} /sbin/dracut -f /boot/initramfs-test.img $KERNEL_VERSION + +# Guests sometimes get stuck on reboot. We wait 3 seconds after running +# the reboot command and then do a full power-cycle of the guest. +# This forces the guest to restart. +# +POWERCYCLE_AFTER_REBOOT = 3 + +# We do the same after the halt command, but this time we wait 20 seconds. +POWEROFF_AFTER_HALT = 20 + + +# As the defaults.conf file has a POWER_CYCLE option already defined, +# and options can not be defined in the same section more than once +# (all DEFAULTS sections are considered the same), we use the +# DEFAULTS OVERRIDE to tell ktest.pl to ignore the previously defined +# options, for the options set in the OVERRIDE section. +# +DEFAULTS OVERRIDE + +# Instead of using the default POWER_CYCLE option defined in +# defaults.conf, we use vmrun to cycle it: VMWARE_POWER_CYCLE +# hard resets the guest, which forces it to boot again. +# Crude, but effective. +# +POWER_CYCLE = ${VMWARE_POWER_CYCLE} + + +DEFAULTS + +# The following files each handle a different test case. +# Having them included allows you to set up more than one machine and share +# the same tests.
+INCLUDE include/patchcheck.conf +INCLUDE include/tests.conf +INCLUDE include/bisect.conf +INCLUDE include/min-config.conf -- cgit v1.2.3 From becdd17b5acc79267cf4dba65e07e96e11cc9b57 Mon Sep 17 00:00:00 2001 From: "John 'Warthog9' Hawley (VMware)" Date: Mon, 19 Apr 2021 17:29:28 -0700 Subject: ktest: Adding editor hints to improve consistency Emacs and Vi(m) have different styles of dealing with perl syntax which can lead to slightly inconsistent indentation, and make the code slightly harder to read. Emacs assumes the more commonly recommended perl standard of 4 spaces (1 column) or tab (two column) indentation. Vi(m) tends to favor just normal spaces or tabs depending on what was being used. This gives the basic hinting to Emacs and Vim to do what is expected to be basically consistent. Emacs: - Explicitly flip into perl mode, cperl would require more adjustments Vi(m): - Set softtabstop=4 which will flip it over to doing indentation the way you would expect from Emacs Signed-off-by: John 'Warthog9' Hawley (VMware) Signed-off-by: Steven Rostedt (VMware) --- tools/testing/ktest/ktest.pl | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'tools') diff --git a/tools/testing/ktest/ktest.pl b/tools/testing/ktest/ktest.pl index 18fd4fd117dd..14a753b86445 100755 --- a/tools/testing/ktest/ktest.pl +++ b/tools/testing/ktest/ktest.pl @@ -4520,3 +4520,12 @@ if (defined($opt{"LOG_FILE"})) { } exit 0; + +## +# The following are here to standardize tabs/spaces/etc across the most likely editors +### + +# Local Variables: +# mode: perl +# End: +# vim: softtabstop=4 -- cgit v1.2.3 From 12d4cddda2043466a5af8fc0c49e49f24f1d4c59 Mon Sep 17 00:00:00 2001 From: "John 'Warthog9' Hawley (VMware)" Date: Mon, 19 Apr 2021 17:29:29 -0700 Subject: ktest: Fixing indentation to match expected pattern This is a followup to "ktest: Adding editor hints to improve consistency" to actually adjust the existing indentation to match the, now, expected pattern (first column 4 spaces, 2nd tab, 3rd tab + 4
spaces, etc). This should, at least help, keep things consistent going forward now. Signed-off-by: John 'Warthog9' Hawley (VMware) Signed-off-by: Steven Rostedt (VMware) --- tools/testing/ktest/ktest.pl | 186 +++++++++++++++++++++---------------------- 1 file changed, 92 insertions(+), 94 deletions(-) (limited to 'tools') diff --git a/tools/testing/ktest/ktest.pl b/tools/testing/ktest/ktest.pl index 14a753b86445..633c715173ff 100755 --- a/tools/testing/ktest/ktest.pl +++ b/tools/testing/ktest/ktest.pl @@ -760,7 +760,7 @@ sub process_variables { # remove the space added in the beginning $retval =~ s/ //; - return "$retval" + return "$retval"; } sub set_value { @@ -1095,7 +1095,7 @@ sub __read_config { } } } - + if ( ! -r $file ) { die "$name: $.: Can't read file $file\n$_"; } @@ -1186,13 +1186,13 @@ sub __read_config { } sub get_test_case { - print "What test case would you like to run?\n"; - print " (build, install or boot)\n"; - print " Other tests are available but require editing ktest.conf\n"; - print " (see tools/testing/ktest/sample.conf)\n"; - my $ans = ; - chomp $ans; - $default{"TEST_TYPE"} = $ans; + print "What test case would you like to run?\n"; + print " (build, install or boot)\n"; + print " Other tests are available but require editing ktest.conf\n"; + print " (see tools/testing/ktest/sample.conf)\n"; + my $ans = ; + chomp $ans; + $default{"TEST_TYPE"} = $ans; } sub read_config { @@ -1519,13 +1519,13 @@ sub dodie { close O; close L; } - send_email("KTEST: critical failure for test $i [$name]", - "Your test started at $script_start_time has failed with:\n@_\n", $log_file); + send_email("KTEST: critical failure for test $i [$name]", + "Your test started at $script_start_time has failed with:\n@_\n", $log_file); } if ($monitor_cnt) { - # restore terminal settings - system("stty $stty_orig"); + # restore terminal settings + system("stty $stty_orig"); } if (defined($post_test)) { @@ -1709,81 +1709,81 @@ sub wait_for_monitor { } sub save_logs { - my 
($result, $basedir) = @_; - my @t = localtime; - my $date = sprintf "%04d%02d%02d%02d%02d%02d", - 1900+$t[5],$t[4],$t[3],$t[2],$t[1],$t[0]; + my ($result, $basedir) = @_; + my @t = localtime; + my $date = sprintf "%04d%02d%02d%02d%02d%02d", + 1900+$t[5],$t[4],$t[3],$t[2],$t[1],$t[0]; - my $type = $build_type; - if ($type =~ /useconfig/) { - $type = "useconfig"; - } + my $type = $build_type; + if ($type =~ /useconfig/) { + $type = "useconfig"; + } - my $dir = "$machine-$test_type-$type-$result-$date"; + my $dir = "$machine-$test_type-$type-$result-$date"; - $dir = "$basedir/$dir"; + $dir = "$basedir/$dir"; - if (!-d $dir) { - mkpath($dir) or - dodie "can't create $dir"; - } + if (!-d $dir) { + mkpath($dir) or + dodie "can't create $dir"; + } - my %files = ( - "config" => $output_config, - "buildlog" => $buildlog, - "dmesg" => $dmesg, - "testlog" => $testlog, - ); + my %files = ( + "config" => $output_config, + "buildlog" => $buildlog, + "dmesg" => $dmesg, + "testlog" => $testlog, + ); - while (my ($name, $source) = each(%files)) { - if (-f "$source") { - cp "$source", "$dir/$name" or - dodie "failed to copy $source"; - } + while (my ($name, $source) = each(%files)) { + if (-f "$source") { + cp "$source", "$dir/$name" or + dodie "failed to copy $source"; } + } - doprint "*** Saved info to $dir ***\n"; + doprint "*** Saved info to $dir ***\n"; } sub fail { - if ($die_on_failure) { - dodie @_; - } + if ($die_on_failure) { + dodie @_; + } - doprint "FAILED\n"; + doprint "FAILED\n"; - my $i = $iteration; + my $i = $iteration; - # no need to reboot for just building. - if (!do_not_reboot) { - doprint "REBOOTING\n"; - reboot_to_good $sleep_time; - } + # no need to reboot for just building. 
+ if (!do_not_reboot) { + doprint "REBOOTING\n"; + reboot_to_good $sleep_time; + } - my $name = ""; + my $name = ""; - if (defined($test_name)) { - $name = " ($test_name)"; - } + if (defined($test_name)) { + $name = " ($test_name)"; + } - print_times; + print_times; - doprint "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\n"; - doprint "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\n"; - doprint "KTEST RESULT: TEST $i$name Failed: ", @_, "\n"; - doprint "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\n"; - doprint "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\n"; + doprint "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\n"; + doprint "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\n"; + doprint "KTEST RESULT: TEST $i$name Failed: ", @_, "\n"; + doprint "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\n"; + doprint "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\n"; - if (defined($store_failures)) { - save_logs "fail", $store_failures; - } + if (defined($store_failures)) { + save_logs "fail", $store_failures; + } - if (defined($post_test)) { - run_command $post_test; - } + if (defined($post_test)) { + run_command $post_test; + } - return 1; + return 1; } sub run_command { @@ -1969,9 +1969,9 @@ sub get_grub_index { $target = '^menuentry.*' . $grub_menu_qt; $skip = '^menuentry\s|^submenu\s'; } elsif ($reboot_type eq "grub2bls") { - $command = $grub_bls_get; - $target = '^title=.*' . $grub_menu_qt; - $skip = '^title='; + $command = $grub_bls_get; + $target = '^title=.*' . 
$grub_menu_qt; + $skip = '^title='; } else { return; } @@ -2394,7 +2394,7 @@ sub check_buildlog { while () { if (/$check_build_re/) { my $warning = process_warning_line $_; - + $warnings_list{$warning} = 1; } } @@ -2659,7 +2659,7 @@ sub success { doprint "*******************************************\n"; if (defined($store_successes)) { - save_logs "success", $store_successes; + save_logs "success", $store_successes; } if ($i != $opt{"NUM_TESTS"} && !do_not_reboot) { @@ -3246,13 +3246,13 @@ sub run_config_bisect { $ret = run_config_bisect_test $config_bisect_type; if ($ret) { - doprint "NEW GOOD CONFIG ($pass)\n"; + doprint "NEW GOOD CONFIG ($pass)\n"; system("cp $output_config $tmpdir/good_config.tmp.$pass"); $pass++; # Return 3 for good config return 3; } else { - doprint "NEW BAD CONFIG ($pass)\n"; + doprint "NEW BAD CONFIG ($pass)\n"; system("cp $output_config $tmpdir/bad_config.tmp.$pass"); $pass++; # Return 4 for bad config @@ -3371,7 +3371,7 @@ sub config_bisect { } while ($ret == 3 || $ret == 4); if ($ret == 2) { - config_bisect_end "$good_config.tmp", "$bad_config.tmp"; + config_bisect_end "$good_config.tmp", "$bad_config.tmp"; } return $ret if ($ret < 0); @@ -3551,7 +3551,6 @@ sub read_kconfig { my $cont = 0; my $line; - if (! -f $kconfig) { doprint "file $kconfig does not exist, skipping\n"; return; @@ -3660,7 +3659,7 @@ sub read_depends { if (! -f $kconfig && $arch =~ /\d$/) { my $orig = $arch; - # some subarchs have numbers, truncate them + # some subarchs have numbers, truncate them $arch =~ s/\d*$//; $kconfig = "$builddir/arch/$arch/Kconfig"; if (! 
-f $kconfig) { @@ -3855,7 +3854,7 @@ sub make_min_config { foreach my $config (@config_keys) { my $kconfig = chomp_config $config; if (!defined $depcount{$kconfig}) { - $depcount{$kconfig} = 0; + $depcount{$kconfig} = 0; } } @@ -3957,13 +3956,13 @@ sub make_min_config { my $failed = 0; build "oldconfig" or $failed = 1; if (!$failed) { - start_monitor_and_install or $failed = 1; + start_monitor_and_install or $failed = 1; - if ($type eq "test" && !$failed) { - do_run_test or $failed = 1; - } + if ($type eq "test" && !$failed) { + do_run_test or $failed = 1; + } - end_monitor; + end_monitor; } $in_bisect = 0; @@ -4277,8 +4276,8 @@ sub send_email { sub cancel_test { if ($email_when_canceled) { my $name = get_test_name; - send_email("KTEST: Your [$name] test was cancelled", - "Your test started at $script_start_time was cancelled: sig int"); + send_email("KTEST: Your [$name] test was cancelled", + "Your test started at $script_start_time was cancelled: sig int"); } die "\nCaught Sig Int, test interrupted: $!\n" } @@ -4326,15 +4325,15 @@ for (my $i = 1; $i <= $opt{"NUM_TESTS"}; $i++) { # The first test may override the PRE_KTEST option if ($i == 1) { - if (defined($pre_ktest)) { - doprint "\n"; - run_command $pre_ktest; - } - if ($email_when_started) { + if (defined($pre_ktest)) { + doprint "\n"; + run_command $pre_ktest; + } + if ($email_when_started) { my $name = get_test_name; - send_email("KTEST: Your [$name] test was started", - "Your test was started on $script_start_time"); - } + send_email("KTEST: Your [$name] test was started", + "Your test was started on $script_start_time"); + } } # Any test can override the POST_KTEST option @@ -4506,12 +4505,11 @@ if ($opt{"POWEROFF_ON_SUCCESS"}) { run_command $switch_to_good; } - doprint "\n $successes of $opt{NUM_TESTS} tests were successful\n\n"; if ($email_when_finished) { send_email("KTEST: Your test has finished!", - "$successes of $opt{NUM_TESTS} tests started at $script_start_time were successful!"); + "$successes 
of $opt{NUM_TESTS} tests started at $script_start_time were successful!"); } if (defined($opt{"LOG_FILE"})) { -- cgit v1.2.3 From c043ccbfc6d83fa21512f842c5d2ba4060cee5fe Mon Sep 17 00:00:00 2001 From: "John 'Warthog9' Hawley (VMware)" Date: Mon, 19 Apr 2021 17:29:30 -0700 Subject: ktest: Further consistency cleanups This cleans up some additional whitespace pieces that to be more consistent, as well as moving a curly brace around, and some 'or' statements to match the rest of the file (usually or goes at the end of the line vs. at the beginning) Signed-off-by: John 'Warthog9' Hawley (VMware) Signed-off-by: Steven Rostedt (VMware) --- tools/testing/ktest/ktest.pl | 85 +++++++++++++++++++------------------------- 1 file changed, 36 insertions(+), 49 deletions(-) (limited to 'tools') diff --git a/tools/testing/ktest/ktest.pl b/tools/testing/ktest/ktest.pl index 633c715173ff..a3c6ad64c479 100755 --- a/tools/testing/ktest/ktest.pl +++ b/tools/testing/ktest/ktest.pl @@ -24,7 +24,7 @@ my %evals; #default opts my %default = ( - "MAILER" => "sendmail", # default mailer + "MAILER" => "sendmail", # default mailer "EMAIL_ON_ERROR" => 1, "EMAIL_WHEN_FINISHED" => 1, "EMAIL_WHEN_CANCELED" => 0, @@ -36,15 +36,15 @@ my %default = ( "CLOSE_CONSOLE_SIGNAL" => "INT", "TIMEOUT" => 120, "TMP_DIR" => "/tmp/ktest/\${MACHINE}", - "SLEEP_TIME" => 60, # sleep time between tests + "SLEEP_TIME" => 60, # sleep time between tests "BUILD_NOCLEAN" => 0, "REBOOT_ON_ERROR" => 0, "POWEROFF_ON_ERROR" => 0, "REBOOT_ON_SUCCESS" => 1, "POWEROFF_ON_SUCCESS" => 0, "BUILD_OPTIONS" => "", - "BISECT_SLEEP_TIME" => 60, # sleep time between bisects - "PATCHCHECK_SLEEP_TIME" => 60, # sleep time between patch checks + "BISECT_SLEEP_TIME" => 60, # sleep time between bisects + "PATCHCHECK_SLEEP_TIME" => 60, # sleep time between patch checks "CLEAR_LOG" => 0, "BISECT_MANUAL" => 0, "BISECT_SKIP" => 1, @@ -537,7 +537,7 @@ sub read_prompt { my $ans; for (;;) { - if ($cancel) { + if ($cancel) { print "$prompt [y/n/C] 
"; } else { print "$prompt [Y/n] "; @@ -863,7 +863,6 @@ sub value_defined { defined($opt{$2}); } -my $d = 0; sub process_expression { my ($name, $val) = @_; @@ -978,7 +977,6 @@ sub __read_config { $override = 0; if ($type eq "TEST_START") { - if ($num_tests_set) { die "$name: $.: Can not specify both NUM_TESTS and TEST_START\n"; } @@ -1048,7 +1046,6 @@ sub __read_config { $test_num = $old_test_num; $repeat = $old_repeat; } - } elsif (/^\s*ELSE\b(.*)$/) { if (!$if) { die "$name: $.: ELSE found with out matching IF section\n$_"; @@ -1471,7 +1468,6 @@ sub get_test_name() { } sub dodie { - # avoid recursion return if ($in_die); $in_die = 1; @@ -1481,10 +1477,8 @@ sub dodie { doprint "CRITICAL FAILURE... [TEST $i] ", @_, "\n"; if ($reboot_on_error && !do_not_reboot) { - doprint "REBOOTING\n"; reboot_to_good; - } elsif ($poweroff_on_error && defined($power_off)) { doprint "POWERING OFF\n"; `$power_off`; @@ -1519,8 +1513,9 @@ sub dodie { close O; close L; } + send_email("KTEST: critical failure for test $i [$name]", - "Your test started at $script_start_time has failed with:\n@_\n", $log_file); + "Your test started at $script_start_time has failed with:\n@_\n", $log_file); } if ($monitor_cnt) { @@ -1915,8 +1910,8 @@ sub _get_grub_index { my ($command, $target, $skip) = @_; return if (defined($grub_number) && defined($last_grub_menu) && - $last_grub_menu eq $grub_menu && defined($last_machine) && - $last_machine eq $machine); + $last_grub_menu eq $grub_menu && defined($last_machine) && + $last_machine eq $machine); doprint "Find $reboot_type menu ... 
"; $grub_number = -1; @@ -1924,8 +1919,8 @@ sub _get_grub_index { my $ssh_grub = $ssh_exec; $ssh_grub =~ s,\$SSH_COMMAND,$command,g; - open(IN, "$ssh_grub |") - or dodie "unable to execute $command"; + open(IN, "$ssh_grub |") or + dodie "unable to execute $command"; my $found = 0; @@ -1979,8 +1974,7 @@ sub get_grub_index { _get_grub_index($command, $target, $skip); } -sub wait_for_input -{ +sub wait_for_input { my ($fp, $time) = @_; my $start_time; my $rin; @@ -2096,7 +2090,6 @@ sub monitor { my $version_found = 0; while (!$done) { - if ($bug && defined($stop_after_failure) && $stop_after_failure >= 0) { my $time = $stop_after_failure - (time - $failure_start); @@ -2571,7 +2564,6 @@ sub build { run_command "mv $outputdir/config_temp $output_config" or dodie "moving config_temp"; } - } elsif (!$noclean) { unlink "$output_config"; run_command "$make mrproper" or @@ -2652,11 +2644,12 @@ sub success { print_times; - doprint "\n\n*******************************************\n"; - doprint "*******************************************\n"; - doprint "KTEST RESULT: TEST $i$name SUCCESS!!!! **\n"; - doprint "*******************************************\n"; - doprint "*******************************************\n"; + doprint "\n\n"; + doprint "*******************************************\n"; + doprint "*******************************************\n"; + doprint "KTEST RESULT: TEST $i$name SUCCESS!!!! 
**\n"; + doprint "*******************************************\n"; + doprint "*******************************************\n"; if (defined($store_successes)) { save_logs "success", $store_successes; @@ -3034,7 +3027,6 @@ sub bisect { } if ($do_check) { - # get current HEAD my $head = get_sha1("HEAD"); @@ -3074,13 +3066,11 @@ sub bisect { run_command "git bisect replay $replay" or dodie "failed to run replay"; } else { - run_command "git bisect good $good" or dodie "could not set bisect good to $good"; run_git_bisect "git bisect bad $bad" or dodie "could not set bisect bad to $bad"; - } if (defined($start)) { @@ -3133,8 +3123,8 @@ sub assign_configs { doprint "Reading configs from $config\n"; - open (IN, $config) - or dodie "Failed to read $config"; + open (IN, $config) or + dodie "Failed to read $config"; while () { chomp; @@ -3287,10 +3277,11 @@ sub config_bisect { if (!defined($config_bisect_exec)) { # First check the location that ktest.pl ran - my @locations = ( "$pwd/config-bisect.pl", - "$dirname/config-bisect.pl", - "$builddir/tools/testing/ktest/config-bisect.pl", - undef ); + my @locations = ( + "$pwd/config-bisect.pl", + "$dirname/config-bisect.pl", + "$builddir/tools/testing/ktest/config-bisect.pl", + undef ); foreach my $loc (@locations) { doprint "loc = $loc\n"; $config_bisect_exec = $loc; @@ -3632,8 +3623,8 @@ sub read_kconfig { sub read_depends { # find out which arch this is by the kconfig file - open (IN, $output_config) - or dodie "Failed to read $output_config"; + open (IN, $output_config) or + dodie "Failed to read $output_config"; my $arch; while () { if (m,Linux/(\S+)\s+\S+\s+Kernel Configuration,) { @@ -3708,7 +3699,6 @@ sub get_depends { my @configs; while ($dep =~ /[$valid]/) { - if ($dep =~ /^[^$valid]*([$valid]+)/) { my $conf = "CONFIG_" . 
$1; @@ -3889,7 +3879,6 @@ sub make_min_config { my $take_two = 0; while (!$done) { - my $config; my $found; @@ -3900,7 +3889,7 @@ sub make_min_config { # Sort keys by who is most dependent on @test_configs = sort { $depcount{chomp_config($b)} <=> $depcount{chomp_config($a)} } - @test_configs ; + @test_configs ; # Put configs that did not modify the config at the end. my $reset = 1; @@ -3976,8 +3965,8 @@ sub make_min_config { # update new ignore configs if (defined($ignore_config)) { - open (OUT, ">$temp_config") - or dodie "Can't write to $temp_config"; + open (OUT, ">$temp_config") or + dodie "Can't write to $temp_config"; foreach my $config (keys %save_configs) { print OUT "$save_configs{$config}\n"; } @@ -4004,8 +3993,8 @@ sub make_min_config { } # Save off all the current mandatory configs - open (OUT, ">$temp_config") - or dodie "Can't write to $temp_config"; + open (OUT, ">$temp_config") or + dodie "Can't write to $temp_config"; foreach my $config (keys %keep_configs) { print OUT "$keep_configs{$config}\n"; } @@ -4043,7 +4032,6 @@ sub make_warnings_file { open(IN, $buildlog) or dodie "Can't open $buildlog"; while () { - # Some compilers use UTF-8 extended for quotes # for distcc heterogeneous systems, this causes issues s/$utf8_quote/'/g; @@ -4263,7 +4251,6 @@ sub do_send_mail { } sub send_email { - if (defined($mailto)) { if (!defined($mailer)) { doprint "No email sent: email or mailer not specified in config.\n"; @@ -4277,7 +4264,7 @@ sub cancel_test { if ($email_when_canceled) { my $name = get_test_name; send_email("KTEST: Your [$name] test was cancelled", - "Your test started at $script_start_time was cancelled: sig int"); + "Your test started at $script_start_time was cancelled: sig int"); } die "\nCaught Sig Int, test interrupted: $!\n" } @@ -4332,7 +4319,7 @@ for (my $i = 1; $i <= $opt{"NUM_TESTS"}; $i++) { if ($email_when_started) { my $name = get_test_name; send_email("KTEST: Your [$name] test was started", - "Your test was started on 
$script_start_time"); + "Your test was started on $script_start_time"); } } @@ -4411,7 +4398,7 @@ for (my $i = 1; $i <= $opt{"NUM_TESTS"}; $i++) { my $ret = run_command $pre_test; if (!$ret && defined($pre_test_die) && $pre_test_die) { - dodie "failed to pre_test\n"; + dodie "failed to pre_test\n"; } } @@ -4509,7 +4496,7 @@ doprint "\n $successes of $opt{NUM_TESTS} tests were successful\n\n"; if ($email_when_finished) { send_email("KTEST: Your test has finished!", - "$successes of $opt{NUM_TESTS} tests started at $script_start_time were successful!"); + "$successes of $opt{NUM_TESTS} tests started at $script_start_time were successful!"); } if (defined($opt{"LOG_FILE"})) { -- cgit v1.2.3 From 6a0f3652952c7bba83af66c115a311d4a2164ebb Mon Sep 17 00:00:00 2001 From: "John 'Warthog9' Hawley (VMware)" Date: Mon, 19 Apr 2021 17:29:31 -0700 Subject: ktest: Re-arrange the code blocks for better discoverability Perl, as with most scripting languages, is fairly flexible in how / where you can define things, and it will (for the most part) do what you would expect it to do. This however can lead to situations, like with ktest, where things get muddled over time. This pushes the variable definitions back up to the top, followed by functions, with the main script executables down at the bottom, INSTEAD of being somewhat mish-mashed together in certain places. This mostly has the advantage of making it more obvious where things are initially defined, what functions are there, and ACTUALLY where the main script starts executing, and should make this a little more approachable. 
Signed-off-by: John 'Warthog9' Hawley (VMware) Signed-off-by: Steven Rostedt (VMware) --- tools/testing/ktest/ktest.pl | 296 ++++++++++++++++++++++--------------------- 1 file changed, 154 insertions(+), 142 deletions(-) (limited to 'tools') diff --git a/tools/testing/ktest/ktest.pl b/tools/testing/ktest/ktest.pl index a3c6ad64c479..09d1578f9d66 100755 --- a/tools/testing/ktest/ktest.pl +++ b/tools/testing/ktest/ktest.pl @@ -512,6 +512,69 @@ $config_help{"REBOOT_SCRIPT"} = << "EOF" EOF ; +# used with process_expression() +my $d = 0; + +# defined before get_test_name() +my $in_die = 0; + +# defined before process_warning_line() +my $check_build_re = ".*:.*(warning|error|Error):.*"; +my $utf8_quote = "\\x{e2}\\x{80}(\\x{98}|\\x{99})"; + +# defined before child_finished() +my $child_done; + +# config_ignore holds the configs that were set (or unset) for +# a good config and we will ignore these configs for the rest +# of a config bisect. These configs stay as they were. +my %config_ignore; + +# config_set holds what all configs were set as. +my %config_set; + +# config_off holds the set of configs that the bad config had disabled. +# We need to record them and set them in the .config when running +# olddefconfig, because olddefconfig keeps the defaults. 
+my %config_off; + +# config_off_tmp holds a set of configs to turn off for now +my @config_off_tmp; + +# config_list is the set of configs that are being tested +my %config_list; +my %null_config; + +my %dependency; + +# found above run_config_bisect() +my $pass = 1; + +# found above add_dep() + +my %depends; +my %depcount; +my $iflevel = 0; +my @ifdeps; + +# prevent recursion +my %read_kconfigs; + +# found above test_this_config() +my %min_configs; +my %keep_configs; +my %save_configs; +my %processed_configs; +my %nochange_config; + +# +# These are first defined here, main function later on +# +sub run_command; +sub start_monitor; +sub end_monitor; +sub wait_for_monitor; + sub _logit { if (defined($opt{"LOG_FILE"})) { print LOG @_; @@ -1365,11 +1428,6 @@ sub eval_option { return $option; } -sub run_command; -sub start_monitor; -sub end_monitor; -sub wait_for_monitor; - sub reboot { my ($time) = @_; my $powercycle = 0; @@ -1454,8 +1512,6 @@ sub do_not_reboot { ($test_type eq "config_bisect" && $opt{"CONFIG_BISECT_TYPE[$i]"} eq "build"); } -my $in_die = 0; - sub get_test_name() { my $name; @@ -2342,9 +2398,6 @@ sub start_monitor_and_install { return monitor; } -my $check_build_re = ".*:.*(warning|error|Error):.*"; -my $utf8_quote = "\\x{e2}\\x{80}(\\x{98}|\\x{99})"; - sub process_warning_line { my ($line) = @_; @@ -2694,8 +2747,6 @@ sub child_run_test { exit $run_command_status; } -my $child_done; - sub child_finished { $child_done = 1; } @@ -3096,28 +3147,6 @@ sub bisect { success $i; } -# config_ignore holds the configs that were set (or unset) for -# a good config and we will ignore these configs for the rest -# of a config bisect. These configs stay as they were. -my %config_ignore; - -# config_set holds what all configs were set as. -my %config_set; - -# config_off holds the set of configs that the bad config had disabled. -# We need to record them and set them in the .config when running -# olddefconfig, because olddefconfig keeps the defaults. 
-my %config_off; - -# config_off_tmp holds a set of configs to turn off for now -my @config_off_tmp; - -# config_list is the set of configs that are being tested -my %config_list; -my %null_config; - -my %dependency; - sub assign_configs { my ($hash, $config) = @_; @@ -3212,8 +3241,6 @@ sub config_bisect_end { doprint "***************************************\n\n"; } -my $pass = 1; - sub run_config_bisect { my ($good, $bad, $last_result) = @_; my $reset = ""; @@ -3505,14 +3532,6 @@ sub patchcheck { return 1; } -my %depends; -my %depcount; -my $iflevel = 0; -my @ifdeps; - -# prevent recursion -my %read_kconfigs; - sub add_dep { # $config depends on $dep my ($config, $dep) = @_; @@ -3713,12 +3732,6 @@ sub get_depends { return @configs; } -my %min_configs; -my %keep_configs; -my %save_configs; -my %processed_configs; -my %nochange_config; - sub test_this_config { my ($config) = @_; @@ -4047,98 +4060,6 @@ sub make_warnings_file { success $i; } -$#ARGV < 1 or die "ktest.pl version: $VERSION\n usage: ktest.pl [config-file]\n"; - -if ($#ARGV == 0) { - $ktest_config = $ARGV[0]; - if (! -f $ktest_config) { - print "$ktest_config does not exist.\n"; - if (!read_yn "Create it?") { - exit 0; - } - } -} - -if (! -f $ktest_config) { - $newconfig = 1; - get_test_case; - open(OUT, ">$ktest_config") or die "Can not create $ktest_config"; - print OUT << "EOF" -# Generated by ktest.pl -# - -# PWD is a ktest.pl variable that will result in the process working -# directory that ktest.pl is executed in. - -# THIS_DIR is automatically assigned the PWD of the path that generated -# the config file. It is best to use this variable when assigning other -# directory paths within this directory. This allows you to easily -# move the test cases to other locations or to other machines. 
-# -THIS_DIR := $variable{"PWD"} - -# Define each test with TEST_START -# The config options below it will override the defaults -TEST_START -TEST_TYPE = $default{"TEST_TYPE"} - -DEFAULTS -EOF -; - close(OUT); -} -read_config $ktest_config; - -if (defined($opt{"LOG_FILE"})) { - $opt{"LOG_FILE"} = eval_option("LOG_FILE", $opt{"LOG_FILE"}, -1); -} - -# Append any configs entered in manually to the config file. -my @new_configs = keys %entered_configs; -if ($#new_configs >= 0) { - print "\nAppending entered in configs to $ktest_config\n"; - open(OUT, ">>$ktest_config") or die "Can not append to $ktest_config"; - foreach my $config (@new_configs) { - print OUT "$config = $entered_configs{$config}\n"; - $opt{$config} = process_variables($entered_configs{$config}); - } -} - -if (defined($opt{"LOG_FILE"})) { - if ($opt{"CLEAR_LOG"}) { - unlink $opt{"LOG_FILE"}; - } - open(LOG, ">> $opt{LOG_FILE}") or die "Can't write to $opt{LOG_FILE}"; - LOG->autoflush(1); -} - -doprint "\n\nSTARTING AUTOMATED TESTS\n\n"; - -for (my $i = 0, my $repeat = 1; $i <= $opt{"NUM_TESTS"}; $i += $repeat) { - - if (!$i) { - doprint "DEFAULT OPTIONS:\n"; - } else { - doprint "\nTEST $i OPTIONS"; - if (defined($repeat_tests{$i})) { - $repeat = $repeat_tests{$i}; - doprint " ITERATE $repeat"; - } - doprint "\n"; - } - - foreach my $option (sort keys %opt) { - - if ($option =~ /\[(\d+)\]$/) { - next if ($i != $1); - } else { - next if ($i); - } - - doprint "$option = $opt{$option}\n"; - } -} - sub option_defined { my ($option) = @_; @@ -4269,6 +4190,97 @@ sub cancel_test { die "\nCaught Sig Int, test interrupted: $!\n" } +$#ARGV < 1 or die "ktest.pl version: $VERSION\n usage: ktest.pl [config-file]\n"; + +if ($#ARGV == 0) { + $ktest_config = $ARGV[0]; + if (! -f $ktest_config) { + print "$ktest_config does not exist.\n"; + if (!read_yn "Create it?") { + exit 0; + } + } +} + +if (! 
-f $ktest_config) { + $newconfig = 1; + get_test_case; + open(OUT, ">$ktest_config") or die "Can not create $ktest_config"; + print OUT << "EOF" +# Generated by ktest.pl +# + +# PWD is a ktest.pl variable that will result in the process working +# directory that ktest.pl is executed in. + +# THIS_DIR is automatically assigned the PWD of the path that generated +# the config file. It is best to use this variable when assigning other +# directory paths within this directory. This allows you to easily +# move the test cases to other locations or to other machines. +# +THIS_DIR := $variable{"PWD"} + +# Define each test with TEST_START +# The config options below it will override the defaults +TEST_START +TEST_TYPE = $default{"TEST_TYPE"} + +DEFAULTS +EOF +; + close(OUT); +} +read_config $ktest_config; + +if (defined($opt{"LOG_FILE"})) { + $opt{"LOG_FILE"} = eval_option("LOG_FILE", $opt{"LOG_FILE"}, -1); +} + +# Append any configs entered in manually to the config file. +my @new_configs = keys %entered_configs; +if ($#new_configs >= 0) { + print "\nAppending entered in configs to $ktest_config\n"; + open(OUT, ">>$ktest_config") or die "Can not append to $ktest_config"; + foreach my $config (@new_configs) { + print OUT "$config = $entered_configs{$config}\n"; + $opt{$config} = process_variables($entered_configs{$config}); + } +} + +if (defined($opt{"LOG_FILE"})) { + if ($opt{"CLEAR_LOG"}) { + unlink $opt{"LOG_FILE"}; + } + open(LOG, ">> $opt{LOG_FILE}") or die "Can't write to $opt{LOG_FILE}"; + LOG->autoflush(1); +} + +doprint "\n\nSTARTING AUTOMATED TESTS\n\n"; + +for (my $i = 0, my $repeat = 1; $i <= $opt{"NUM_TESTS"}; $i += $repeat) { + + if (!$i) { + doprint "DEFAULT OPTIONS:\n"; + } else { + doprint "\nTEST $i OPTIONS"; + if (defined($repeat_tests{$i})) { + $repeat = $repeat_tests{$i}; + doprint " ITERATE $repeat"; + } + doprint "\n"; + } + + foreach my $option (sort keys %opt) { + if ($option =~ /\[(\d+)\]$/) { + next if ($i != $1); + } else { + next if ($i); + } + 
+ doprint "$option = $opt{$option}\n"; + } +} + $SIG{INT} = qw(cancel_test); # First we need to do is the builds -- cgit v1.2.3 From 2af4f9b8596afbbd7667a18fa71d117bac227dea Mon Sep 17 00:00:00 2001 From: Len Brown Date: Sat, 30 Jan 2021 23:43:10 -0500 Subject: tools/power turbostat: add built-in-counter for IPC -- Instructions per Cycle Use linux-perf to access the hardware instructions-retired counter. This is necessary because the counter is not enabled by default, and also the counter is prone to roll-over -- both of which perf manages. It is not necessary to use perf for the cycle counter, because turbostat already needs to collect delta-aperf to calculate frequency. Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 84 +++++++++++++++++++++++++++++++++++ 1 file changed, 84 insertions(+) (limited to 'tools') diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index a7c4f0772e53..b82295eaa744 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -33,10 +33,13 @@ #include #include #include +#include +#include char *proc_stat = "/proc/stat"; FILE *outf; int *fd_percpu; +int *fd_instr_count_percpu; struct timeval interval_tv = {5, 0}; struct timespec interval_ts = {5, 0}; unsigned int num_iterations; @@ -75,6 +78,7 @@ char *output_buffer, *outp; unsigned int do_rapl; unsigned int do_dts; unsigned int do_ptm; +unsigned int do_ipc; unsigned long long gfx_cur_rc6_ms; unsigned long long cpuidle_cur_cpu_lpi_us; unsigned long long cpuidle_cur_sys_lpi_us; @@ -173,6 +177,7 @@ struct thread_data { unsigned long long aperf; unsigned long long mperf; unsigned long long c1; + unsigned long long instr_count; unsigned long long irq_count; unsigned int smi_count; unsigned int cpu_id; @@ -490,6 +495,39 @@ int get_msr_fd(int cpu) return fd; } +static long perf_event_open(struct perf_event_attr *hw_event, pid_t pid, int cpu, int group_fd, unsigned long flags) +{ + return 
syscall(__NR_perf_event_open, hw_event, pid, cpu, group_fd, flags); +} + +static int perf_instr_count_open(int cpu_num) +{ + struct perf_event_attr pea; + int fd; + + memset(&pea, 0, sizeof(struct perf_event_attr)); + pea.type = PERF_TYPE_HARDWARE; + pea.size = sizeof(struct perf_event_attr); + pea.config = PERF_COUNT_HW_INSTRUCTIONS; + + /* counter for cpu_num, including user + kernel and all processes */ + fd = perf_event_open(&pea, -1, cpu_num, -1, 0); + if (fd == -1) + err(-1, "cpu%d: perf instruction counter\n", cpu_num); + + return fd; +} + +int get_instr_count_fd(int cpu) +{ + if (fd_instr_count_percpu[cpu]) + return fd_instr_count_percpu[cpu]; + + fd_instr_count_percpu[cpu] = perf_instr_count_open(cpu); + + return fd_instr_count_percpu[cpu]; +} + int get_msr(int cpu, off_t offset, unsigned long long *msr) { ssize_t retval; @@ -561,6 +599,7 @@ struct msr_counter bic[] = { { 0x0, "X2APIC" }, { 0x0, "Die" }, { 0x0, "GFXAMHz" }, + { 0x0, "IPC" }, }; #define MAX_BIC (sizeof(bic) / sizeof(struct msr_counter)) @@ -616,6 +655,7 @@ struct msr_counter bic[] = { #define BIC_X2APIC (1ULL << 49) #define BIC_Die (1ULL << 50) #define BIC_GFXACTMHz (1ULL << 51) +#define BIC_IPC (1ULL << 52) #define BIC_DISABLED_BY_DEFAULT (BIC_USEC | BIC_TOD | BIC_APIC | BIC_X2APIC) @@ -627,6 +667,7 @@ unsigned long long bic_present = BIC_USEC | BIC_TOD | BIC_sysfs | BIC_APIC | BIC #define ENABLE_BIC(COUNTER_NAME) (bic_enabled |= COUNTER_NAME) #define BIC_PRESENT(COUNTER_BIT) (bic_present |= COUNTER_BIT) #define BIC_NOT_PRESENT(COUNTER_BIT) (bic_present &= ~COUNTER_BIT) +#define BIC_IS_ENABLED(COUNTER_BIT) (bic_enabled & COUNTER_BIT) #define MAX_DEFERRED 16 @@ -764,6 +805,9 @@ void print_header(char *delim) if (DO_BIC(BIC_TSC_MHz)) outp += sprintf(outp, "%sTSC_MHz", (printed++ ? delim : "")); + if (DO_BIC(BIC_IPC)) + outp += sprintf(outp, "%sIPC", (printed++ ? delim : "")); + if (DO_BIC(BIC_IRQ)) { if (sums_need_wide_columns) outp += sprintf(outp, "%s IRQ", (printed++ ? 
delim : "")); @@ -926,6 +970,9 @@ int dump_counters(struct thread_data *t, struct core_data *c, outp += sprintf(outp, "mperf: %016llX\n", t->mperf); outp += sprintf(outp, "c1: %016llX\n", t->c1); + if (DO_BIC(BIC_IPC)) + outp += sprintf(outp, "IPC: %lld\n", t->instr_count); + if (DO_BIC(BIC_IRQ)) outp += sprintf(outp, "IRQ: %lld\n", t->irq_count); if (DO_BIC(BIC_SMI)) @@ -1105,6 +1152,9 @@ int format_counters(struct thread_data *t, struct core_data *c, if (DO_BIC(BIC_TSC_MHz)) outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""), 1.0 * t->tsc/units/interval_float); + if (DO_BIC(BIC_IPC)) + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 1.0 * t->instr_count / t->aperf); + /* IRQ */ if (DO_BIC(BIC_IRQ)) { if (sums_need_wide_columns) @@ -1482,6 +1532,9 @@ delta_thread(struct thread_data *new, struct thread_data *old, old->mperf = 1; /* divide by 0 protection */ } + if (DO_BIC(BIC_IPC)) + old->instr_count = new->instr_count - old->instr_count; + if (DO_BIC(BIC_IRQ)) old->irq_count = new->irq_count - old->irq_count; @@ -1536,6 +1589,8 @@ void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data t->mperf = 0; t->c1 = 0; + t->instr_count = 0; + t->irq_count = 0; t->smi_count = 0; @@ -1611,6 +1666,8 @@ int sum_counters(struct thread_data *t, struct core_data *c, average.threads.mperf += t->mperf; average.threads.c1 += t->c1; + average.threads.instr_count += t->instr_count; + average.threads.irq_count += t->irq_count; average.threads.smi_count += t->smi_count; @@ -1707,6 +1764,7 @@ void compute_average(struct thread_data *t, struct core_data *c, average.threads.tsc /= topo.num_cpus; average.threads.aperf /= topo.num_cpus; average.threads.mperf /= topo.num_cpus; + average.threads.instr_count /= topo.num_cpus; average.threads.c1 /= topo.num_cpus; if (average.threads.irq_count > 9999999) @@ -1989,6 +2047,10 @@ retry: t->mperf = t->mperf * aperf_mperf_multiplier; } + if (DO_BIC(BIC_IPC)) + if (read(get_instr_count_fd(cpu), &t->instr_count, 
sizeof(long long)) != sizeof(long long)) + return -4; + if (DO_BIC(BIC_IRQ)) t->irq_count = irqs_per_cpu[cpu]; if (DO_BIC(BIC_SMI)) { @@ -5031,6 +5093,26 @@ void print_dev_latency(void) close(fd); } + +/* + * Linux-perf manages the the HW instructions-retired counter + * by enabling when requested, and hiding rollover + */ +void linux_perf_init(void) +{ + if (!BIC_IS_ENABLED(BIC_IPC)) + return; + + if (access("/proc/sys/kernel/perf_event_paranoid", F_OK)) + return; + + fd_instr_count_percpu = calloc(topo.max_cpu_num + 1, sizeof(int)); + if (fd_instr_count_percpu == NULL) + err(-1, "calloc fd_instr_count_percpu"); + + BIC_PRESENT(BIC_IPC); +} + void process_cpuid() { unsigned int eax, ebx, ecx, edx; @@ -5642,6 +5724,7 @@ void turbostat_init() check_dev_msr(); check_permissions(); process_cpuid(); + linux_perf_init(); if (!quiet) @@ -6087,6 +6170,7 @@ void cmdline(int argc, char **argv) {"debug", no_argument, 0, 'd'}, /* internal, not documented */ {"enable", required_argument, 0, 'e'}, {"interval", required_argument, 0, 'i'}, + {"IPC", no_argument, 0, 'I'}, {"num_iterations", required_argument, 0, 'n'}, {"help", no_argument, 0, 'h'}, {"hide", required_argument, 0, 'H'}, // meh, -h taken by --help -- cgit v1.2.3 From ed0757b83a00d1799c249073d688b018b82d0093 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Thu, 4 Feb 2021 14:44:12 -0500 Subject: tools/power turbostat: print microcode patch level (also available via "grep microcode /proc/cpuinfo") Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index b82295eaa744..e1bc7937b1ec 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -5118,6 +5118,7 @@ void process_cpuid() unsigned int eax, ebx, ecx, edx; unsigned int fms, family, model, stepping, ecx_flags, edx_flags; unsigned int 
has_turbo; + unsigned long long ucode_patch = 0; eax = ebx = ecx = edx = 0; @@ -5131,8 +5132,8 @@ void process_cpuid() hygon_genuine = 1; if (!quiet) - fprintf(outf, "CPUID(0): %.4s%.4s%.4s ", - (char *)&ebx, (char *)&edx, (char *)&ecx); + fprintf(outf, "CPUID(0): %.4s%.4s%.4s 0x%x CPUID levels\n", + (char *)&ebx, (char *)&edx, (char *)&ecx, max_level); __cpuid(1, fms, ebx, ecx, edx); family = (fms >> 8) & 0xf; @@ -5145,6 +5146,9 @@ void process_cpuid() ecx_flags = ecx; edx_flags = edx; + if (get_msr(sched_getcpu(), MSR_IA32_UCODE_REV, &ucode_patch)) + warnx("get_msr(UCODE)\n"); + /* * check max extended function levels of CPUID. * This is needed to check for invariant TSC. @@ -5154,8 +5158,9 @@ void process_cpuid() __cpuid(0x80000000, max_extended_level, ebx, ecx, edx); if (!quiet) { - fprintf(outf, "0x%x CPUID levels; 0x%x xlevels; family:model:stepping 0x%x:%x:%x (%d:%d:%d)\n", - max_level, max_extended_level, family, model, stepping, family, model, stepping); + fprintf(outf, "CPUID(1): family:model:stepping 0x%x:%x:%x (%d:%d:%d) microcode 0x%x\n", + family, model, stepping, family, model, stepping, (unsigned int)((ucode_patch >> 32) & 0xFFFFFFFF)); + fprintf(outf, "CPUID(0x80000000): max_extended_levels: 0x%x\n", max_extended_level); fprintf(outf, "CPUID(1): %s %s %s %s %s %s %s %s %s %s\n", ecx_flags & (1 << 0) ? "SSE3" : "-", ecx_flags & (1 << 3) ? "MONITOR" : "-", -- cgit v1.2.3 From 5683460b85a8a14c5eec10e363635ad4660eb961 Mon Sep 17 00:00:00 2001 From: Chen Yu Date: Wed, 3 Feb 2021 16:19:59 +0800 Subject: tools/power turbostat: Support Alder Lake Mobile Share the code between Alder Lake Mobile and Alder Lake Desktop. 
Signed-off-by: Chen Yu Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index e1bc7937b1ec..a4745825047f 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -5056,6 +5056,7 @@ unsigned int intel_model_duplicates(unsigned int model) case INTEL_FAM6_ROCKETLAKE: case INTEL_FAM6_LAKEFIELD: case INTEL_FAM6_ALDERLAKE: + case INTEL_FAM6_ALDERLAKE_L: return INTEL_FAM6_CANNONLAKE_L; case INTEL_FAM6_ATOM_TREMONT_L: -- cgit v1.2.3 From 6c5c656006cf314196faea7bd76eebbfa0941cd1 Mon Sep 17 00:00:00 2001 From: Chen Yu Date: Wed, 3 Feb 2021 16:26:32 +0800 Subject: tools/power turbostat: Support Ice Lake D Ice Lake D is low-end server version of Ice Lake X, reuse the code accordingly. Tested-by: Wendy Wang Signed-off-by: Chen Yu Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index a4745825047f..d27e899328f9 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -5063,6 +5063,7 @@ unsigned int intel_model_duplicates(unsigned int model) return INTEL_FAM6_ATOM_TREMONT; case INTEL_FAM6_ICELAKE_X: + case INTEL_FAM6_ICELAKE_D: case INTEL_FAM6_SAPPHIRERAPIDS_X: return INTEL_FAM6_SKYLAKE_X; } -- cgit v1.2.3 From b2b94be787bf47eedd5890a249f3318bf9f1f1d5 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Thu, 11 Mar 2021 18:36:35 -0500 Subject: Revert "tools/power turbostat: adjust for temperature offset" This reverts commit 6ff7cb371c4bea3dba03a56d774da925e78a5087. Apparently the TCC offset should not be used to adjust what temperature we show the user after all. 
(on most systems, TCC offset is 0, FWIW) Fixes: 6ff7cb371c4b Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 62 +++++++++++++++++++---------------- 1 file changed, 33 insertions(+), 29 deletions(-) (limited to 'tools') diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index d27e899328f9..98a0a731da8a 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -4879,33 +4879,12 @@ double discover_bclk(unsigned int family, unsigned int model) * below this value, including the Digital Thermal Sensor (DTS), * Package Thermal Management Sensor (PTM), and thermal event thresholds. */ -int read_tcc_activation_temp() +int set_temperature_target(struct thread_data *t, struct core_data *c, struct pkg_data *p) { unsigned long long msr; - unsigned int tcc, target_c, offset_c; - - /* Temperature Target MSR is Nehalem and newer only */ - if (!do_nhm_platform_info) - return 0; - - if (get_msr(base_cpu, MSR_IA32_TEMPERATURE_TARGET, &msr)) - return 0; - - target_c = (msr >> 16) & 0xFF; - - offset_c = (msr >> 24) & 0xF; - - tcc = target_c - offset_c; - - if (!quiet) - fprintf(outf, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C) (%d default - %d offset)\n", - base_cpu, msr, tcc, target_c, offset_c); - - return tcc; -} + unsigned int target_c_local; + int cpu; -int set_temperature_target(struct thread_data *t, struct core_data *c, struct pkg_data *p) -{ /* tcc_activation_temp is used only for dts or ptm */ if (!(do_dts || do_ptm)) return 0; @@ -4914,18 +4893,43 @@ int set_temperature_target(struct thread_data *t, struct core_data *c, struct pk if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) return 0; + cpu = t->cpu_id; + if (cpu_migrate(cpu)) { + fprintf(outf, "Could not migrate to CPU %d\n", cpu); + return -1; + } + if (tcc_activation_temp_override != 0) { tcc_activation_temp = tcc_activation_temp_override; - fprintf(outf, "Using 
cmdline TCC Target (%d C)\n", tcc_activation_temp); + fprintf(outf, "cpu%d: Using cmdline TCC Target (%d C)\n", + cpu, tcc_activation_temp); return 0; } - tcc_activation_temp = read_tcc_activation_temp(); - if (tcc_activation_temp) - return 0; + /* Temperature Target MSR is Nehalem and newer only */ + if (!do_nhm_platform_info) + goto guess; + + if (get_msr(base_cpu, MSR_IA32_TEMPERATURE_TARGET, &msr)) + goto guess; + + target_c_local = (msr >> 16) & 0xFF; + + if (!quiet) + fprintf(outf, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C)\n", + cpu, msr, target_c_local); + + if (!target_c_local) + goto guess; + + tcc_activation_temp = target_c_local; + + return 0; +guess: tcc_activation_temp = TJMAX_DEFAULT; - fprintf(outf, "Guessing tjMax %d C, Please use -T to specify\n", tcc_activation_temp); + fprintf(outf, "cpu%d: Guessing tjMax %d C, Please use -T to specify\n", + cpu, tcc_activation_temp); return 0; } -- cgit v1.2.3 From abdc75ab53b7fd2ef42c79e88cf0caf2d007c4f2 Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Thu, 11 Mar 2021 10:05:13 +0800 Subject: tools/power turbostat: Fix DRAM Energy Unit on SKX SKX uses fixed DRAM Energy Unit, just like HSX and BDX. 
Signed-off-by: Zhang Rui Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 98a0a731da8a..5afe85efca5c 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -4272,6 +4272,7 @@ rapl_dram_energy_units_probe(int model, double rapl_energy_units) switch (model) { case INTEL_FAM6_HASWELL_X: /* HSX */ case INTEL_FAM6_BROADWELL_X: /* BDX */ + case INTEL_FAM6_SKYLAKE_X: /* SKX */ case INTEL_FAM6_XEON_PHI_KNL: /* KNL */ return (rapl_dram_energy_units = 15.3 / 1000000); default: -- cgit v1.2.3 From ba58ecde5eec898f647bba7cb07e6ec6ea1b875c Mon Sep 17 00:00:00 2001 From: Len Brown Date: Fri, 12 Mar 2021 17:30:30 -0500 Subject: tools/power turbostat: update version number --- tools/power/x86/turbostat/turbostat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 5afe85efca5c..ace100dd5a83 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -5834,7 +5834,7 @@ int get_and_dump_counters(void) } void print_version() { - fprintf(outf, "turbostat version 20.09.30" + fprintf(outf, "turbostat version 21.03.12" " - Len Brown \n"); } -- cgit v1.2.3 From 301b1d3a9104f4f3a8ab4171cf88d0f55d632b41 Mon Sep 17 00:00:00 2001 From: Bas Nieuwenhuizen Date: Wed, 28 Apr 2021 17:09:03 +0800 Subject: tools/power/turbostat: Fix turbostat for AMD Zen CPUs It was reported that on Zen+ system turbostat started exiting, which was tracked down to the MSR_PKG_ENERGY_STAT read failing because offset_to_idx wasn't returning a non-negative index. This patch combined the modification from Bingsong Si and Bas Nieuwenhuizen and added the MSR to the index system as alternative for MSR_PKG_ENERGY_STATUS. 
Fixes: 9972d5d84d76 ("tools/power turbostat: Enable accumulate RAPL display") Reported-by: youling257 Tested-by: youling257 Tested-by: Kurt Garloff Tested-by: Bingsong Si Tested-by: Artem S. Tashkinov Co-developed-by: Bingsong Si Co-developed-by: Terry Bowman Signed-off-by: Bas Nieuwenhuizen Reviewed-by: Chen Yu Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index ace100dd5a83..b5f4ec24fea9 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -302,7 +302,10 @@ int idx_to_offset(int idx) switch (idx) { case IDX_PKG_ENERGY: - offset = MSR_PKG_ENERGY_STATUS; + if (do_rapl & RAPL_AMD_F17H) + offset = MSR_PKG_ENERGY_STAT; + else + offset = MSR_PKG_ENERGY_STATUS; break; case IDX_DRAM_ENERGY: offset = MSR_DRAM_ENERGY_STATUS; @@ -331,6 +334,7 @@ int offset_to_idx(int offset) switch (offset) { case MSR_PKG_ENERGY_STATUS: + case MSR_PKG_ENERGY_STAT: idx = IDX_PKG_ENERGY; break; case MSR_DRAM_ENERGY_STATUS: @@ -358,7 +362,7 @@ int idx_valid(int idx) { switch (idx) { case IDX_PKG_ENERGY: - return do_rapl & RAPL_PKG; + return do_rapl & (RAPL_PKG | RAPL_AMD_F17H); case IDX_DRAM_ENERGY: return do_rapl & RAPL_DRAM; case IDX_PP0_ENERGY: -- cgit v1.2.3 From 13a779de4175df602366d129e41782ad7168cef0 Mon Sep 17 00:00:00 2001 From: Calvin Walton Date: Wed, 28 Apr 2021 17:09:16 +0800 Subject: tools/power turbostat: Fix offset overflow issue in index converting The idx_to_offset() function returns type int (32-bit signed), but MSR_PKG_ENERGY_STAT is u32 and would be interpreted as a negative number. The end result is that it hits the if (offset < 0) check in update_msr_sum() which prevents the timer callback from updating the stat in the background when long durations are used. The similar issue exists in offset_to_idx() and update_msr_sum(). 
Fix this issue by converting the 'int' to 'off_t' accordingly. Fixes: 9972d5d84d76 ("tools/power turbostat: Enable accumulate RAPL display") Signed-off-by: Calvin Walton Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index b5f4ec24fea9..f3cb06a115b5 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -296,9 +296,9 @@ struct msr_sum_array { /* The percpu MSR sum array.*/ struct msr_sum_array *per_cpu_msr_sum; -int idx_to_offset(int idx) +off_t idx_to_offset(int idx) { - int offset; + off_t offset; switch (idx) { case IDX_PKG_ENERGY: @@ -328,7 +328,7 @@ int idx_to_offset(int idx) return offset; } -int offset_to_idx(int offset) +int offset_to_idx(off_t offset) { int idx; @@ -3338,7 +3338,7 @@ static int update_msr_sum(struct thread_data *t, struct core_data *c, struct pkg for (i = IDX_PKG_ENERGY; i < IDX_COUNT; i++) { unsigned long long msr_cur, msr_last; - int offset; + off_t offset; if (!idx_valid(i)) continue; @@ -3347,7 +3347,8 @@ static int update_msr_sum(struct thread_data *t, struct core_data *c, struct pkg continue; ret = get_msr(cpu, offset, &msr_cur); if (ret) { - fprintf(outf, "Can not update msr(0x%x)\n", offset); + fprintf(outf, "Can not update msr(0x%llx)\n", + (unsigned long long)offset); continue; } -- cgit v1.2.3 From 25368d7cefcd87a94ccabcc6f9f31796607bbe4e Mon Sep 17 00:00:00 2001 From: Chen Yu Date: Tue, 4 May 2021 17:52:34 +0300 Subject: tools/power/turbostat: Remove Package C6 Retention on Ice Lake Server Currently the turbostat treats ICX the same way as SKX and shares the code among them. But one difference is that ICX does not support Package C6 Retention, unlike SKX and CLX. So this patch: 1. Splitting SKX and ICX in turbostat. 2. Removing Package C6 Retention for ICX. 
And after this split, it will be easier to customize Ice Lake Server in turbostat in the future. Suggested-by: Artem Bityutskiy Signed-off-by: Chen Yu Reviewed-by: Artem Bityutskiy Tested-by: Artem Bityutskiy Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 36 +++++++++++++++++++++++++++++------ 1 file changed, 30 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index f3cb06a115b5..b43816e4c1ff 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -2263,7 +2263,7 @@ int amt_pkg_cstate_limits[16] = {PCLUNL, PCL__1, PCL__2, PCLRSV, PCLRSV, PCLRSV, int phi_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV}; int glm_pkg_cstate_limits[16] = {PCLUNL, PCL__1, PCL__3, PCL__6, PCL__7, PCL_7S, PCL__8, PCL__9, PCL_10, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV}; int skx_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV}; - +int icx_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL__6, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV}; static void calculate_tsc_tweak() @@ -2378,6 +2378,7 @@ int has_turbo_ratio_group_limits(int family, int model) switch (model) { case INTEL_FAM6_ATOM_GOLDMONT: case INTEL_FAM6_SKYLAKE_X: + case INTEL_FAM6_ICELAKE_X: case INTEL_FAM6_ATOM_GOLDMONT_D: case INTEL_FAM6_ATOM_TREMONT_D: return 1; @@ -3618,6 +3619,10 @@ int probe_nhm_msrs(unsigned int family, unsigned int model) pkg_cstate_limits = skx_pkg_cstate_limits; has_misc_feature_control = 1; break; + case INTEL_FAM6_ICELAKE_X: /* ICX */ + pkg_cstate_limits = icx_pkg_cstate_limits; + has_misc_feature_control = 1; + break; case INTEL_FAM6_ATOM_SILVERMONT: /* BYT */
no_MSR_MISC_PWR_MGMT = 1; case INTEL_FAM6_ATOM_SILVERMONT_D: /* AVN */ @@ -3706,6 +3711,20 @@ int is_skx(unsigned int family, unsigned int model) } return 0; } + +int is_icx(unsigned int family, unsigned int model) +{ + + if (!genuine_intel) + return 0; + + switch (model) { + case INTEL_FAM6_ICELAKE_X: + return 1; + } + return 0; +} + int is_ehl(unsigned int family, unsigned int model) { if (!genuine_intel) @@ -3808,6 +3827,7 @@ int has_glm_turbo_ratio_limit(unsigned int family, unsigned int model) switch (model) { case INTEL_FAM6_ATOM_GOLDMONT: case INTEL_FAM6_SKYLAKE_X: + case INTEL_FAM6_ICELAKE_X: return 1; default: return 0; @@ -3833,6 +3853,7 @@ int has_config_tdp(unsigned int family, unsigned int model) case INTEL_FAM6_SKYLAKE_L: /* SKL */ case INTEL_FAM6_CANNONLAKE_L: /* CNL */ case INTEL_FAM6_SKYLAKE_X: /* SKX */ + case INTEL_FAM6_ICELAKE_X: /* ICX */ case INTEL_FAM6_XEON_PHI_KNL: /* Knights Landing */ return 1; @@ -4363,6 +4384,7 @@ void rapl_probe_intel(unsigned int family, unsigned int model) case INTEL_FAM6_HASWELL_X: /* HSX */ case INTEL_FAM6_BROADWELL_X: /* BDX */ case INTEL_FAM6_SKYLAKE_X: /* SKX */ + case INTEL_FAM6_ICELAKE_X: /* ICX */ case INTEL_FAM6_XEON_PHI_KNL: /* KNL */ do_rapl = RAPL_PKG | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO; BIC_PRESENT(BIC_PKG__); @@ -4519,7 +4541,8 @@ void perf_limit_reasons_probe(unsigned int family, unsigned int model) void automatic_cstate_conversion_probe(unsigned int family, unsigned int model) { - if (is_skx(family, model) || is_bdx(family, model)) + if (is_skx(family, model) || is_bdx(family, model) || + is_icx(family, model)) has_automatic_cstate_conversion = 1; } @@ -4734,6 +4757,7 @@ int has_snb_msrs(unsigned int family, unsigned int model) case INTEL_FAM6_SKYLAKE_L: /* SKL */ case INTEL_FAM6_CANNONLAKE_L: /* CNL */ case INTEL_FAM6_SKYLAKE_X: /* SKX */ + case INTEL_FAM6_ICELAKE_X: /* ICX */ case INTEL_FAM6_ATOM_GOLDMONT: /* BXT */ case 
INTEL_FAM6_ATOM_GOLDMONT_PLUS: case INTEL_FAM6_ATOM_GOLDMONT_D: /* DNV */ @@ -5072,10 +5096,9 @@ unsigned int intel_model_duplicates(unsigned int model) case INTEL_FAM6_ATOM_TREMONT_L: return INTEL_FAM6_ATOM_TREMONT; - case INTEL_FAM6_ICELAKE_X: case INTEL_FAM6_ICELAKE_D: case INTEL_FAM6_SAPPHIRERAPIDS_X: - return INTEL_FAM6_SKYLAKE_X; + return INTEL_FAM6_ICELAKE_X; } return model; } @@ -5185,8 +5208,9 @@ void process_cpuid() edx_flags & (1 << 28) ? "HT" : "-", edx_flags & (1 << 29) ? "TM" : "-"); } - if (genuine_intel) + if (genuine_intel) { model = intel_model_duplicates(model); + } if (!(edx_flags & (1 << 5))) errx(1, "CPUID: no MSR"); @@ -5364,7 +5388,7 @@ void process_cpuid() BIC_NOT_PRESENT(BIC_Pkgpc7); use_c1_residency_msr = 1; } - if (is_skx(family, model)) { + if (is_skx(family, model) || is_icx(family, model)) { BIC_NOT_PRESENT(BIC_CPU_c3); BIC_NOT_PRESENT(BIC_Pkgpc3); BIC_NOT_PRESENT(BIC_CPU_c7); -- cgit v1.2.3 From 1e3ec5cdfb63bc2a1ff06145faa2be08d6ec9594 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 25 Mar 2021 13:13:33 -0700 Subject: tools/power turbostat: unmark non-kernel-doc comment Do not mark a comment as kernel-doc notation when it is not meant to be in kernel-doc notation. Signed-off-by: Randy Dunlap Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index b43816e4c1ff..407e80e72546 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -2516,7 +2516,7 @@ dump_knl_turbo_ratio_limits(void) fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT: 0x%08llx\n", base_cpu, msr); - /** + /* * Turbo encoding in KNL is as follows: * [0] -- Reserved * [7:1] -- Base value of number of active cores of bucket 1. 
-- cgit v1.2.3 From 8c69da293041352d15a2b6e8010c141822a416c5 Mon Sep 17 00:00:00 2001 From: Chen Yu Date: Wed, 28 Apr 2021 10:51:57 +0800 Subject: tools/power turbostat: Enable tsc_tweak for Elkhart Lake and Jasper Lake It was found that on Elkhart Lake the TSC frequency is driven by a separate crystal-clock domain, which is different from the BCLK domain which includes mperf. This results in slightly different speeds and thus an inconsistency between TSC and mperf, which caused Busy% to be higher than 100%. On this platform it seems that the mperf runs faster than tsc when the CPU is 100% utilized: delta tsc(18815473183) < delta mperf(18958403680) for 10 seconds. To align TSC with mperf, leverage the tsc_tweak mechanism introduced for cores newer than Skylake, so that TSC and mperf would be calculated in the same domain. Reported-by: Zhang Rui Signed-off-by: Chen Yu Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 407e80e72546..9ec13f06c0f3 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -5440,7 +5440,7 @@ void process_cpuid() if (!quiet) dump_sysfs_pstate_config(); - if (has_skl_msrs(family, model)) + if (has_skl_msrs(family, model) || is_ehl(family, model)) calculate_tsc_tweak(); if (!access("/sys/class/drm/card0/power/rc6_residency_ms", R_OK)) -- cgit v1.2.3 From aeb01e6d71ffaf3011ac755c3083cc200ed57cb4 Mon Sep 17 00:00:00 2001 From: Chen Yu Date: Wed, 28 Apr 2021 12:18:12 +0800 Subject: tools/power turbostat: Print the C-state Pre-wake settings C-state pre-wake setting[1] is an optimization for some Intel CPUs to be woken up from deep C-states in order to reduce latency. According to the spec, bit 30 is the C-state Pre-wake Disable bit. Expose this setting accordingly. Sample output from turbostat: ...
cpu51: MSR_IA32_POWER_CTL: 0x1a00a40059 (C1E auto-promotion: DISabled) C-state Pre-wake: ENabled cpu51: MSR_TURBO_RATIO_LIMIT: 0x2021212121212224 ... [1] https://intel.github.io/wult/#c-state-pre-wake Signed-off-by: Chen Yu Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'tools') diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 9ec13f06c0f3..e1ed14c666db 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -91,6 +91,7 @@ double rapl_dram_energy_units, rapl_energy_units; double rapl_joule_counter_range; unsigned int do_core_perf_limit_reasons; unsigned int has_automatic_cstate_conversion; +unsigned int dis_cstate_prewake; unsigned int do_gfx_perf_limit_reasons; unsigned int do_ring_perf_limit_reasons; unsigned int crystal_hz; @@ -2271,6 +2272,8 @@ calculate_tsc_tweak() tsc_tweak = base_hz / tsc_hz; } +void prewake_cstate_probe(unsigned int family, unsigned int model); + static void dump_nhm_platform_info(void) { @@ -2293,6 +2296,11 @@ dump_nhm_platform_info(void) fprintf(outf, "cpu%d: MSR_IA32_POWER_CTL: 0x%08llx (C1E auto-promotion: %sabled)\n", base_cpu, msr, msr & 0x2 ? "EN" : "DIS"); + /* C-state Pre-wake Disable (CSTATE_PREWAKE_DISABLE) */ + if (dis_cstate_prewake) + fprintf(outf, "C-state Pre-wake: %sabled\n", + msr & 0x40000000 ?
"DIS" : "EN"); + return; } @@ -4546,6 +4554,12 @@ void automatic_cstate_conversion_probe(unsigned int family, unsigned int model) has_automatic_cstate_conversion = 1; } +void prewake_cstate_probe(unsigned int family, unsigned int model) +{ + if (is_icx(family, model)) + dis_cstate_prewake = 1; +} + int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p) { unsigned long long msr; -- cgit v1.2.3 From 7ab5ff4937a338783d147ec2d8c8714f48a5de79 Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Wed, 21 Apr 2021 22:22:47 +0800 Subject: tools/power turbostat: Fix Core C6 residency on Atom CPUs For Atom CPUs that have core cstate deeper than C6, MSR_CORE_C6_RESIDENCY actually returns the residency for both CC6 and deeper Core cstates. Thus, the real Core C6 residency is the MSR_CORE_C6_RESIDENCY return value minus the MSR_CORE_C7_RESIDENCY return value. Signed-off-by: Zhang Rui Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 39 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 38 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index e1ed14c666db..ee18966f65a4 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -35,6 +35,7 @@ #include #include #include +#include char *proc_stat = "/proc/stat"; FILE *outf; @@ -185,6 +186,7 @@ struct thread_data { unsigned int apic_id; unsigned int x2apic_id; unsigned int flags; + bool is_atom; #define CPU_IS_FIRST_THREAD_IN_CORE 0x2 #define CPU_IS_FIRST_CORE_IN_PACKAGE 0x4 unsigned long long counter[MAX_ADDED_THREAD_COUNTERS]; @@ -2090,9 +2092,19 @@ retry: return -7; } - if (DO_BIC(BIC_CPU_c7) || soft_c1_residency_display(BIC_CPU_c7)) + if (DO_BIC(BIC_CPU_c7) || soft_c1_residency_display(BIC_CPU_c7)) { if (get_msr(cpu, MSR_CORE_C7_RESIDENCY, &c->c7)) return -8; + else if (t->is_atom) { + /* + * For Atom CPUs that has core cstate deeper than c6, +
* MSR_CORE_C6_RESIDENCY returns residency of cc6 and deeper. + * Minus CC7 (and deeper cstates) residency to get + * accturate cc6 residency. + */ + c->c6 -= c->c7; + } + } if (DO_BIC(BIC_Mod_c6)) if (get_msr(cpu, MSR_MODULE_C6_RES_MS, &c->mc6_us)) @@ -4911,6 +4923,28 @@ double discover_bclk(unsigned int family, unsigned int model) return 133.33; } +int get_cpu_type(struct thread_data *t, struct core_data *c, struct pkg_data *p) +{ + unsigned int eax, ebx, ecx, edx; + + if (!genuine_intel) + return 0; + + if (cpu_migrate(t->cpu_id)) { + fprintf(outf, "Could not migrate to CPU %d\n", t->cpu_id); + return -1; + } + + if (max_level < 0x1a) + return 0; + + __cpuid(0x1a, eax, ebx, ecx, edx); + eax = (eax >> 24) & 0xFF; + if (eax == 0x20 ) + t->is_atom = true; + return 0; +} + /* * MSR_IA32_TEMPERATURE_TARGET indicates the temperature where * the Thermal Control Circuit (TCC) activates. @@ -5796,6 +5830,9 @@ void turbostat_init() for_all_cpus(set_temperature_target, ODD_COUNTERS); + for_all_cpus(get_cpu_type, ODD_COUNTERS); + for_all_cpus(get_cpu_type, EVEN_COUNTERS); + if (!quiet) for_all_cpus(print_thermal, ODD_COUNTERS); -- cgit v1.2.3 From e9d3092f6d7c21031c8ac10ba2016ae0482a39fe Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Mon, 26 Apr 2021 10:05:27 +0800 Subject: tools/power turbostat: save original CPU model CPU model may get changed in intel_model_duplicates() for code reuse. But there are still some cases we need the original CPU model to handle minor differences between generations. Thus save the original CPU model. 
Signed-off-by: Zhang Rui Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'tools') diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index ee18966f65a4..d1ae6b248377 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -43,6 +43,10 @@ int *fd_percpu; int *fd_instr_count_percpu; struct timeval interval_tv = {5, 0}; struct timespec interval_ts = {5, 0}; + +/* Save original CPU model */ +unsigned int model_orig; + unsigned int num_iterations; unsigned int debug; unsigned int quiet; @@ -5257,6 +5261,7 @@ void process_cpuid() edx_flags & (1 << 29) ? "TM" : "-"); } if (genuine_intel) { + model_orig = model; model = intel_model_duplicates(model); } -- cgit v1.2.3 From 0b9a0b9be991656f125b58a240065cdf72077244 Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Wed, 21 Apr 2021 23:22:14 +0800 Subject: tools/power turbostat: add TCC Offset support The length of TCC Offset bits varies on different platforms. Decode TCC Offset bits only for the platforms that we have verified. For the others, only show default TCC activation temperature. 
Signed-off-by: Zhang Rui Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 58 +++++++++++++++++++++++++++++++++-- 1 file changed, 55 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index d1ae6b248377..6326bee97c0b 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -91,6 +91,7 @@ unsigned int gfx_cur_mhz; unsigned int gfx_act_mhz; unsigned int tcc_activation_temp; unsigned int tcc_activation_temp_override; +int tcc_offset_bits; double rapl_power_units, rapl_time_units; double rapl_dram_energy_units, rapl_energy_units; double rapl_joule_counter_range; @@ -3886,6 +3887,40 @@ int has_config_tdp(unsigned int family, unsigned int model) } } +/* + * tcc_offset_bits: + * 0: Tcc Offset not supported (Default) + * 6: Bit 29:24 of MSR_PLATFORM_INFO + * 4: Bit 27:24 of MSR_PLATFORM_INFO + */ +void check_tcc_offset(int model) +{ + unsigned long long msr; + + if (!genuine_intel) + return; + + switch (model) { + case INTEL_FAM6_SKYLAKE_L: + case INTEL_FAM6_SKYLAKE: + case INTEL_FAM6_KABYLAKE_L: + case INTEL_FAM6_KABYLAKE: + case INTEL_FAM6_ICELAKE_L: + case INTEL_FAM6_ICELAKE: + case INTEL_FAM6_TIGERLAKE_L: + case INTEL_FAM6_TIGERLAKE: + case INTEL_FAM6_COMETLAKE: + if (!get_msr(base_cpu, MSR_PLATFORM_INFO, &msr)) { + msr = (msr >> 30) & 1; + if (msr) + tcc_offset_bits = 6; + } + return; + default: + return; + } +} + static void remove_underbar(char *s) { @@ -4964,7 +4999,7 @@ int get_cpu_type(struct thread_data *t, struct core_data *c, struct pkg_data *p) int set_temperature_target(struct thread_data *t, struct core_data *c, struct pkg_data *p) { unsigned long long msr; - unsigned int target_c_local; + unsigned int target_c_local, tcc_offset; int cpu; /* tcc_activation_temp is used only for dts or ptm */ @@ -4997,9 +5032,24 @@ int set_temperature_target(struct thread_data *t, struct core_data *c, struct pk target_c_local 
= (msr >> 16) & 0xFF; - if (!quiet) - fprintf(outf, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C)\n", + if (!quiet) { + switch (tcc_offset_bits) { + case 4: + tcc_offset = (msr >> 24) & 0xF; + fprintf(outf, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C) (%d default - %d offset)\n", + cpu, msr, target_c_local - tcc_offset, target_c_local, tcc_offset); + break; + case 6: + tcc_offset = (msr >> 24) & 0x3F; + fprintf(outf, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C) (%d default - %d offset)\n", + cpu, msr, target_c_local - tcc_offset, target_c_local, tcc_offset); + break; + default: + fprintf(outf, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C)\n", cpu, msr, target_c_local); + break; + } + } if (!target_c_local) goto guess; @@ -5483,6 +5533,8 @@ void process_cpuid() perf_limit_reasons_probe(family, model); automatic_cstate_conversion_probe(family, model); + check_tcc_offset(model_orig); + if (!quiet) dump_cstate_pstate_config_info(family, model); -- cgit v1.2.3 From 55279aef754c5eab170077ae4ba4ebd304dea64f Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Mon, 26 Apr 2021 18:49:26 +0800 Subject: tools/power turbostat: rename tcc variables There are two TCC activation temperatures. One is the default TCC activation temperature, also known as TJ_MAX. The other is the effective TCC activation temperature, which is the default TCC activation temperature minus the TCC offset. The name of variable tcc_activation_temp might be misleading here. Thus rename tcc_activation_temp to tj_max, and use tcc_default and tcc_offset to calculate the effective TCC activation temperature. No functional change in this patch.
Signed-off-by: Zhang Rui Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 44 +++++++++++++++++------------------ 1 file changed, 22 insertions(+), 22 deletions(-) (limited to 'tools') diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 6326bee97c0b..8f0a1d8a0366 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -89,8 +89,8 @@ unsigned long long cpuidle_cur_cpu_lpi_us; unsigned long long cpuidle_cur_sys_lpi_us; unsigned int gfx_cur_mhz; unsigned int gfx_act_mhz; -unsigned int tcc_activation_temp; -unsigned int tcc_activation_temp_override; +unsigned int tj_max; +unsigned int tj_max_override; int tcc_offset_bits; double rapl_power_units, rapl_time_units; double rapl_dram_energy_units, rapl_energy_units; @@ -2118,7 +2118,7 @@ retry: if (DO_BIC(BIC_CoreTmp)) { if (get_msr(cpu, MSR_IA32_THERM_STATUS, &msr)) return -9; - c->core_temp_c = tcc_activation_temp - ((msr >> 16) & 0x7F); + c->core_temp_c = tj_max - ((msr >> 16) & 0x7F); } if (do_rapl & RAPL_AMD_F17H) { @@ -2224,7 +2224,7 @@ retry: if (DO_BIC(BIC_PkgTmp)) { if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_STATUS, &msr)) return -17; - p->pkg_temp_c = tcc_activation_temp - ((msr >> 16) & 0x7F); + p->pkg_temp_c = tj_max - ((msr >> 16) & 0x7F); } if (DO_BIC(BIC_GFX_rc6)) @@ -4637,7 +4637,7 @@ int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p dts = (msr >> 16) & 0x7F; fprintf(outf, "cpu%d: MSR_IA32_PACKAGE_THERM_STATUS: 0x%08llx (%d C)\n", - cpu, msr, tcc_activation_temp - dts); + cpu, msr, tj_max - dts); if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT, &msr)) return 0; @@ -4645,7 +4645,7 @@ int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p dts = (msr >> 16) & 0x7F; dts2 = (msr >> 8) & 0x7F; fprintf(outf, "cpu%d: MSR_IA32_PACKAGE_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n", - cpu, msr, tcc_activation_temp - dts, tcc_activation_temp - dts2); + cpu, msr, 
tj_max - dts, tj_max - dts2); } @@ -4658,7 +4658,7 @@ int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p dts = (msr >> 16) & 0x7F; resolution = (msr >> 27) & 0xF; fprintf(outf, "cpu%d: MSR_IA32_THERM_STATUS: 0x%08llx (%d C +/- %d)\n", - cpu, msr, tcc_activation_temp - dts, resolution); + cpu, msr, tj_max - dts, resolution); if (get_msr(cpu, MSR_IA32_THERM_INTERRUPT, &msr)) return 0; @@ -4666,7 +4666,7 @@ int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p dts = (msr >> 16) & 0x7F; dts2 = (msr >> 8) & 0x7F; fprintf(outf, "cpu%d: MSR_IA32_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n", - cpu, msr, tcc_activation_temp - dts, tcc_activation_temp - dts2); + cpu, msr, tj_max - dts, tj_max - dts2); } return 0; @@ -4999,10 +4999,10 @@ int get_cpu_type(struct thread_data *t, struct core_data *c, struct pkg_data *p) int set_temperature_target(struct thread_data *t, struct core_data *c, struct pkg_data *p) { unsigned long long msr; - unsigned int target_c_local, tcc_offset; + unsigned int tcc_default, tcc_offset; int cpu; - /* tcc_activation_temp is used only for dts or ptm */ + /* tj_max is used only for dts or ptm */ if (!(do_dts || do_ptm)) return 0; @@ -5016,10 +5016,10 @@ int set_temperature_target(struct thread_data *t, struct core_data *c, struct pk return -1; } - if (tcc_activation_temp_override != 0) { - tcc_activation_temp = tcc_activation_temp_override; + if (tj_max_override != 0) { + tj_max = tj_max_override; fprintf(outf, "cpu%d: Using cmdline TCC Target (%d C)\n", - cpu, tcc_activation_temp); + cpu, tj_max); return 0; } @@ -5030,38 +5030,38 @@ int set_temperature_target(struct thread_data *t, struct core_data *c, struct pk if (get_msr(base_cpu, MSR_IA32_TEMPERATURE_TARGET, &msr)) goto guess; - target_c_local = (msr >> 16) & 0xFF; + tcc_default = (msr >> 16) & 0xFF; if (!quiet) { switch (tcc_offset_bits) { case 4: tcc_offset = (msr >> 24) & 0xF; fprintf(outf, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d 
C) (%d default - %d offset)\n", - cpu, msr, target_c_local - tcc_offset, target_c_local, tcc_offset); + cpu, msr, tcc_default - tcc_offset, tcc_default, tcc_offset); break; case 6: tcc_offset = (msr >> 24) & 0x3F; fprintf(outf, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C) (%d default - %d offset)\n", - cpu, msr, target_c_local - tcc_offset, target_c_local, tcc_offset); + cpu, msr, tcc_default - tcc_offset, tcc_default, tcc_offset); break; default: fprintf(outf, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C)\n", - cpu, msr, target_c_local); + cpu, msr, tcc_default); break; } } - if (!target_c_local) + if (!tcc_default) goto guess; - tcc_activation_temp = target_c_local; + tj_max = tcc_default; return 0; guess: - tcc_activation_temp = TJMAX_DEFAULT; + tj_max = TJMAX_DEFAULT; fprintf(outf, "cpu%d: Guessing tjMax %d C, Please use -T to specify\n", - cpu, tcc_activation_temp); + cpu, tj_max); return 0; } @@ -6421,7 +6421,7 @@ void cmdline(int argc, char **argv) summary_only++; break; case 'T': - tcc_activation_temp_override = atoi(optarg); + tj_max_override = atoi(optarg); break; case 'v': print_version(); -- cgit v1.2.3 From 1b439f01b67c77a374adbbd97ad0c745b7abb09b Mon Sep 17 00:00:00 2001 From: Len Brown Date: Tue, 4 May 2021 19:21:34 -0400 Subject: tools/power turbostat: formatting Spring is here... 
run a long overdue Lendent on turbostat.c no functional change Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 943 ++++++++++++++++------------------ 1 file changed, 433 insertions(+), 510 deletions(-) (limited to 'tools') diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 8f0a1d8a0366..13805e460a4d 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -41,8 +41,8 @@ char *proc_stat = "/proc/stat"; FILE *outf; int *fd_percpu; int *fd_instr_count_percpu; -struct timeval interval_tv = {5, 0}; -struct timespec interval_ts = {5, 0}; +struct timeval interval_tv = { 5, 0 }; +struct timespec interval_ts = { 5, 0 }; /* Save original CPU model */ unsigned int model_orig; @@ -84,7 +84,7 @@ unsigned int do_rapl; unsigned int do_dts; unsigned int do_ptm; unsigned int do_ipc; -unsigned long long gfx_cur_rc6_ms; +unsigned long long gfx_cur_rc6_ms; unsigned long long cpuidle_cur_cpu_lpi_us; unsigned long long cpuidle_cur_sys_lpi_us; unsigned int gfx_cur_mhz; @@ -104,12 +104,12 @@ unsigned int crystal_hz; unsigned long long tsc_hz; int base_cpu; double discover_bclk(unsigned int family, unsigned int model); -unsigned int has_hwp; /* IA32_PM_ENABLE, IA32_HWP_CAPABILITIES */ +unsigned int has_hwp; /* IA32_PM_ENABLE, IA32_HWP_CAPABILITIES */ /* IA32_HWP_REQUEST, IA32_HWP_STATUS */ -unsigned int has_hwp_notify; /* IA32_HWP_INTERRUPT */ +unsigned int has_hwp_notify; /* IA32_HWP_INTERRUPT */ unsigned int has_hwp_activity_window; /* IA32_HWP_REQUEST[bits 41:32] */ -unsigned int has_hwp_epp; /* IA32_HWP_REQUEST[bits 31:24] */ -unsigned int has_hwp_pkg; /* IA32_HWP_REQUEST_PKG */ +unsigned int has_hwp_epp; /* IA32_HWP_REQUEST[bits 31:24] */ +unsigned int has_hwp_pkg; /* IA32_HWP_REQUEST_PKG */ unsigned int has_misc_feature_control; unsigned int first_counter_read = 1; int ignore_stdin; @@ -185,7 +185,7 @@ struct thread_data { unsigned long long mperf; unsigned long long c1; 
unsigned long long instr_count; - unsigned long long irq_count; + unsigned long long irq_count; unsigned int smi_count; unsigned int cpu_id; unsigned int apic_id; @@ -253,12 +253,11 @@ struct pkg_data { ((node_no) * topo.cores_per_node) + \ (core_no)) - #define GET_PKG(pkg_base, pkg_no) (pkg_base + pkg_no) -enum counter_scope {SCOPE_CPU, SCOPE_CORE, SCOPE_PACKAGE}; -enum counter_type {COUNTER_ITEMS, COUNTER_CYCLES, COUNTER_SECONDS, COUNTER_USEC}; -enum counter_format {FORMAT_RAW, FORMAT_DELTA, FORMAT_PERCENT}; +enum counter_scope { SCOPE_CPU, SCOPE_CORE, SCOPE_PACKAGE }; +enum counter_type { COUNTER_ITEMS, COUNTER_CYCLES, COUNTER_SECONDS, COUNTER_USEC }; +enum counter_format { FORMAT_RAW, FORMAT_DELTA, FORMAT_PERCENT }; struct msr_counter { unsigned int msr_num; @@ -294,9 +293,9 @@ int get_msr_sum(int cpu, off_t offset, unsigned long long *msr); struct msr_sum_array { /* get_msr_sum() = sum + (get_msr() - last) */ struct { - /*The accumulated MSR value is updated by the timer*/ + /*The accumulated MSR value is updated by the timer */ unsigned long long sum; - /*The MSR footprint recorded in last timer*/ + /*The MSR footprint recorded in last timer */ unsigned long long last; } entries[IDX_COUNT]; }; @@ -385,6 +384,7 @@ int idx_valid(int idx) return 0; } } + struct sys_counters { unsigned int added_thread_counters; unsigned int added_core_counters; @@ -408,7 +408,7 @@ struct cpu_topology { int logical_node_id; /* 0-based count within the package */ int physical_core_id; int thread_id; - cpu_set_t *put_ids; /* Processing Unit/Thread IDs */ + cpu_set_t *put_ids; /* Processing Unit/Thread IDs */ } *cpus; struct topo_params { @@ -425,7 +425,7 @@ struct topo_params { struct timeval tv_even, tv_odd, tv_delta; -int *irq_column_2_cpu; /* /proc/interrupts column numbers */ +int *irq_column_2_cpu; /* /proc/interrupts column numbers */ int *irqs_per_cpu; /* indexed by cpu_num */ void setup_all_buffers(void); @@ -438,34 +438,31 @@ int cpu_is_not_present(int cpu) { return 
!CPU_ISSET_S(cpu, cpu_present_setsize, cpu_present_set); } + /* * run func(thread, core, package) in topology order * skip non-present cpus */ -int for_all_cpus(int (func)(struct thread_data *, struct core_data *, struct pkg_data *), - struct thread_data *thread_base, struct core_data *core_base, struct pkg_data *pkg_base) +int for_all_cpus(int (func) (struct thread_data *, struct core_data *, struct pkg_data *), + struct thread_data *thread_base, struct core_data *core_base, struct pkg_data *pkg_base) { int retval, pkg_no, core_no, thread_no, node_no; for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) { for (node_no = 0; node_no < topo.nodes_per_pkg; node_no++) { for (core_no = 0; core_no < topo.cores_per_node; ++core_no) { - for (thread_no = 0; thread_no < - topo.threads_per_core; ++thread_no) { + for (thread_no = 0; thread_no < topo.threads_per_core; ++thread_no) { struct thread_data *t; struct core_data *c; struct pkg_data *p; - t = GET_THREAD(thread_base, thread_no, - core_no, node_no, - pkg_no); + t = GET_THREAD(thread_base, thread_no, core_no, node_no, pkg_no); if (cpu_is_not_present(t->cpu_id)) continue; - c = GET_CORE(core_base, core_no, - node_no, pkg_no); + c = GET_CORE(core_base, core_no, node_no, pkg_no); p = GET_PKG(pkg_base, pkg_no); retval = func(t, c, p); @@ -487,6 +484,7 @@ int cpu_migrate(int cpu) else return 0; } + int get_msr_fd(int cpu) { char pathname[32]; @@ -524,7 +522,7 @@ static int perf_instr_count_open(int cpu_num) /* counter for cpu_num, including user + kernel and all processes */ fd = perf_event_open(&pea, -1, cpu_num, -1, 0); - if (fd == -1) + if (fd == -1) err(-1, "cpu%d: perf instruction counter\n", cpu_num); return fd; @@ -568,7 +566,7 @@ struct msr_counter bic[] = { { 0x0, "Bzy_MHz" }, { 0x0, "TSC_MHz" }, { 0x0, "IRQ" }, - { 0x0, "SMI", "", 32, 0, FORMAT_DELTA, NULL}, + { 0x0, "SMI", "", 32, 0, FORMAT_DELTA, NULL }, { 0x0, "sysfs" }, { 0x0, "CPU%c1" }, { 0x0, "CPU%c3" }, @@ -681,7 +679,6 @@ unsigned long long bic_present = 
BIC_USEC | BIC_TOD | BIC_sysfs | BIC_APIC | BIC #define BIC_NOT_PRESENT(COUNTER_BIT) (bic_present &= ~COUNTER_BIT) #define BIC_IS_ENABLED(COUNTER_BIT) (bic_enabled & COUNTER_BIT) - #define MAX_DEFERRED 16 char *deferred_skip_names[MAX_DEFERRED]; int deferred_skip_index; @@ -695,42 +692,40 @@ enum show_hide_mode { SHOW_LIST, HIDE_LIST } global_show_hide_mode = HIDE_LIST; void help(void) { fprintf(outf, - "Usage: turbostat [OPTIONS][(--interval seconds) | COMMAND ...]\n" - "\n" - "Turbostat forks the specified COMMAND and prints statistics\n" - "when COMMAND completes.\n" - "If no COMMAND is specified, turbostat wakes every 5-seconds\n" - "to print statistics, until interrupted.\n" - " -a, --add add a counter\n" - " eg. --add msr0x10,u64,cpu,delta,MY_TSC\n" - " -c, --cpu cpu-set limit output to summary plus cpu-set:\n" - " {core | package | j,k,l..m,n-p }\n" - " -d, --debug displays usec, Time_Of_Day_Seconds and more debugging\n" - " -D, --Dump displays the raw counter values\n" - " -e, --enable [all | column]\n" - " shows all or the specified disabled column\n" - " -H, --hide [column|column,column,...]\n" - " hide the specified column(s)\n" - " -i, --interval sec.subsec\n" - " Override default 5-second measurement interval\n" - " -J, --Joules displays energy in Joules instead of Watts\n" - " -l, --list list column headers only\n" - " -n, --num_iterations num\n" - " number of the measurement iterations\n" - " -o, --out file\n" - " create or truncate \"file\" for all output\n" - " -q, --quiet skip decoding system configuration header\n" - " -s, --show [column|column,column,...]\n" - " show only the specified column(s)\n" - " -S, --Summary\n" - " limits output to 1-line system summary per interval\n" - " -T, --TCC temperature\n" - " sets the Thermal Control Circuit temperature in\n" - " degrees Celsius\n" - " -h, --help print this help message\n" - " -v, --version print version information\n" - "\n" - "For more help, run \"man turbostat\"\n"); + "Usage: turbostat 
[OPTIONS][(--interval seconds) | COMMAND ...]\n" + "\n" + "Turbostat forks the specified COMMAND and prints statistics\n" + "when COMMAND completes.\n" + "If no COMMAND is specified, turbostat wakes every 5-seconds\n" + "to print statistics, until interrupted.\n" + " -a, --add add a counter\n" + " eg. --add msr0x10,u64,cpu,delta,MY_TSC\n" + " -c, --cpu cpu-set limit output to summary plus cpu-set:\n" + " {core | package | j,k,l..m,n-p }\n" + " -d, --debug displays usec, Time_Of_Day_Seconds and more debugging\n" + " -D, --Dump displays the raw counter values\n" + " -e, --enable [all | column]\n" + " shows all or the specified disabled column\n" + " -H, --hide [column|column,column,...]\n" + " hide the specified column(s)\n" + " -i, --interval sec.subsec\n" + " Override default 5-second measurement interval\n" + " -J, --Joules displays energy in Joules instead of Watts\n" + " -l, --list list column headers only\n" + " -n, --num_iterations num\n" + " number of the measurement iterations\n" + " -o, --out file\n" + " create or truncate \"file\" for all output\n" + " -q, --quiet skip decoding system configuration header\n" + " -s, --show [column|column,column,...]\n" + " show only the specified column(s)\n" + " -S, --Summary\n" + " limits output to 1-line system summary per interval\n" + " -T, --TCC temperature\n" + " sets the Thermal Control Circuit temperature in\n" + " degrees Celsius\n" + " -h, --help print this help message\n" + " -v, --version print version information\n" "\n" "For more help, run \"man turbostat\"\n"); } /* @@ -784,7 +779,6 @@ unsigned long long bic_lookup(char *name_list, enum show_hide_mode mode) return retval; } - void print_header(char *delim) { struct msr_counter *mp; @@ -966,8 +960,7 @@ void print_header(char *delim) outp += sprintf(outp, "\n"); } -int dump_counters(struct thread_data *t, struct core_data *c, - struct pkg_data *p) +int dump_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) { int i; struct msr_counter 
*mp; @@ -975,8 +968,7 @@ int dump_counters(struct thread_data *t, struct core_data *c, outp += sprintf(outp, "t %p, c %p, p %p\n", t, c, p); if (t) { - outp += sprintf(outp, "CPU: %d flags 0x%x\n", - t->cpu_id, t->flags); + outp += sprintf(outp, "CPU: %d flags 0x%x\n", t->cpu_id, t->flags); outp += sprintf(outp, "TSC: %016llX\n", t->tsc); outp += sprintf(outp, "aperf: %016llX\n", t->aperf); outp += sprintf(outp, "mperf: %016llX\n", t->mperf); @@ -991,8 +983,7 @@ int dump_counters(struct thread_data *t, struct core_data *c, outp += sprintf(outp, "SMI: %d\n", t->smi_count); for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) { - outp += sprintf(outp, "tADDED [%d] msr0x%x: %08llX\n", - i, mp->msr_num, t->counter[i]); + outp += sprintf(outp, "tADDED [%d] msr0x%x: %08llX\n", i, mp->msr_num, t->counter[i]); } } @@ -1005,8 +996,7 @@ int dump_counters(struct thread_data *t, struct core_data *c, outp += sprintf(outp, "Joules: %0X\n", c->core_energy); for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) { - outp += sprintf(outp, "cADDED [%d] msr0x%x: %08llX\n", - i, mp->msr_num, c->counter[i]); + outp += sprintf(outp, "cADDED [%d] msr0x%x: %08llX\n", i, mp->msr_num, c->counter[i]); } outp += sprintf(outp, "mc6_us: %016llX\n", c->mc6_us); } @@ -1035,15 +1025,12 @@ int dump_counters(struct thread_data *t, struct core_data *c, outp += sprintf(outp, "Joules COR: %0llX\n", p->energy_cores); outp += sprintf(outp, "Joules GFX: %0llX\n", p->energy_gfx); outp += sprintf(outp, "Joules RAM: %0llX\n", p->energy_dram); - outp += sprintf(outp, "Throttle PKG: %0llX\n", - p->rapl_pkg_perf_status); - outp += sprintf(outp, "Throttle RAM: %0llX\n", - p->rapl_dram_perf_status); + outp += sprintf(outp, "Throttle PKG: %0llX\n", p->rapl_pkg_perf_status); + outp += sprintf(outp, "Throttle RAM: %0llX\n", p->rapl_dram_perf_status); outp += sprintf(outp, "PTM: %dC\n", p->pkg_temp_c); for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) { - outp += sprintf(outp, "pADDED [%d] msr0x%x: %08llX\n", - i, mp->msr_num, 
p->counter[i]); + outp += sprintf(outp, "pADDED [%d] msr0x%x: %08llX\n", i, mp->msr_num, p->counter[i]); } } @@ -1055,8 +1042,7 @@ int dump_counters(struct thread_data *t, struct core_data *c, /* * column formatting convention & formats */ -int format_counters(struct thread_data *t, struct core_data *c, - struct pkg_data *p) +int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) { double interval_float, tsc; char *fmt8; @@ -1065,17 +1051,16 @@ int format_counters(struct thread_data *t, struct core_data *c, char *delim = "\t"; int printed = 0; - /* if showing only 1st thread in core and this isn't one, bail out */ + /* if showing only 1st thread in core and this isn't one, bail out */ if (show_core_only && !(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) return 0; - /* if showing only 1st thread in pkg and this isn't one, bail out */ + /* if showing only 1st thread in pkg and this isn't one, bail out */ if (show_pkg_only && !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) return 0; /*if not summary line and --cpu is used */ - if ((t != &average.threads) && - (cpu_subset && !CPU_ISSET_S(t->cpu_id, cpu_subset_size, cpu_subset))) + if ((t != &average.threads) && (cpu_subset && !CPU_ISSET_S(t->cpu_id, cpu_subset_size, cpu_subset))) return 0; if (DO_BIC(BIC_USEC)) { @@ -1090,7 +1075,7 @@ int format_counters(struct thread_data *t, struct core_data *c, if (DO_BIC(BIC_TOD)) outp += sprintf(outp, "%10ld.%06ld\t", t->tv_end.tv_sec, t->tv_end.tv_usec); - interval_float = t->tv_delta.tv_sec + t->tv_delta.tv_usec/1000000.0; + interval_float = t->tv_delta.tv_sec + t->tv_delta.tv_usec / 1000000.0; tsc = t->tsc * tsc_tweak; @@ -1126,11 +1111,9 @@ int format_counters(struct thread_data *t, struct core_data *c, if (DO_BIC(BIC_Node)) { if (t) outp += sprintf(outp, "%s%d", - (printed++ ? delim : ""), - cpus[t->cpu_id].physical_node_id); + (printed++ ? delim : ""), cpus[t->cpu_id].physical_node_id); else - outp += sprintf(outp, "%s-", - (printed++ ? 
delim : "")); + outp += sprintf(outp, "%s-", (printed++ ? delim : "")); } if (DO_BIC(BIC_Core)) { if (c) @@ -1147,22 +1130,22 @@ int format_counters(struct thread_data *t, struct core_data *c, } if (DO_BIC(BIC_Avg_MHz)) - outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""), - 1.0 / units * t->aperf / interval_float); + outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""), 1.0 / units * t->aperf / interval_float); if (DO_BIC(BIC_Busy)) - outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->mperf/tsc); + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->mperf / tsc); if (DO_BIC(BIC_Bzy_MHz)) { if (has_base_hz) - outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""), base_hz / units * t->aperf / t->mperf); + outp += + sprintf(outp, "%s%.0f", (printed++ ? delim : ""), base_hz / units * t->aperf / t->mperf); else outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""), - tsc / units * t->aperf / t->mperf / interval_float); + tsc / units * t->aperf / t->mperf / interval_float); } if (DO_BIC(BIC_TSC_MHz)) - outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""), 1.0 * t->tsc/units/interval_float); + outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""), 1.0 * t->tsc / units / interval_float); if (DO_BIC(BIC_IPC)) outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 1.0 * t->instr_count / t->aperf); @@ -1183,7 +1166,8 @@ int format_counters(struct thread_data *t, struct core_data *c, for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) { if (mp->format == FORMAT_RAW) { if (mp->width == 32) - outp += sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), (unsigned int) t->counter[i]); + outp += + sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), (unsigned int)t->counter[i]); else outp += sprintf(outp, "%s0x%016llx", (printed++ ? 
delim : ""), t->counter[i]); } else if (mp->format == FORMAT_DELTA) { @@ -1193,27 +1177,28 @@ int format_counters(struct thread_data *t, struct core_data *c, outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), t->counter[i]); } else if (mp->format == FORMAT_PERCENT) { if (mp->type == COUNTER_USEC) - outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), t->counter[i]/interval_float/10000); + outp += + sprintf(outp, "%s%.2f", (printed++ ? delim : ""), + t->counter[i] / interval_float / 10000); else - outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->counter[i]/tsc); + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->counter[i] / tsc); } } /* C1 */ if (DO_BIC(BIC_CPU_c1)) - outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->c1/tsc); - + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->c1 / tsc); /* print per-core data only for 1st thread in core */ if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) goto done; if (DO_BIC(BIC_CPU_c3)) - outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c3/tsc); + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c3 / tsc); if (DO_BIC(BIC_CPU_c6)) - outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c6/tsc); + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c6 / tsc); if (DO_BIC(BIC_CPU_c7)) - outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c7/tsc); + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c7 / tsc); /* Mod%c6 */ if (DO_BIC(BIC_Mod_c6)) @@ -1225,7 +1210,8 @@ int format_counters(struct thread_data *t, struct core_data *c, for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) { if (mp->format == FORMAT_RAW) { if (mp->width == 32) - outp += sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), (unsigned int) c->counter[i]); + outp += + sprintf(outp, "%s0x%08x", (printed++ ? 
delim : ""), (unsigned int)c->counter[i]); else outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), c->counter[i]); } else if (mp->format == FORMAT_DELTA) { @@ -1234,14 +1220,15 @@ int format_counters(struct thread_data *t, struct core_data *c, else outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), c->counter[i]); } else if (mp->format == FORMAT_PERCENT) { - outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->counter[i]/tsc); + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->counter[i] / tsc); } } fmt8 = "%s%.2f"; if (DO_BIC(BIC_CorWatt) && (do_rapl & RAPL_PER_CORE_ENERGY)) - outp += sprintf(outp, fmt8, (printed++ ? delim : ""), c->core_energy * rapl_energy_units / interval_float); + outp += + sprintf(outp, fmt8, (printed++ ? delim : ""), c->core_energy * rapl_energy_units / interval_float); if (DO_BIC(BIC_Cor_J) && (do_rapl & RAPL_PER_CORE_ENERGY)) outp += sprintf(outp, fmt8, (printed++ ? delim : ""), c->core_energy * rapl_energy_units); @@ -1259,7 +1246,7 @@ int format_counters(struct thread_data *t, struct core_data *c, outp += sprintf(outp, "%s**.**", (printed++ ? delim : "")); } else { outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), - p->gfx_rc6_ms / 10.0 / interval_float); + p->gfx_rc6_ms / 10.0 / interval_float); } } @@ -1273,42 +1260,49 @@ int format_counters(struct thread_data *t, struct core_data *c, /* Totl%C0, Any%C0 GFX%C0 CPUGFX% */ if (DO_BIC(BIC_Totl_c0)) - outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_wtd_core_c0/tsc); + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_wtd_core_c0 / tsc); if (DO_BIC(BIC_Any_c0)) - outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_any_core_c0/tsc); + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_any_core_c0 / tsc); if (DO_BIC(BIC_GFX_c0)) - outp += sprintf(outp, "%s%.2f", (printed++ ? 
delim : ""), 100.0 * p->pkg_any_gfxe_c0/tsc); + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_any_gfxe_c0 / tsc); if (DO_BIC(BIC_CPUGFX)) - outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_both_core_gfxe_c0/tsc); + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_both_core_gfxe_c0 / tsc); if (DO_BIC(BIC_Pkgpc2)) - outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc2/tsc); + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc2 / tsc); if (DO_BIC(BIC_Pkgpc3)) - outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc3/tsc); + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc3 / tsc); if (DO_BIC(BIC_Pkgpc6)) - outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc6/tsc); + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc6 / tsc); if (DO_BIC(BIC_Pkgpc7)) - outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc7/tsc); + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc7 / tsc); if (DO_BIC(BIC_Pkgpc8)) - outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc8/tsc); + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc8 / tsc); if (DO_BIC(BIC_Pkgpc9)) - outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc9/tsc); + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc9 / tsc); if (DO_BIC(BIC_Pkgpc10)) - outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc10/tsc); + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc10 / tsc); if (DO_BIC(BIC_CPU_LPI)) - outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->cpu_lpi / 1000000.0 / interval_float); + outp += + sprintf(outp, "%s%.2f", (printed++ ? 
delim : ""), 100.0 * p->cpu_lpi / 1000000.0 / interval_float); if (DO_BIC(BIC_SYS_LPI)) - outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->sys_lpi / 1000000.0 / interval_float); + outp += + sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->sys_lpi / 1000000.0 / interval_float); if (DO_BIC(BIC_PkgWatt)) - outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_pkg * rapl_energy_units / interval_float); + outp += + sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_pkg * rapl_energy_units / interval_float); if (DO_BIC(BIC_CorWatt) && !(do_rapl & RAPL_PER_CORE_ENERGY)) - outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_cores * rapl_energy_units / interval_float); + outp += + sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_cores * rapl_energy_units / interval_float); if (DO_BIC(BIC_GFXWatt)) - outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_gfx * rapl_energy_units / interval_float); + outp += + sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_gfx * rapl_energy_units / interval_float); if (DO_BIC(BIC_RAMWatt)) - outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_dram * rapl_dram_energy_units / interval_float); + outp += + sprintf(outp, fmt8, (printed++ ? delim : ""), + p->energy_dram * rapl_dram_energy_units / interval_float); if (DO_BIC(BIC_Pkg_J)) outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_pkg * rapl_energy_units); if (DO_BIC(BIC_Cor_J) && !(do_rapl & RAPL_PER_CORE_ENERGY)) @@ -1318,14 +1312,19 @@ int format_counters(struct thread_data *t, struct core_data *c, if (DO_BIC(BIC_RAM_J)) outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_dram * rapl_dram_energy_units); if (DO_BIC(BIC_PKG__)) - outp += sprintf(outp, fmt8, (printed++ ? delim : ""), 100.0 * p->rapl_pkg_perf_status * rapl_time_units / interval_float); + outp += + sprintf(outp, fmt8, (printed++ ? 
delim : ""), + 100.0 * p->rapl_pkg_perf_status * rapl_time_units / interval_float); if (DO_BIC(BIC_RAM__)) - outp += sprintf(outp, fmt8, (printed++ ? delim : ""), 100.0 * p->rapl_dram_perf_status * rapl_time_units / interval_float); + outp += + sprintf(outp, fmt8, (printed++ ? delim : ""), + 100.0 * p->rapl_dram_perf_status * rapl_time_units / interval_float); for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) { if (mp->format == FORMAT_RAW) { if (mp->width == 32) - outp += sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), (unsigned int) p->counter[i]); + outp += + sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), (unsigned int)p->counter[i]); else outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), p->counter[i]); } else if (mp->format == FORMAT_DELTA) { @@ -1334,7 +1333,7 @@ int format_counters(struct thread_data *t, struct core_data *c, else outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), p->counter[i]); } else if (mp->format == FORMAT_PERCENT) { - outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->counter[i]/tsc); + outp += sprintf(outp, "%s%.2f", (printed++ ? 
delim : ""), 100.0 * p->counter[i] / tsc); } } @@ -1359,12 +1358,14 @@ void flush_output_stdout(void) outp = output_buffer; } + void flush_output_stderr(void) { fputs(output_buffer, outf); fflush(outf); outp = output_buffer; } + void format_all_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) { static int printed; @@ -1385,13 +1386,11 @@ void format_all_counters(struct thread_data *t, struct core_data *c, struct pkg_ #define DELTA_WRAP32(new, old) \ old = ((((unsigned long long)new << 32) - ((unsigned long long)old << 32)) >> 32); -int -delta_package(struct pkg_data *new, struct pkg_data *old) +int delta_package(struct pkg_data *new, struct pkg_data *old) { int i; struct msr_counter *mp; - if (DO_BIC(BIC_Totl_c0)) old->pkg_wtd_core_c0 = new->pkg_wtd_core_c0 - old->pkg_wtd_core_c0; if (DO_BIC(BIC_Any_c0)) @@ -1416,7 +1415,7 @@ delta_package(struct pkg_data *new, struct pkg_data *old) old->pkg_temp_c = new->pkg_temp_c; /* flag an error when rc6 counter resets/wraps */ - if (old->gfx_rc6_ms > new->gfx_rc6_ms) + if (old->gfx_rc6_ms > new->gfx_rc6_ms) old->gfx_rc6_ms = -1; else old->gfx_rc6_ms = new->gfx_rc6_ms - old->gfx_rc6_ms; @@ -1441,8 +1440,7 @@ delta_package(struct pkg_data *new, struct pkg_data *old) return 0; } -void -delta_core(struct core_data *new, struct core_data *old) +void delta_core(struct core_data *new, struct core_data *old) { int i; struct msr_counter *mp; @@ -1474,9 +1472,7 @@ int soft_c1_residency_display(int bic) /* * old = new - old */ -int -delta_thread(struct thread_data *new, struct thread_data *old, - struct core_data *core_delta) +int delta_thread(struct thread_data *new, struct thread_data *old, struct core_data *core_delta) { int i; struct msr_counter *mp; @@ -1507,8 +1503,7 @@ delta_thread(struct thread_data *new, struct thread_data *old, old->c1 = new->c1 - old->c1; - if (DO_BIC(BIC_Avg_MHz) || DO_BIC(BIC_Busy) || DO_BIC(BIC_Bzy_MHz) || - soft_c1_residency_display(BIC_Avg_MHz)) { + if (DO_BIC(BIC_Avg_MHz) || 
DO_BIC(BIC_Busy) || DO_BIC(BIC_Bzy_MHz) || soft_c1_residency_display(BIC_Avg_MHz)) { if ((new->aperf > old->aperf) && (new->mperf > old->mperf)) { old->aperf = new->aperf - old->aperf; old->mperf = new->mperf - old->mperf; @@ -1517,7 +1512,6 @@ delta_thread(struct thread_data *new, struct thread_data *old, } } - if (use_c1_residency_msr) { /* * Some models have a dedicated C1 residency MSR, @@ -1534,7 +1528,7 @@ delta_thread(struct thread_data *new, struct thread_data *old, else { /* normal case, derive c1 */ old->c1 = (old->tsc * tsc_tweak) - old->mperf - core_delta->c3 - - core_delta->c6 - core_delta->c7; + - core_delta->c6 - core_delta->c7; } } @@ -1563,8 +1557,7 @@ delta_thread(struct thread_data *new, struct thread_data *old, } int delta_cpu(struct thread_data *t, struct core_data *c, - struct pkg_data *p, struct thread_data *t2, - struct core_data *c2, struct pkg_data *p2) + struct pkg_data *p, struct thread_data *t2, struct core_data *c2, struct pkg_data *p2) { int retval = 0; @@ -1587,7 +1580,7 @@ int delta_cpu(struct thread_data *t, struct core_data *c, void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) { int i; - struct msr_counter *mp; + struct msr_counter *mp; t->tv_begin.tv_sec = 0; t->tv_begin.tv_usec = 0; @@ -1654,8 +1647,8 @@ void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) p->counter[i] = 0; } -int sum_counters(struct thread_data *t, struct core_data *c, - struct pkg_data *p) + +int sum_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) { int i; struct msr_counter *mp; @@ -1756,12 +1749,12 @@ int sum_counters(struct thread_data *t, struct core_data *c, } return 0; } + /* * sum the counters for all cpus in the system * compute the weighted average */ -void compute_average(struct thread_data *t, struct core_data *c, - struct pkg_data *p) +void compute_average(struct thread_data *t, struct core_data *c, struct pkg_data 
*p) { int i; struct msr_counter *mp; @@ -1842,7 +1835,7 @@ static unsigned long long rdtsc(void) { unsigned int low, high; - asm volatile("rdtsc" : "=a" (low), "=d" (high)); + asm volatile ("rdtsc":"=a" (low), "=d"(high)); return low | ((unsigned long long)high) << 32; } @@ -1858,6 +1851,7 @@ FILE *fopen_or_die(const char *path, const char *mode) err(1, "%s: open failed", path); return filep; } + /* * snapshot_sysfs_counter() * @@ -1889,8 +1883,7 @@ int get_mp(int cpu, struct msr_counter *mp, unsigned long long *counterp) char path[128 + PATH_BYTES]; if (mp->flags & SYSFS_PERCPU) { - sprintf(path, "/sys/devices/system/cpu/cpu%d/%s", - cpu, mp->path); + sprintf(path, "/sys/devices/system/cpu/cpu%d/%s", cpu, mp->path); *counterp = snapshot_sysfs_counter(path); } else { @@ -1950,7 +1943,7 @@ void get_apic_id(struct thread_data *t) eax = ebx = ecx = edx = 0; __cpuid(0x80000001, eax, ebx, ecx, edx); - topology_extensions = ecx & (1 << 22); + topology_extensions = ecx & (1 << 22); if (topology_extensions == 0) return; @@ -1973,8 +1966,7 @@ void get_apic_id(struct thread_data *t) t->x2apic_id = edx; if (debug && (t->apic_id != (t->x2apic_id & 0xff))) - fprintf(outf, "cpu%d: BIOS BUG: apic 0x%x x2apic 0x%x\n", - t->cpu_id, t->apic_id, t->x2apic_id); + fprintf(outf, "cpu%d: BIOS BUG: apic 0x%x x2apic 0x%x\n", t->cpu_id, t->apic_id, t->x2apic_id); } /* @@ -2002,8 +1994,7 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) retry: t->tsc = rdtsc(); /* we are running on local CPU of interest */ - if (DO_BIC(BIC_Avg_MHz) || DO_BIC(BIC_Busy) || DO_BIC(BIC_Bzy_MHz) || - soft_c1_residency_display(BIC_Avg_MHz)) { + if (DO_BIC(BIC_Avg_MHz) || DO_BIC(BIC_Busy) || DO_BIC(BIC_Bzy_MHz) || soft_c1_residency_display(BIC_Avg_MHz)) { unsigned long long tsc_before, tsc_between, tsc_after, aperf_time, mperf_time; /* @@ -2050,8 +2041,7 @@ retry: if (aperf_mperf_retry_count < 5) goto retry; else - warnx("cpu%d jitter %lld %lld", - cpu, aperf_time, mperf_time); + 
warnx("cpu%d jitter %lld %lld", cpu, aperf_time, mperf_time); } aperf_mperf_retry_count = 0; @@ -2252,47 +2242,64 @@ done: * (>= PCL__7) and to index pkg_cstate_limit_strings[]. */ -#define PCLUKN 0 /* Unknown */ -#define PCLRSV 1 /* Reserved */ -#define PCL__0 2 /* PC0 */ -#define PCL__1 3 /* PC1 */ -#define PCL__2 4 /* PC2 */ -#define PCL__3 5 /* PC3 */ -#define PCL__4 6 /* PC4 */ -#define PCL__6 7 /* PC6 */ -#define PCL_6N 8 /* PC6 No Retention */ -#define PCL_6R 9 /* PC6 Retention */ -#define PCL__7 10 /* PC7 */ -#define PCL_7S 11 /* PC7 Shrink */ -#define PCL__8 12 /* PC8 */ -#define PCL__9 13 /* PC9 */ -#define PCL_10 14 /* PC10 */ -#define PCLUNL 15 /* Unlimited */ +#define PCLUKN 0 /* Unknown */ +#define PCLRSV 1 /* Reserved */ +#define PCL__0 2 /* PC0 */ +#define PCL__1 3 /* PC1 */ +#define PCL__2 4 /* PC2 */ +#define PCL__3 5 /* PC3 */ +#define PCL__4 6 /* PC4 */ +#define PCL__6 7 /* PC6 */ +#define PCL_6N 8 /* PC6 No Retention */ +#define PCL_6R 9 /* PC6 Retention */ +#define PCL__7 10 /* PC7 */ +#define PCL_7S 11 /* PC7 Shrink */ +#define PCL__8 12 /* PC8 */ +#define PCL__9 13 /* PC9 */ +#define PCL_10 14 /* PC10 */ +#define PCLUNL 15 /* Unlimited */ int pkg_cstate_limit = PCLUKN; char *pkg_cstate_limit_strings[] = { "reserved", "unknown", "pc0", "pc1", "pc2", - "pc3", "pc4", "pc6", "pc6n", "pc6r", "pc7", "pc7s", "pc8", "pc9", "pc10", "unlimited"}; - -int nhm_pkg_cstate_limits[16] = {PCL__0, PCL__1, PCL__3, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV}; -int snb_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCL__7, PCL_7S, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV}; -int hsw_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL__3, PCL__6, PCL__7, PCL_7S, PCL__8, PCL__9, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV}; -int slv_pkg_cstate_limits[16] = {PCL__0, PCL__1, PCLRSV, PCLRSV, PCL__4, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, 
PCLRSV, PCLRSV, PCLRSV, PCL__6, PCL__7}; -int amt_pkg_cstate_limits[16] = {PCLUNL, PCL__1, PCL__2, PCLRSV, PCLRSV, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV}; -int phi_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV}; -int glm_pkg_cstate_limits[16] = {PCLUNL, PCL__1, PCL__3, PCL__6, PCL__7, PCL_7S, PCL__8, PCL__9, PCL_10, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV}; -int skx_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV}; -int icx_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL__6, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV}; + "pc3", "pc4", "pc6", "pc6n", "pc6r", "pc7", "pc7s", "pc8", "pc9", "pc10", "unlimited" +}; -static void -calculate_tsc_tweak() +int nhm_pkg_cstate_limits[16] = + { PCL__0, PCL__1, PCL__3, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, +PCLRSV, PCLRSV }; +int snb_pkg_cstate_limits[16] = + { PCL__0, PCL__2, PCL_6N, PCL_6R, PCL__7, PCL_7S, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, +PCLRSV, PCLRSV }; +int hsw_pkg_cstate_limits[16] = + { PCL__0, PCL__2, PCL__3, PCL__6, PCL__7, PCL_7S, PCL__8, PCL__9, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, +PCLRSV, PCLRSV }; +int slv_pkg_cstate_limits[16] = + { PCL__0, PCL__1, PCLRSV, PCLRSV, PCL__4, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, +PCL__6, PCL__7 }; +int amt_pkg_cstate_limits[16] = + { PCLUNL, PCL__1, PCL__2, PCLRSV, PCLRSV, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, +PCLRSV, PCLRSV }; +int phi_pkg_cstate_limits[16] = + { PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, +PCLRSV, PCLRSV }; +int 
glm_pkg_cstate_limits[16] = + { PCLUNL, PCL__1, PCL__3, PCL__6, PCL__7, PCL_7S, PCL__8, PCL__9, PCL_10, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, +PCLRSV, PCLRSV }; +int skx_pkg_cstate_limits[16] = + { PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, +PCLRSV, PCLRSV }; +int icx_pkg_cstate_limits[16] = + { PCL__0, PCL__2, PCL__6, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, +PCLRSV, PCLRSV }; + +static void calculate_tsc_tweak() { tsc_tweak = base_hz / tsc_hz; } void prewake_cstate_probe(unsigned int family, unsigned int model); -static void -dump_nhm_platform_info(void) +static void dump_nhm_platform_info(void) { unsigned long long msr; unsigned int ratio; @@ -2302,12 +2309,10 @@ dump_nhm_platform_info(void) fprintf(outf, "cpu%d: MSR_PLATFORM_INFO: 0x%08llx\n", base_cpu, msr); ratio = (msr >> 40) & 0xFF; - fprintf(outf, "%d * %.1f = %.1f MHz max efficiency frequency\n", - ratio, bclk, ratio * bclk); + fprintf(outf, "%d * %.1f = %.1f MHz max efficiency frequency\n", ratio, bclk, ratio * bclk); ratio = (msr >> 8) & 0xFF; - fprintf(outf, "%d * %.1f = %.1f MHz base frequency\n", - ratio, bclk, ratio * bclk); + fprintf(outf, "%d * %.1f = %.1f MHz base frequency\n", ratio, bclk, ratio * bclk); get_msr(base_cpu, MSR_IA32_POWER_CTL, &msr); fprintf(outf, "cpu%d: MSR_IA32_POWER_CTL: 0x%08llx (C1E auto-promotion: %sabled)\n", @@ -2315,14 +2320,12 @@ dump_nhm_platform_info(void) /* C-state Pre-wake Disable (CSTATE_PREWAKE_DISABLE) */ if (dis_cstate_prewake) - fprintf(outf, "C-state Pre-wake: %sabled\n", - msr & 0x40000000 ? "DIS" : "EN"); + fprintf(outf, "C-state Pre-wake: %sabled\n", msr & 0x40000000 ? 
"DIS" : "EN"); return; } -static void -dump_hsw_turbo_ratio_limits(void) +static void dump_hsw_turbo_ratio_limits(void) { unsigned long long msr; unsigned int ratio; @@ -2333,18 +2336,15 @@ dump_hsw_turbo_ratio_limits(void) ratio = (msr >> 8) & 0xFF; if (ratio) - fprintf(outf, "%d * %.1f = %.1f MHz max turbo 18 active cores\n", - ratio, bclk, ratio * bclk); + fprintf(outf, "%d * %.1f = %.1f MHz max turbo 18 active cores\n", ratio, bclk, ratio * bclk); ratio = (msr >> 0) & 0xFF; if (ratio) - fprintf(outf, "%d * %.1f = %.1f MHz max turbo 17 active cores\n", - ratio, bclk, ratio * bclk); + fprintf(outf, "%d * %.1f = %.1f MHz max turbo 17 active cores\n", ratio, bclk, ratio * bclk); return; } -static void -dump_ivt_turbo_ratio_limits(void) +static void dump_ivt_turbo_ratio_limits(void) { unsigned long long msr; unsigned int ratio; @@ -2355,45 +2355,38 @@ dump_ivt_turbo_ratio_limits(void) ratio = (msr >> 56) & 0xFF; if (ratio) - fprintf(outf, "%d * %.1f = %.1f MHz max turbo 16 active cores\n", - ratio, bclk, ratio * bclk); + fprintf(outf, "%d * %.1f = %.1f MHz max turbo 16 active cores\n", ratio, bclk, ratio * bclk); ratio = (msr >> 48) & 0xFF; if (ratio) - fprintf(outf, "%d * %.1f = %.1f MHz max turbo 15 active cores\n", - ratio, bclk, ratio * bclk); + fprintf(outf, "%d * %.1f = %.1f MHz max turbo 15 active cores\n", ratio, bclk, ratio * bclk); ratio = (msr >> 40) & 0xFF; if (ratio) - fprintf(outf, "%d * %.1f = %.1f MHz max turbo 14 active cores\n", - ratio, bclk, ratio * bclk); + fprintf(outf, "%d * %.1f = %.1f MHz max turbo 14 active cores\n", ratio, bclk, ratio * bclk); ratio = (msr >> 32) & 0xFF; if (ratio) - fprintf(outf, "%d * %.1f = %.1f MHz max turbo 13 active cores\n", - ratio, bclk, ratio * bclk); + fprintf(outf, "%d * %.1f = %.1f MHz max turbo 13 active cores\n", ratio, bclk, ratio * bclk); ratio = (msr >> 24) & 0xFF; if (ratio) - fprintf(outf, "%d * %.1f = %.1f MHz max turbo 12 active cores\n", - ratio, bclk, ratio * bclk); + fprintf(outf, "%d * %.1f = %.1f 
MHz max turbo 12 active cores\n", ratio, bclk, ratio * bclk); ratio = (msr >> 16) & 0xFF; if (ratio) - fprintf(outf, "%d * %.1f = %.1f MHz max turbo 11 active cores\n", - ratio, bclk, ratio * bclk); + fprintf(outf, "%d * %.1f = %.1f MHz max turbo 11 active cores\n", ratio, bclk, ratio * bclk); ratio = (msr >> 8) & 0xFF; if (ratio) - fprintf(outf, "%d * %.1f = %.1f MHz max turbo 10 active cores\n", - ratio, bclk, ratio * bclk); + fprintf(outf, "%d * %.1f = %.1f MHz max turbo 10 active cores\n", ratio, bclk, ratio * bclk); ratio = (msr >> 0) & 0xFF; if (ratio) - fprintf(outf, "%d * %.1f = %.1f MHz max turbo 9 active cores\n", - ratio, bclk, ratio * bclk); + fprintf(outf, "%d * %.1f = %.1f MHz max turbo 9 active cores\n", ratio, bclk, ratio * bclk); return; } + int has_turbo_ratio_group_limits(int family, int model) { @@ -2411,8 +2404,7 @@ int has_turbo_ratio_group_limits(int family, int model) return 0; } -static void -dump_turbo_ratio_limits(int family, int model) +static void dump_turbo_ratio_limits(int family, int model) { unsigned long long msr, core_counts; unsigned int ratio, group_size; @@ -2477,8 +2469,7 @@ dump_turbo_ratio_limits(int family, int model) return; } -static void -dump_atom_turbo_ratio_limits(void) +static void dump_atom_turbo_ratio_limits(void) { unsigned long long msr; unsigned int ratio; @@ -2488,45 +2479,37 @@ dump_atom_turbo_ratio_limits(void) ratio = (msr >> 0) & 0x3F; if (ratio) - fprintf(outf, "%d * %.1f = %.1f MHz minimum operating frequency\n", - ratio, bclk, ratio * bclk); + fprintf(outf, "%d * %.1f = %.1f MHz minimum operating frequency\n", ratio, bclk, ratio * bclk); ratio = (msr >> 8) & 0x3F; if (ratio) - fprintf(outf, "%d * %.1f = %.1f MHz low frequency mode (LFM)\n", - ratio, bclk, ratio * bclk); + fprintf(outf, "%d * %.1f = %.1f MHz low frequency mode (LFM)\n", ratio, bclk, ratio * bclk); ratio = (msr >> 16) & 0x3F; if (ratio) - fprintf(outf, "%d * %.1f = %.1f MHz base frequency\n", - ratio, bclk, ratio * bclk); + fprintf(outf, 
"%d * %.1f = %.1f MHz base frequency\n", ratio, bclk, ratio * bclk); get_msr(base_cpu, MSR_ATOM_CORE_TURBO_RATIOS, &msr); fprintf(outf, "cpu%d: MSR_ATOM_CORE_TURBO_RATIOS: 0x%08llx\n", base_cpu, msr & 0xFFFFFFFF); ratio = (msr >> 24) & 0x3F; if (ratio) - fprintf(outf, "%d * %.1f = %.1f MHz max turbo 4 active cores\n", - ratio, bclk, ratio * bclk); + fprintf(outf, "%d * %.1f = %.1f MHz max turbo 4 active cores\n", ratio, bclk, ratio * bclk); ratio = (msr >> 16) & 0x3F; if (ratio) - fprintf(outf, "%d * %.1f = %.1f MHz max turbo 3 active cores\n", - ratio, bclk, ratio * bclk); + fprintf(outf, "%d * %.1f = %.1f MHz max turbo 3 active cores\n", ratio, bclk, ratio * bclk); ratio = (msr >> 8) & 0x3F; if (ratio) - fprintf(outf, "%d * %.1f = %.1f MHz max turbo 2 active cores\n", - ratio, bclk, ratio * bclk); + fprintf(outf, "%d * %.1f = %.1f MHz max turbo 2 active cores\n", ratio, bclk, ratio * bclk); ratio = (msr >> 0) & 0x3F; if (ratio) - fprintf(outf, "%d * %.1f = %.1f MHz max turbo 1 active core\n", - ratio, bclk, ratio * bclk); + fprintf(outf, "%d * %.1f = %.1f MHz max turbo 1 active core\n", ratio, bclk, ratio * bclk); } -static void -dump_knl_turbo_ratio_limits(void) +static void dump_knl_turbo_ratio_limits(void) { const unsigned int buckets_no = 7; @@ -2538,8 +2521,7 @@ dump_knl_turbo_ratio_limits(void) get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT, &msr); - fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT: 0x%08llx\n", - base_cpu, msr); + fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT: 0x%08llx\n", base_cpu, msr); /* * Turbo encoding in KNL is as follows: @@ -2584,8 +2566,7 @@ dump_knl_turbo_ratio_limits(void) ratio[i], bclk, ratio[i] * bclk, cores[i]); } -static void -dump_nhm_cst_cfg(void) +static void dump_nhm_cst_cfg(void) { unsigned long long msr; @@ -2598,14 +2579,11 @@ dump_nhm_cst_cfg(void) (msr & SNB_C1_AUTO_UNDEMOTE) ? "UNdemote-C1, " : "", (msr & NHM_C3_AUTO_DEMOTE) ? "demote-C3, " : "", (msr & NHM_C1_AUTO_DEMOTE) ? "demote-C1, " : "", - (msr & (1 << 15)) ? 
"" : "UN", - (unsigned int)msr & 0xF, - pkg_cstate_limit_strings[pkg_cstate_limit]); + (msr & (1 << 15)) ? "" : "UN", (unsigned int)msr & 0xF, pkg_cstate_limit_strings[pkg_cstate_limit]); #define AUTOMATIC_CSTATE_CONVERSION (1UL << 16) if (has_automatic_cstate_conversion) { - fprintf(outf, ", automatic c-state conversion=%s", - (msr & AUTOMATIC_CSTATE_CONVERSION) ? "on" : "off"); + fprintf(outf, ", automatic c-state conversion=%s", (msr & AUTOMATIC_CSTATE_CONVERSION) ? "on" : "off"); } fprintf(outf, ")\n"); @@ -2613,8 +2591,7 @@ dump_nhm_cst_cfg(void) return; } -static void -dump_config_tdp(void) +static void dump_config_tdp(void) { unsigned long long msr; @@ -2656,7 +2633,7 @@ dump_config_tdp(void) fprintf(outf, ")\n"); } -unsigned int irtl_time_units[] = {1, 32, 1024, 32768, 1048576, 33554432, 0, 0 }; +unsigned int irtl_time_units[] = { 1, 32, 1024, 32768, 1048576, 33554432, 0, 0 }; void print_irtl(void) { @@ -2696,6 +2673,7 @@ void print_irtl(void) (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]); } + void free_fd_percpu(void) { int i; @@ -2752,7 +2730,6 @@ void free_all_buffers(void) free(cpus); } - /* * Parse a file containing a single int. 
* Return 0 if file can not be opened @@ -2827,8 +2804,7 @@ void set_node_data(void) * the logical_node_id */ for (cpux = cpu; cpux <= topo.max_cpu_num; cpux++) { - if ((cpus[cpux].physical_package_id == pkg) && - (cpus[cpux].physical_node_id == node)) { + if ((cpus[cpux].physical_package_id == pkg) && (cpus[cpux].physical_node_id == node)) { cpus[cpux].logical_node_id = lnode; cpu_count++; } @@ -2850,8 +2826,7 @@ int get_physical_node_id(struct cpu_topology *thiscpu) int cpu = thiscpu->logical_cpu_id; for (i = 0; i <= topo.max_cpu_num; i++) { - sprintf(path, "/sys/devices/system/cpu/cpu%d/node%i/cpulist", - cpu, i); + sprintf(path, "/sys/devices/system/cpu/cpu%d/node%i/cpulist", cpu, i); filep = fopen(path, "r"); if (!filep) continue; @@ -2881,8 +2856,7 @@ int get_thread_siblings(struct cpu_topology *thiscpu) size = CPU_ALLOC_SIZE((topo.max_cpu_num + 1)); CPU_ZERO_S(size, thiscpu->put_ids); - sprintf(path, - "/sys/devices/system/cpu/cpu%d/topology/thread_siblings", cpu); + sprintf(path, "/sys/devices/system/cpu/cpu%d/topology/thread_siblings", cpu); filep = fopen(path, "r"); if (!filep) { @@ -2899,10 +2873,8 @@ int get_thread_siblings(struct cpu_topology *thiscpu) sib_core = get_core_id(so); if (sib_core == thiscpu->physical_core_id) { CPU_SET_S(so, size, thiscpu->put_ids); - if ((so != cpu) && - (cpus[so].thread_id < 0)) - cpus[so].thread_id = - thread_id++; + if ((so != cpu) && (cpus[so].thread_id < 0)) + cpus[so].thread_id = thread_id++; } } } @@ -2917,41 +2889,31 @@ int get_thread_siblings(struct cpu_topology *thiscpu) * skip non-present cpus */ -int for_all_cpus_2(int (func)(struct thread_data *, struct core_data *, - struct pkg_data *, struct thread_data *, struct core_data *, - struct pkg_data *), struct thread_data *thread_base, - struct core_data *core_base, struct pkg_data *pkg_base, - struct thread_data *thread_base2, struct core_data *core_base2, - struct pkg_data *pkg_base2) +int for_all_cpus_2(int (func) (struct thread_data *, struct core_data *, + 
struct pkg_data *, struct thread_data *, struct core_data *, + struct pkg_data *), struct thread_data *thread_base, + struct core_data *core_base, struct pkg_data *pkg_base, + struct thread_data *thread_base2, struct core_data *core_base2, struct pkg_data *pkg_base2) { int retval, pkg_no, node_no, core_no, thread_no; for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) { for (node_no = 0; node_no < topo.nodes_per_pkg; ++node_no) { - for (core_no = 0; core_no < topo.cores_per_node; - ++core_no) { - for (thread_no = 0; thread_no < - topo.threads_per_core; ++thread_no) { + for (core_no = 0; core_no < topo.cores_per_node; ++core_no) { + for (thread_no = 0; thread_no < topo.threads_per_core; ++thread_no) { struct thread_data *t, *t2; struct core_data *c, *c2; struct pkg_data *p, *p2; - t = GET_THREAD(thread_base, thread_no, - core_no, node_no, - pkg_no); + t = GET_THREAD(thread_base, thread_no, core_no, node_no, pkg_no); if (cpu_is_not_present(t->cpu_id)) continue; - t2 = GET_THREAD(thread_base2, thread_no, - core_no, node_no, - pkg_no); + t2 = GET_THREAD(thread_base2, thread_no, core_no, node_no, pkg_no); - c = GET_CORE(core_base, core_no, - node_no, pkg_no); - c2 = GET_CORE(core_base2, core_no, - node_no, - pkg_no); + c = GET_CORE(core_base, core_no, node_no, pkg_no); + c2 = GET_CORE(core_base2, core_no, node_no, pkg_no); p = GET_PKG(pkg_base, pkg_no); p2 = GET_PKG(pkg_base2, pkg_no); @@ -2970,7 +2932,7 @@ int for_all_cpus_2(int (func)(struct thread_data *, struct core_data *, * run func(cpu) on every cpu in /proc/stat * return max_cpu number */ -int for_all_proc_cpus(int (func)(int)) +int for_all_proc_cpus(int (func) (int)) { FILE *fp; int cpu_num; @@ -2990,7 +2952,7 @@ int for_all_proc_cpus(int (func)(int)) retval = func(cpu_num); if (retval) { fclose(fp); - return(retval); + return (retval); } } fclose(fp); @@ -3014,16 +2976,14 @@ void set_max_cpu_num(void) base_cpu = sched_getcpu(); if (base_cpu < 0) err(1, "cannot find calling cpu ID"); - sprintf(pathname, - 
"/sys/devices/system/cpu/cpu%d/topology/thread_siblings", - base_cpu); + sprintf(pathname, "/sys/devices/system/cpu/cpu%d/topology/thread_siblings", base_cpu); filep = fopen_or_die(pathname, "r"); topo.max_cpu_num = 0; while (fscanf(filep, "%lx,", &dummy) == 1) topo.max_cpu_num += BITMASK_SIZE; fclose(filep); - topo.max_cpu_num--; /* 0 based */ + topo.max_cpu_num--; /* 0 based */ } /* @@ -3035,6 +2995,7 @@ int count_cpus(int cpu) topo.num_cpus++; return 0; } + int mark_cpu_present(int cpu) { CPU_SET_S(cpu, cpu_present_setsize, cpu_present_set); @@ -3104,12 +3065,12 @@ int snapshot_proc_interrupts(void) } - while (getc(fp) != '\n') - ; /* flush interrupt description */ + while (getc(fp) != '\n') ; /* flush interrupt description */ } return 0; } + /* * snapshot_gfx_rc6_ms() * @@ -3133,6 +3094,7 @@ int snapshot_gfx_rc6_ms(void) return 0; } + /* * snapshot_gfx_mhz() * @@ -3212,6 +3174,7 @@ int snapshot_cpu_lpi_us(void) return 0; } + /* * snapshot_sys_lpi() * @@ -3235,6 +3198,7 @@ int snapshot_sys_lpi_us(void) return 0; } + /* * snapshot /proc and /sys files * @@ -3266,7 +3230,7 @@ int snapshot_proc_sysfs_files(void) int exit_requested; -static void signal_handler (int signal) +static void signal_handler(int signal) { switch (signal) { case SIGINT: @@ -3373,8 +3337,7 @@ static int update_msr_sum(struct thread_data *t, struct core_data *c, struct pkg continue; ret = get_msr(cpu, offset, &msr_cur); if (ret) { - fprintf(outf, "Can not update msr(0x%llx)\n", - (unsigned long long)offset); + fprintf(outf, "Can not update msr(0x%llx)\n", (unsigned long long)offset); continue; } @@ -3387,8 +3350,7 @@ static int update_msr_sum(struct thread_data *t, struct core_data *c, struct pkg return 0; } -static void -msr_record_handler(union sigval v) +static void msr_record_handler(union sigval v) { for_all_cpus(update_msr_sum, EVEN_COUNTERS); } @@ -3433,9 +3395,9 @@ void msr_sum_record(void) } return; - release_timer: +release_timer: timer_delete(timerid); - release_msr: +release_msr: 
free(per_cpu_msr_sum); } @@ -3527,7 +3489,7 @@ void check_dev_msr() sprintf(pathname, "/dev/cpu/%d/msr", base_cpu); if (stat(pathname, &sb)) - if (system("/sbin/modprobe msr > /dev/null 2>&1")) + if (system("/sbin/modprobe msr > /dev/null 2>&1")) err(-5, "no /dev/cpu/0/msr, Try \"# modprobe msr\" "); } @@ -3549,8 +3511,7 @@ int check_for_cap_sys_rawio(void) err(-6, "cap_get\n"); if (cap_flag_value != CAP_SET) { - warnx("capget(CAP_SYS_RAWIO) failed," - " try \"# setcap cap_sys_rawio=ep %s\"", progname); + warnx("capget(CAP_SYS_RAWIO) failed," " try \"# setcap cap_sys_rawio=ep %s\"", progname); return 1; } @@ -3559,6 +3520,7 @@ int check_for_cap_sys_rawio(void) return 0; } + void check_permissions(void) { int do_exit = 0; @@ -3664,7 +3626,7 @@ int probe_nhm_msrs(unsigned int family, unsigned int model) case INTEL_FAM6_ATOM_GOLDMONT_PLUS: case INTEL_FAM6_ATOM_GOLDMONT_D: /* DNV */ case INTEL_FAM6_ATOM_TREMONT: /* EHL */ - case INTEL_FAM6_ATOM_TREMONT_D: /* JVL */ + case INTEL_FAM6_ATOM_TREMONT_D: /* JVL */ pkg_cstate_limits = glm_pkg_cstate_limits; break; default: @@ -3680,6 +3642,7 @@ int probe_nhm_msrs(unsigned int family, unsigned int model) has_base_hz = 1; return 1; } + /* * SLV client has support for unique MSRs: * @@ -3700,6 +3663,7 @@ int has_slv_msrs(unsigned int family, unsigned int model) } return 0; } + int is_dnv(unsigned int family, unsigned int model) { @@ -3712,6 +3676,7 @@ int is_dnv(unsigned int family, unsigned int model) } return 0; } + int is_bdx(unsigned int family, unsigned int model) { @@ -3724,6 +3689,7 @@ int is_bdx(unsigned int family, unsigned int model) } return 0; } + int is_skx(unsigned int family, unsigned int model) { @@ -3761,6 +3727,7 @@ int is_ehl(unsigned int family, unsigned int model) } return 0; } + int is_jvl(unsigned int family, unsigned int model) { if (!genuine_intel) @@ -3779,7 +3746,7 @@ int has_turbo_ratio_limit(unsigned int family, unsigned int model) return 0; switch (model) { - /* Nehalem compatible, but do not 
include turbo-ratio limit support */ + /* Nehalem compatible, but do not include turbo-ratio limit support */ case INTEL_FAM6_NEHALEM_EX: /* Nehalem-EX Xeon - Beckton */ case INTEL_FAM6_XEON_PHI_KNL: /* PHI - Knights Landing (different MSR definition) */ return 0; @@ -3787,6 +3754,7 @@ int has_turbo_ratio_limit(unsigned int family, unsigned int model) return 1; } } + int has_atom_turbo_ratio_limit(unsigned int family, unsigned int model) { if (has_slv_msrs(family, model)) @@ -3794,6 +3762,7 @@ int has_atom_turbo_ratio_limit(unsigned int family, unsigned int model) return 0; } + int has_ivt_turbo_ratio_limit(unsigned int family, unsigned int model) { if (!genuine_intel) @@ -3810,6 +3779,7 @@ int has_ivt_turbo_ratio_limit(unsigned int family, unsigned int model) return 0; } } + int has_hsw_turbo_ratio_limit(unsigned int family, unsigned int model) { if (!genuine_intel) @@ -3841,6 +3811,7 @@ int has_knl_turbo_ratio_limit(unsigned int family, unsigned int model) return 0; } } + int has_glm_turbo_ratio_limit(unsigned int family, unsigned int model) { if (!genuine_intel) @@ -3858,6 +3829,7 @@ int has_glm_turbo_ratio_limit(unsigned int family, unsigned int model) return 0; } } + int has_config_tdp(unsigned int family, unsigned int model) { if (!genuine_intel) @@ -3921,8 +3893,7 @@ void check_tcc_offset(int model) } } -static void -remove_underbar(char *s) +static void remove_underbar(char *s) { char *to = s; @@ -3935,8 +3906,7 @@ remove_underbar(char *s) *to = 0; } -static void -dump_cstate_pstate_config_info(unsigned int family, unsigned int model) +static void dump_cstate_pstate_config_info(unsigned int family, unsigned int model) { if (!do_nhm_platform_info) return; @@ -3981,8 +3951,8 @@ static void dump_sysfs_file(char *path) fprintf(outf, "%s: %s", strrchr(path, '/') + 1, cpuidle_buf); } -static void -dump_sysfs_cstate_config(void) + +static void dump_sysfs_cstate_config(void) { char path[64]; char name_buf[16]; @@ -4002,15 +3972,14 @@ dump_sysfs_cstate_config(void) 
for (state = 0; state < 10; ++state) { - sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name", - base_cpu, state); + sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name", base_cpu, state); input = fopen(path, "r"); if (input == NULL) continue; if (!fgets(name_buf, sizeof(name_buf), input)) err(1, "%s: failed to read file", path); - /* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */ + /* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */ sp = strchr(name_buf, '-'); if (!sp) sp = strchrnul(name_buf, '\n'); @@ -4019,8 +3988,7 @@ dump_sysfs_cstate_config(void) remove_underbar(name_buf); - sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/desc", - base_cpu, state); + sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/desc", base_cpu, state); input = fopen(path, "r"); if (input == NULL) continue; @@ -4031,8 +3999,8 @@ dump_sysfs_cstate_config(void) fclose(input); } } -static void -dump_sysfs_pstate_config(void) + +static void dump_sysfs_pstate_config(void) { char path[64]; char driver_buf[64]; @@ -4040,8 +4008,7 @@ dump_sysfs_pstate_config(void) FILE *input; int turbo; - sprintf(path, "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_driver", - base_cpu); + sprintf(path, "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_driver", base_cpu); input = fopen(path, "r"); if (input == NULL) { fprintf(outf, "NSFOD %s\n", path); @@ -4051,8 +4018,7 @@ dump_sysfs_pstate_config(void) err(1, "%s: failed to read file", path); fclose(input); - sprintf(path, "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_governor", - base_cpu); + sprintf(path, "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_governor", base_cpu); input = fopen(path, "r"); if (input == NULL) { fprintf(outf, "NSFOD %s\n", path); @@ -4084,7 +4050,6 @@ dump_sysfs_pstate_config(void) } } - /* * print_epb() * Decode the ENERGY_PERF_BIAS MSR @@ -4130,6 +4095,7 @@ int print_epb(struct thread_data *t, struct core_data *c, struct pkg_data *p) return 0; } + /* * 
print_hwp() * Decode the MSR_HWP_CAPABILITIES @@ -4156,8 +4122,7 @@ int print_hwp(struct thread_data *t, struct core_data *c, struct pkg_data *p) if (get_msr(cpu, MSR_PM_ENABLE, &msr)) return 0; - fprintf(outf, "cpu%d: MSR_PM_ENABLE: 0x%08llx (%sHWP)\n", - cpu, msr, (msr & (1 << 0)) ? "" : "No-"); + fprintf(outf, "cpu%d: MSR_PM_ENABLE: 0x%08llx (%sHWP)\n", cpu, msr, (msr & (1 << 0)) ? "" : "No-"); /* MSR_PM_ENABLE[1] == 1 if HWP is enabled and MSRs visible */ if ((msr & (1 << 0)) == 0) @@ -4167,25 +4132,23 @@ int print_hwp(struct thread_data *t, struct core_data *c, struct pkg_data *p) return 0; fprintf(outf, "cpu%d: MSR_HWP_CAPABILITIES: 0x%08llx " - "(high %d guar %d eff %d low %d)\n", - cpu, msr, - (unsigned int)HWP_HIGHEST_PERF(msr), - (unsigned int)HWP_GUARANTEED_PERF(msr), - (unsigned int)HWP_MOSTEFFICIENT_PERF(msr), - (unsigned int)HWP_LOWEST_PERF(msr)); + "(high %d guar %d eff %d low %d)\n", + cpu, msr, + (unsigned int)HWP_HIGHEST_PERF(msr), + (unsigned int)HWP_GUARANTEED_PERF(msr), + (unsigned int)HWP_MOSTEFFICIENT_PERF(msr), (unsigned int)HWP_LOWEST_PERF(msr)); if (get_msr(cpu, MSR_HWP_REQUEST, &msr)) return 0; fprintf(outf, "cpu%d: MSR_HWP_REQUEST: 0x%08llx " - "(min %d max %d des %d epp 0x%x window 0x%x pkg 0x%x)\n", - cpu, msr, - (unsigned int)(((msr) >> 0) & 0xff), - (unsigned int)(((msr) >> 8) & 0xff), - (unsigned int)(((msr) >> 16) & 0xff), - (unsigned int)(((msr) >> 24) & 0xff), - (unsigned int)(((msr) >> 32) & 0xff3), - (unsigned int)(((msr) >> 42) & 0x1)); + "(min %d max %d des %d epp 0x%x window 0x%x pkg 0x%x)\n", + cpu, msr, + (unsigned int)(((msr) >> 0) & 0xff), + (unsigned int)(((msr) >> 8) & 0xff), + (unsigned int)(((msr) >> 16) & 0xff), + (unsigned int)(((msr) >> 24) & 0xff), + (unsigned int)(((msr) >> 32) & 0xff3), (unsigned int)(((msr) >> 42) & 0x1)); if (has_hwp_pkg) { if (get_msr(cpu, MSR_HWP_REQUEST_PKG, &msr)) @@ -4197,8 +4160,7 @@ int print_hwp(struct thread_data *t, struct core_data *c, struct pkg_data *p) (unsigned int)(((msr) >> 
0) & 0xff), (unsigned int)(((msr) >> 8) & 0xff), (unsigned int)(((msr) >> 16) & 0xff), - (unsigned int)(((msr) >> 24) & 0xff), - (unsigned int)(((msr) >> 32) & 0xff3)); + (unsigned int)(((msr) >> 24) & 0xff), (unsigned int)(((msr) >> 32) & 0xff3)); } if (has_hwp_notify) { if (get_msr(cpu, MSR_HWP_INTERRUPT, &msr)) @@ -4206,18 +4168,14 @@ int print_hwp(struct thread_data *t, struct core_data *c, struct pkg_data *p) fprintf(outf, "cpu%d: MSR_HWP_INTERRUPT: 0x%08llx " "(%s_Guaranteed_Perf_Change, %s_Excursion_Min)\n", - cpu, msr, - ((msr) & 0x1) ? "EN" : "Dis", - ((msr) & 0x2) ? "EN" : "Dis"); + cpu, msr, ((msr) & 0x1) ? "EN" : "Dis", ((msr) & 0x2) ? "EN" : "Dis"); } if (get_msr(cpu, MSR_HWP_STATUS, &msr)) return 0; fprintf(outf, "cpu%d: MSR_HWP_STATUS: 0x%08llx " - "(%sGuaranteed_Perf_Change, %sExcursion_Min)\n", - cpu, msr, - ((msr) & 0x1) ? "" : "No-", - ((msr) & 0x2) ? "" : "No-"); + "(%sGuaranteed_Perf_Change, %sExcursion_Min)\n", + cpu, msr, ((msr) & 0x1) ? "" : "No-", ((msr) & 0x2) ? "" : "No-"); return 0; } @@ -4257,8 +4215,7 @@ int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data (msr & 1 << 5) ? "Auto-HWP, " : "", (msr & 1 << 4) ? "Graphics, " : "", (msr & 1 << 2) ? "bit2, " : "", - (msr & 1 << 1) ? "ThermStatus, " : "", - (msr & 1 << 0) ? "PROCHOT, " : ""); + (msr & 1 << 1) ? "ThermStatus, " : "", (msr & 1 << 0) ? "PROCHOT, " : ""); fprintf(outf, " (Logged: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)\n", (msr & 1 << 31) ? "bit31, " : "", (msr & 1 << 30) ? "bit30, " : "", @@ -4272,8 +4229,7 @@ int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data (msr & 1 << 21) ? "Auto-HWP, " : "", (msr & 1 << 20) ? "Graphics, " : "", (msr & 1 << 18) ? "bit18, " : "", - (msr & 1 << 17) ? "ThermStatus, " : "", - (msr & 1 << 16) ? "PROCHOT, " : ""); + (msr & 1 << 17) ? "ThermStatus, " : "", (msr & 1 << 16) ? 
"PROCHOT, " : ""); } if (do_gfx_perf_limit_reasons) { @@ -4286,8 +4242,7 @@ int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data (msr & 1 << 6) ? "VR-Therm, " : "", (msr & 1 << 8) ? "Amps, " : "", (msr & 1 << 9) ? "GFXPwr, " : "", - (msr & 1 << 10) ? "PkgPwrL1, " : "", - (msr & 1 << 11) ? "PkgPwrL2, " : ""); + (msr & 1 << 10) ? "PkgPwrL1, " : "", (msr & 1 << 11) ? "PkgPwrL2, " : ""); fprintf(outf, " (Logged: %s%s%s%s%s%s%s%s)\n", (msr & 1 << 16) ? "PROCHOT, " : "", (msr & 1 << 17) ? "ThermStatus, " : "", @@ -4295,8 +4250,7 @@ int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data (msr & 1 << 22) ? "VR-Therm, " : "", (msr & 1 << 24) ? "Amps, " : "", (msr & 1 << 25) ? "GFXPwr, " : "", - (msr & 1 << 26) ? "PkgPwrL1, " : "", - (msr & 1 << 27) ? "PkgPwrL2, " : ""); + (msr & 1 << 26) ? "PkgPwrL1, " : "", (msr & 1 << 27) ? "PkgPwrL2, " : ""); } if (do_ring_perf_limit_reasons) { get_msr(cpu, MSR_RING_PERF_LIMIT_REASONS, &msr); @@ -4306,21 +4260,19 @@ int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data (msr & 1 << 1) ? "ThermStatus, " : "", (msr & 1 << 6) ? "VR-Therm, " : "", (msr & 1 << 8) ? "Amps, " : "", - (msr & 1 << 10) ? "PkgPwrL1, " : "", - (msr & 1 << 11) ? "PkgPwrL2, " : ""); + (msr & 1 << 10) ? "PkgPwrL1, " : "", (msr & 1 << 11) ? "PkgPwrL2, " : ""); fprintf(outf, " (Logged: %s%s%s%s%s%s)\n", (msr & 1 << 16) ? "PROCHOT, " : "", (msr & 1 << 17) ? "ThermStatus, " : "", (msr & 1 << 22) ? "VR-Therm, " : "", (msr & 1 << 24) ? "Amps, " : "", - (msr & 1 << 26) ? "PkgPwrL1, " : "", - (msr & 1 << 27) ? "PkgPwrL2, " : ""); + (msr & 1 << 26) ? "PkgPwrL1, " : "", (msr & 1 << 27) ? 
"PkgPwrL2, " : ""); } return 0; } #define RAPL_POWER_GRANULARITY 0x7FFF /* 15 bit power granularity */ -#define RAPL_TIME_GRANULARITY 0x3F /* 6 bit time granularity */ +#define RAPL_TIME_GRANULARITY 0x3F /* 6 bit time granularity */ double get_tdp_intel(unsigned int model) { @@ -4349,8 +4301,7 @@ double get_tdp_amd(unsigned int family) * rapl_dram_energy_units_probe() * Energy units are either hard-coded, or come from RAPL Energy Unit MSR. */ -static double -rapl_dram_energy_units_probe(int model, double rapl_energy_units) +static double rapl_dram_energy_units_probe(int model, double rapl_energy_units) { /* only called for genuine_intel, family 6 */ @@ -4402,7 +4353,9 @@ void rapl_probe_intel(unsigned int family, unsigned int model) BIC_PRESENT(BIC_PkgWatt); break; case INTEL_FAM6_ATOM_TREMONT: /* EHL */ - do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_GFX | RAPL_PKG_POWER_INFO; + do_rapl = + RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS + | RAPL_GFX | RAPL_PKG_POWER_INFO; if (rapl_joules) { BIC_PRESENT(BIC_Pkg_J); BIC_PRESENT(BIC_Cor_J); @@ -4425,7 +4378,9 @@ void rapl_probe_intel(unsigned int family, unsigned int model) break; case INTEL_FAM6_SKYLAKE_L: /* SKL */ case INTEL_FAM6_CANNONLAKE_L: /* CNL */ - do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_GFX | RAPL_PKG_POWER_INFO; + do_rapl = + RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS + | RAPL_GFX | RAPL_PKG_POWER_INFO; BIC_PRESENT(BIC_PKG__); BIC_PRESENT(BIC_RAM__); if (rapl_joules) { @@ -4445,7 +4400,9 @@ void rapl_probe_intel(unsigned int family, unsigned int model) case INTEL_FAM6_SKYLAKE_X: /* SKX */ case INTEL_FAM6_ICELAKE_X: /* ICX */ case INTEL_FAM6_XEON_PHI_KNL: /* KNL */ - do_rapl = RAPL_PKG | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_DRAM_PERF_STATUS | 
RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO; + do_rapl = + RAPL_PKG | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | + RAPL_PKG_POWER_INFO; BIC_PRESENT(BIC_PKG__); BIC_PRESENT(BIC_RAM__); if (rapl_joules) { @@ -4458,7 +4415,9 @@ void rapl_probe_intel(unsigned int family, unsigned int model) break; case INTEL_FAM6_SANDYBRIDGE_X: case INTEL_FAM6_IVYBRIDGE_X: - do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_PKG_PERF_STATUS | RAPL_DRAM_PERF_STATUS | RAPL_PKG_POWER_INFO; + do_rapl = + RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_PKG_PERF_STATUS | + RAPL_DRAM_PERF_STATUS | RAPL_PKG_POWER_INFO; BIC_PRESENT(BIC_PKG__); BIC_PRESENT(BIC_RAM__); if (rapl_joules) { @@ -4483,7 +4442,9 @@ void rapl_probe_intel(unsigned int family, unsigned int model) } break; case INTEL_FAM6_ATOM_GOLDMONT_D: /* DNV */ - do_rapl = RAPL_PKG | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO | RAPL_CORES_ENERGY_STATUS; + do_rapl = + RAPL_PKG | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | + RAPL_PKG_POWER_INFO | RAPL_CORES_ENERGY_STATUS; BIC_PRESENT(BIC_PKG__); BIC_PRESENT(BIC_RAM__); if (rapl_joules) { @@ -4600,8 +4561,7 @@ void perf_limit_reasons_probe(unsigned int family, unsigned int model) void automatic_cstate_conversion_probe(unsigned int family, unsigned int model) { - if (is_skx(family, model) || is_bdx(family, model) || - is_icx(family, model)) + if (is_skx(family, model) || is_bdx(family, model) || is_icx(family, model)) has_automatic_cstate_conversion = 1; } @@ -4636,8 +4596,7 @@ int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p return 0; dts = (msr >> 16) & 0x7F; - fprintf(outf, "cpu%d: MSR_IA32_PACKAGE_THERM_STATUS: 0x%08llx (%d C)\n", - cpu, msr, tj_max - dts); + fprintf(outf, "cpu%d: MSR_IA32_PACKAGE_THERM_STATUS: 0x%08llx (%d C)\n", cpu, msr, 
tj_max - dts); if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT, &msr)) return 0; @@ -4648,7 +4607,6 @@ int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p cpu, msr, tj_max - dts, tj_max - dts2); } - if (do_dts && debug) { unsigned int resolution; @@ -4678,7 +4636,7 @@ void print_power_limit_msr(int cpu, unsigned long long msr, char *label) cpu, label, ((msr >> 15) & 1) ? "EN" : "DIS", ((msr >> 0) & 0x7FFF) * rapl_power_units, - (1.0 + (((msr >> 22) & 0x3)/4.0)) * (1 << ((msr >> 17) & 0x1F)) * rapl_time_units, + (1.0 + (((msr >> 22) & 0x3) / 4.0)) * (1 << ((msr >> 17) & 0x1F)) * rapl_time_units, (((msr >> 16) & 1) ? "EN" : "DIS")); return; @@ -4719,12 +4677,11 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p) if (do_rapl & RAPL_PKG_POWER_INFO) { if (get_msr(cpu, MSR_PKG_POWER_INFO, &msr)) - return -5; - + return -5; fprintf(outf, "cpu%d: MSR_PKG_POWER_INFO: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n", cpu, msr, - ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units, + ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units, ((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units, ((msr >> 32) & RAPL_POWER_GRANULARITY) * rapl_power_units, ((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units); @@ -4743,17 +4700,17 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p) cpu, ((msr >> 47) & 1) ? "EN" : "DIS", ((msr >> 32) & 0x7FFF) * rapl_power_units, - (1.0 + (((msr >> 54) & 0x3)/4.0)) * (1 << ((msr >> 49) & 0x1F)) * rapl_time_units, + (1.0 + (((msr >> 54) & 0x3) / 4.0)) * (1 << ((msr >> 49) & 0x1F)) * rapl_time_units, ((msr >> 48) & 1) ? 
"EN" : "DIS"); } if (do_rapl & RAPL_DRAM_POWER_INFO) { if (get_msr(cpu, MSR_DRAM_POWER_INFO, &msr)) - return -6; + return -6; fprintf(outf, "cpu%d: MSR_DRAM_POWER_INFO,: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n", cpu, msr, - ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units, + ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units, ((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units, ((msr >> 32) & RAPL_POWER_GRANULARITY) * rapl_power_units, ((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units); @@ -4762,7 +4719,7 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p) if (get_msr(cpu, MSR_DRAM_POWER_LIMIT, &msr)) return -9; fprintf(outf, "cpu%d: MSR_DRAM_POWER_LIMIT: 0x%08llx (%slocked)\n", - cpu, msr, (msr >> 31) & 1 ? "" : "UN"); + cpu, msr, (msr >> 31) & 1 ? "" : "UN"); print_power_limit_msr(cpu, msr, "DRAM Limit"); } @@ -4776,7 +4733,7 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p) if (get_msr(cpu, MSR_PP0_POWER_LIMIT, &msr)) return -9; fprintf(outf, "cpu%d: MSR_PP0_POWER_LIMIT: 0x%08llx (%slocked)\n", - cpu, msr, (msr >> 31) & 1 ? "" : "UN"); + cpu, msr, (msr >> 31) & 1 ? "" : "UN"); print_power_limit_msr(cpu, msr, "Cores Limit"); } if (do_rapl & RAPL_GFX) { @@ -4788,7 +4745,7 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p) if (get_msr(cpu, MSR_PP1_POWER_LIMIT, &msr)) return -9; fprintf(outf, "cpu%d: MSR_PP1_POWER_LIMIT: 0x%08llx (%slocked)\n", - cpu, msr, (msr >> 31) & 1 ? "" : "UN"); + cpu, msr, (msr >> 31) & 1 ? 
"" : "UN"); print_power_limit_msr(cpu, msr, "GFX Limit"); } return 0; @@ -4810,24 +4767,24 @@ int has_snb_msrs(unsigned int family, unsigned int model) switch (model) { case INTEL_FAM6_SANDYBRIDGE: case INTEL_FAM6_SANDYBRIDGE_X: - case INTEL_FAM6_IVYBRIDGE: /* IVB */ - case INTEL_FAM6_IVYBRIDGE_X: /* IVB Xeon */ - case INTEL_FAM6_HASWELL: /* HSW */ - case INTEL_FAM6_HASWELL_X: /* HSW */ - case INTEL_FAM6_HASWELL_L: /* HSW */ - case INTEL_FAM6_HASWELL_G: /* HSW */ - case INTEL_FAM6_BROADWELL: /* BDW */ - case INTEL_FAM6_BROADWELL_G: /* BDW */ - case INTEL_FAM6_BROADWELL_X: /* BDX */ - case INTEL_FAM6_SKYLAKE_L: /* SKL */ - case INTEL_FAM6_CANNONLAKE_L: /* CNL */ - case INTEL_FAM6_SKYLAKE_X: /* SKX */ - case INTEL_FAM6_ICELAKE_X: /* ICX */ - case INTEL_FAM6_ATOM_GOLDMONT: /* BXT */ + case INTEL_FAM6_IVYBRIDGE: /* IVB */ + case INTEL_FAM6_IVYBRIDGE_X: /* IVB Xeon */ + case INTEL_FAM6_HASWELL: /* HSW */ + case INTEL_FAM6_HASWELL_X: /* HSW */ + case INTEL_FAM6_HASWELL_L: /* HSW */ + case INTEL_FAM6_HASWELL_G: /* HSW */ + case INTEL_FAM6_BROADWELL: /* BDW */ + case INTEL_FAM6_BROADWELL_G: /* BDW */ + case INTEL_FAM6_BROADWELL_X: /* BDX */ + case INTEL_FAM6_SKYLAKE_L: /* SKL */ + case INTEL_FAM6_CANNONLAKE_L: /* CNL */ + case INTEL_FAM6_SKYLAKE_X: /* SKX */ + case INTEL_FAM6_ICELAKE_X: /* ICX */ + case INTEL_FAM6_ATOM_GOLDMONT: /* BXT */ case INTEL_FAM6_ATOM_GOLDMONT_PLUS: case INTEL_FAM6_ATOM_GOLDMONT_D: /* DNV */ - case INTEL_FAM6_ATOM_TREMONT: /* EHL */ - case INTEL_FAM6_ATOM_TREMONT_D: /* JVL */ + case INTEL_FAM6_ATOM_TREMONT: /* EHL */ + case INTEL_FAM6_ATOM_TREMONT_D: /* JVL */ return 1; } return 0; @@ -4913,7 +4870,7 @@ int is_cnl(unsigned int family, unsigned int model) return 0; switch (model) { - case INTEL_FAM6_CANNONLAKE_L: /* CNL */ + case INTEL_FAM6_CANNONLAKE_L: /* CNL */ return 1; } @@ -4928,7 +4885,7 @@ unsigned int get_aperf_mperf_multiplier(unsigned int family, unsigned int model) } #define SLM_BCLK_FREQS 5 -double slm_freq_table[SLM_BCLK_FREQS] = { 
83.3, 100.0, 133.3, 116.7, 80.0}; +double slm_freq_table[SLM_BCLK_FREQS] = { 83.3, 100.0, 133.3, 116.7, 80.0 }; double slm_bclk(void) { @@ -4979,7 +4936,7 @@ int get_cpu_type(struct thread_data *t, struct core_data *c, struct pkg_data *p) __cpuid(0x1a, eax, ebx, ecx, edx); eax = (eax >> 24) & 0xFF; - if (eax == 0x20 ) + if (eax == 0x20) t->is_atom = true; return 0; } @@ -5018,8 +4975,7 @@ int set_temperature_target(struct thread_data *t, struct core_data *c, struct pk if (tj_max_override != 0) { tj_max = tj_max_override; - fprintf(outf, "cpu%d: Using cmdline TCC Target (%d C)\n", - cpu, tj_max); + fprintf(outf, "cpu%d: Using cmdline TCC Target (%d C)\n", cpu, tj_max); return 0; } @@ -5037,16 +4993,15 @@ int set_temperature_target(struct thread_data *t, struct core_data *c, struct pk case 4: tcc_offset = (msr >> 24) & 0xF; fprintf(outf, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C) (%d default - %d offset)\n", - cpu, msr, tcc_default - tcc_offset, tcc_default, tcc_offset); + cpu, msr, tcc_default - tcc_offset, tcc_default, tcc_offset); break; case 6: tcc_offset = (msr >> 24) & 0x3F; fprintf(outf, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C) (%d default - %d offset)\n", - cpu, msr, tcc_default - tcc_offset, tcc_default, tcc_offset); + cpu, msr, tcc_default - tcc_offset, tcc_default, tcc_offset); break; default: - fprintf(outf, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C)\n", - cpu, msr, tcc_default); + fprintf(outf, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C)\n", cpu, msr, tcc_default); break; } } @@ -5060,8 +5015,7 @@ int set_temperature_target(struct thread_data *t, struct core_data *c, struct pk guess: tj_max = TJMAX_DEFAULT; - fprintf(outf, "cpu%d: Guessing tjMax %d C, Please use -T to specify\n", - cpu, tj_max); + fprintf(outf, "cpu%d: Guessing tjMax %d C, Please use -T to specify\n", cpu, tj_max); return 0; } @@ -5072,9 +5026,7 @@ void decode_feature_control_msr(void) if (!get_msr(base_cpu, MSR_IA32_FEAT_CTL, &msr)) fprintf(outf, 
"cpu%d: MSR_IA32_FEATURE_CONTROL: 0x%08llx (%sLocked %s)\n", - base_cpu, msr, - msr & FEAT_CTL_LOCKED ? "" : "UN-", - msr & (1 << 18) ? "SGX" : ""); + base_cpu, msr, msr & FEAT_CTL_LOCKED ? "" : "UN-", msr & (1 << 18) ? "SGX" : ""); } void decode_misc_enable_msr(void) @@ -5102,13 +5054,12 @@ void decode_misc_feature_control(void) return; if (!get_msr(base_cpu, MSR_MISC_FEATURE_CONTROL, &msr)) - fprintf(outf, "cpu%d: MSR_MISC_FEATURE_CONTROL: 0x%08llx (%sL2-Prefetch %sL2-Prefetch-pair %sL1-Prefetch %sL1-IP-Prefetch)\n", - base_cpu, msr, - msr & (0 << 0) ? "No-" : "", - msr & (1 << 0) ? "No-" : "", - msr & (2 << 0) ? "No-" : "", - msr & (3 << 0) ? "No-" : ""); + fprintf(outf, + "cpu%d: MSR_MISC_FEATURE_CONTROL: 0x%08llx (%sL2-Prefetch %sL2-Prefetch-pair %sL1-Prefetch %sL1-IP-Prefetch)\n", + base_cpu, msr, msr & (0 << 0) ? "No-" : "", msr & (1 << 0) ? "No-" : "", + msr & (2 << 0) ? "No-" : "", msr & (3 << 0) ? "No-" : ""); } + /* * Decode MSR_MISC_PWR_MGMT * @@ -5129,10 +5080,9 @@ void decode_misc_pwr_mgmt_msr(void) if (!get_msr(base_cpu, MSR_MISC_PWR_MGMT, &msr)) fprintf(outf, "cpu%d: MSR_MISC_PWR_MGMT: 0x%08llx (%sable-EIST_Coordination %sable-EPB %sable-OOB)\n", base_cpu, msr, - msr & (1 << 0) ? "DIS" : "EN", - msr & (1 << 1) ? "EN" : "DIS", - msr & (1 << 8) ? "EN" : "DIS"); + msr & (1 << 0) ? "DIS" : "EN", msr & (1 << 1) ? "EN" : "DIS", msr & (1 << 8) ? 
"EN" : "DIS"); } + /* * Decode MSR_CC6_DEMOTION_POLICY_CONFIG, MSR_MC6_DEMOTION_POLICY_CONFIG * @@ -5158,10 +5108,10 @@ void decode_c6_demotion_policy_msr(void) unsigned int intel_model_duplicates(unsigned int model) { - switch(model) { + switch (model) { case INTEL_FAM6_NEHALEM_EP: /* Core i7, Xeon 5500 series - Bloomfield, Gainstown NHM-EP */ case INTEL_FAM6_NEHALEM: /* Core i7 and i5 Processor - Clarksfield, Lynnfield, Jasper Forest */ - case 0x1F: /* Core i7 and i5 Processor - Nehalem */ + case 0x1F: /* Core i7 and i5 Processor - Nehalem */ case INTEL_FAM6_WESTMERE: /* Westmere Client - Clarkdale, Arrandale */ case INTEL_FAM6_WESTMERE_EP: /* Westmere EP - Gulftown */ return INTEL_FAM6_NEHALEM; @@ -5224,13 +5174,11 @@ void print_dev_latency(void) close(fd); return; } - fprintf(outf, "/dev/cpu_dma_latency: %d usec (%s)\n", - value, value == 2000000000 ? "default" : "constrained"); + fprintf(outf, "/dev/cpu_dma_latency: %d usec (%s)\n", value, value == 2000000000 ? "default" : "constrained"); close(fd); } - /* * Linux-perf manages the the HW instructions-retired counter * by enabling when requested, and hiding rollover @@ -5296,7 +5244,8 @@ void process_cpuid() if (!quiet) { fprintf(outf, "CPUID(1): family:model:stepping 0x%x:%x:%x (%d:%d:%d) microcode 0x%x\n", - family, model, stepping, family, model, stepping, (unsigned int)((ucode_patch >> 32) & 0xFFFFFFFF)); + family, model, stepping, family, model, stepping, + (unsigned int)((ucode_patch >> 32) & 0xFFFFFFFF)); fprintf(outf, "CPUID(0x80000000): max_extended_levels: 0x%x\n", max_extended_level); fprintf(outf, "CPUID(1): %s %s %s %s %s %s %s %s %s %s\n", ecx_flags & (1 << 0) ? "SSE3" : "-", @@ -5307,8 +5256,7 @@ void process_cpuid() edx_flags & (1 << 4) ? "TSC" : "-", edx_flags & (1 << 5) ? "MSR" : "-", edx_flags & (1 << 22) ? "ACPI-TM" : "-", - edx_flags & (1 << 28) ? "HT" : "-", - edx_flags & (1 << 29) ? "TM" : "-"); + edx_flags & (1 << 28) ? "HT" : "-", edx_flags & (1 << 29) ? 
"TM" : "-"); } if (genuine_intel) { model_orig = model; @@ -5364,14 +5312,11 @@ void process_cpuid() has_hwp ? "" : "No-", has_hwp_notify ? "" : "No-", has_hwp_activity_window ? "" : "No-", - has_hwp_epp ? "" : "No-", - has_hwp_pkg ? "" : "No-", - has_epb ? "" : "No-"); + has_hwp_epp ? "" : "No-", has_hwp_pkg ? "" : "No-", has_epb ? "" : "No-"); if (!quiet) decode_misc_enable_msr(); - if (max_level >= 0x7 && !quiet) { int has_sgx; @@ -5403,7 +5348,7 @@ void process_cpuid() eax_crystal, ebx_tsc, crystal_hz); if (crystal_hz == 0) - switch(model) { + switch (model) { case INTEL_FAM6_SKYLAKE_L: /* SKL */ crystal_hz = 24000000; /* 24.0 MHz */ break; @@ -5416,13 +5361,13 @@ void process_cpuid() break; default: crystal_hz = 0; - } + } if (crystal_hz) { - tsc_hz = (unsigned long long) crystal_hz * ebx_tsc / eax_crystal; + tsc_hz = (unsigned long long)crystal_hz *ebx_tsc / eax_crystal; if (!quiet) fprintf(outf, "TSC: %lld MHz (%d Hz * %d / %d / 1000000)\n", - tsc_hz / 1000000, crystal_hz, ebx_tsc, eax_crystal); + tsc_hz / 1000000, crystal_hz, ebx_tsc, eax_crystal); } } } @@ -5517,10 +5462,9 @@ void process_cpuid() BIC_PRESENT(BIC_CPUGFX); } do_slm_cstates = is_slm(family, model); - do_knl_cstates = is_knl(family, model); + do_knl_cstates = is_knl(family, model); - if (do_slm_cstates || do_knl_cstates || is_cnl(family, model) || - is_ehl(family, model)) + if (do_slm_cstates || do_knl_cstates || is_cnl(family, model) || is_ehl(family, model)) BIC_NOT_PRESENT(BIC_CPU_c3); if (!quiet) @@ -5614,7 +5558,7 @@ void topology_probe() if (debug > 1) fprintf(outf, "num_cpus %d max_cpu_num %d\n", topo.num_cpus, topo.max_cpu_num); - cpus = calloc(1, (topo.max_cpu_num + 1) * sizeof(struct cpu_topology)); + cpus = calloc(1, (topo.max_cpu_num + 1) * sizeof(struct cpu_topology)); if (cpus == NULL) err(1, "calloc cpus"); @@ -5693,22 +5637,19 @@ void topology_probe() topo.cores_per_node = max_core_id + 1; if (debug > 1) - fprintf(outf, "max_core_id %d, sizing for %d cores per package\n", - 
max_core_id, topo.cores_per_node); + fprintf(outf, "max_core_id %d, sizing for %d cores per package\n", max_core_id, topo.cores_per_node); if (!summary_only && topo.cores_per_node > 1) BIC_PRESENT(BIC_Core); topo.num_die = max_die_id + 1; if (debug > 1) - fprintf(outf, "max_die_id %d, sizing for %d die\n", - max_die_id, topo.num_die); + fprintf(outf, "max_die_id %d, sizing for %d die\n", max_die_id, topo.num_die); if (!summary_only && topo.num_die > 1) BIC_PRESENT(BIC_Die); topo.num_packages = max_package_id + 1; if (debug > 1) - fprintf(outf, "max_package_id %d, sizing for %d packages\n", - max_package_id, topo.num_packages); + fprintf(outf, "max_package_id %d, sizing for %d packages\n", max_package_id, topo.num_packages); if (!summary_only && topo.num_packages > 1) BIC_PRESENT(BIC_Package); @@ -5731,21 +5672,15 @@ void topology_probe() fprintf(outf, "cpu %d pkg %d die %d node %d lnode %d core %d thread %d\n", i, cpus[i].physical_package_id, cpus[i].die_id, - cpus[i].physical_node_id, - cpus[i].logical_node_id, - cpus[i].physical_core_id, - cpus[i].thread_id); + cpus[i].physical_node_id, cpus[i].logical_node_id, cpus[i].physical_core_id, cpus[i].thread_id); } } -void -allocate_counters(struct thread_data **t, struct core_data **c, - struct pkg_data **p) +void allocate_counters(struct thread_data **t, struct core_data **c, struct pkg_data **p) { int i; - int num_cores = topo.cores_per_node * topo.nodes_per_pkg * - topo.num_packages; + int num_cores = topo.cores_per_node * topo.nodes_per_pkg * topo.num_packages; int num_threads = topo.threads_per_core * num_cores; *t = calloc(num_threads, sizeof(struct thread_data)); @@ -5773,13 +5708,13 @@ allocate_counters(struct thread_data **t, struct core_data **c, error: err(1, "calloc counters"); } + /* * init_counter() * * set FIRST_THREAD_IN_CORE and FIRST_CORE_IN_PACKAGE */ -void init_counter(struct thread_data *thread_base, struct core_data *core_base, - struct pkg_data *pkg_base, int cpu_id) +void init_counter(struct 
thread_data *thread_base, struct core_data *core_base, struct pkg_data *pkg_base, int cpu_id) { int pkg_id = cpus[cpu_id].physical_package_id; int node_id = cpus[cpu_id].logical_node_id; @@ -5789,7 +5724,6 @@ void init_counter(struct thread_data *thread_base, struct core_data *core_base, struct core_data *c; struct pkg_data *p; - /* Workaround for systems where physical_node_id==-1 * and logical_node_id==(-1 - topo.num_cpus) */ @@ -5811,7 +5745,6 @@ void init_counter(struct thread_data *thread_base, struct core_data *core_base, p->package_id = pkg_id; } - int initialize_counters(int cpu_id) { init_counter(EVEN_COUNTERS, cpu_id); @@ -5826,12 +5759,14 @@ void allocate_output_buffer() if (outp == NULL) err(-1, "calloc output buffer"); } + void allocate_fd_percpu(void) { fd_percpu = calloc(topo.max_cpu_num + 1, sizeof(int)); if (fd_percpu == NULL) err(-1, "calloc fd_percpu"); } + void allocate_irq_buffers(void) { irq_column_2_cpu = calloc(topo.num_cpus, sizeof(int)); @@ -5842,6 +5777,7 @@ void allocate_irq_buffers(void) if (irqs_per_cpu == NULL) err(-1, "calloc %d", topo.max_cpu_num + 1); } + void setup_all_buffers(void) { topology_probe(); @@ -5872,7 +5808,6 @@ void turbostat_init() process_cpuid(); linux_perf_init(); - if (!quiet) for_all_cpus(print_hwp, ODD_COUNTERS); @@ -5945,7 +5880,7 @@ int fork_it(char **argv) format_all_counters(EVEN_COUNTERS); } - fprintf(outf, "%.6f sec\n", tv_delta.tv_sec + tv_delta.tv_usec/1000000.0); + fprintf(outf, "%.6f sec\n", tv_delta.tv_sec + tv_delta.tv_usec / 1000000.0); flush_output_stderr(); @@ -5970,14 +5905,14 @@ int get_and_dump_counters(void) return status; } -void print_version() { - fprintf(outf, "turbostat version 21.03.12" - " - Len Brown \n"); +void print_version() +{ + fprintf(outf, "turbostat version 21.03.12" " - Len Brown \n"); } int add_counter(unsigned int msr_num, char *path, char *name, - unsigned int width, enum counter_scope scope, - enum counter_type type, enum counter_format format, int flags) + unsigned int 
width, enum counter_scope scope, + enum counter_type type, enum counter_format format, int flags) { struct msr_counter *msrp; @@ -6003,8 +5938,7 @@ int add_counter(unsigned int msr_num, char *path, char *name, sys.tp = msrp; sys.added_thread_counters++; if (sys.added_thread_counters > MAX_ADDED_THREAD_COUNTERS) { - fprintf(stderr, "exceeded max %d added thread counters\n", - MAX_ADDED_COUNTERS); + fprintf(stderr, "exceeded max %d added thread counters\n", MAX_ADDED_COUNTERS); exit(-1); } break; @@ -6014,8 +5948,7 @@ int add_counter(unsigned int msr_num, char *path, char *name, sys.cp = msrp; sys.added_core_counters++; if (sys.added_core_counters > MAX_ADDED_COUNTERS) { - fprintf(stderr, "exceeded max %d added core counters\n", - MAX_ADDED_COUNTERS); + fprintf(stderr, "exceeded max %d added core counters\n", MAX_ADDED_COUNTERS); exit(-1); } break; @@ -6025,8 +5958,7 @@ int add_counter(unsigned int msr_num, char *path, char *name, sys.pp = msrp; sys.added_package_counters++; if (sys.added_package_counters > MAX_ADDED_COUNTERS) { - fprintf(stderr, "exceeded max %d added package counters\n", - MAX_ADDED_COUNTERS); + fprintf(stderr, "exceeded max %d added package counters\n", MAX_ADDED_COUNTERS); exit(-1); } break; @@ -6163,15 +6095,14 @@ void probe_sysfs(void) for (state = 10; state >= 0; --state) { - sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name", - base_cpu, state); + sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name", base_cpu, state); input = fopen(path, "r"); if (input == NULL) continue; if (!fgets(name_buf, sizeof(name_buf), input)) err(1, "%s: failed to read file", path); - /* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */ + /* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */ sp = strchr(name_buf, '-'); if (!sp) sp = strchrnul(name_buf, '\n'); @@ -6187,20 +6118,18 @@ void probe_sysfs(void) if (is_deferred_skip(name_buf)) continue; - add_counter(0, path, name_buf, 64, SCOPE_CPU, COUNTER_USEC, - 
FORMAT_PERCENT, SYSFS_PERCPU); + add_counter(0, path, name_buf, 64, SCOPE_CPU, COUNTER_USEC, FORMAT_PERCENT, SYSFS_PERCPU); } for (state = 10; state >= 0; --state) { - sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name", - base_cpu, state); + sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name", base_cpu, state); input = fopen(path, "r"); if (input == NULL) continue; if (!fgets(name_buf, sizeof(name_buf), input)) err(1, "%s: failed to read file", path); - /* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */ + /* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */ sp = strchr(name_buf, '-'); if (!sp) sp = strchrnul(name_buf, '\n'); @@ -6214,13 +6143,11 @@ void probe_sysfs(void) if (is_deferred_skip(name_buf)) continue; - add_counter(0, path, name_buf, 64, SCOPE_CPU, COUNTER_ITEMS, - FORMAT_DELTA, SYSFS_PERCPU); + add_counter(0, path, name_buf, 64, SCOPE_CPU, COUNTER_ITEMS, FORMAT_DELTA, SYSFS_PERCPU); } } - /* * parse cpuset with following syntax * 1,2,4..6,8-10 and set bits in cpu_subset @@ -6307,37 +6234,35 @@ error: exit(-1); } - void cmdline(int argc, char **argv) { int opt; int option_index = 0; static struct option long_options[] = { - {"add", required_argument, 0, 'a'}, - {"cpu", required_argument, 0, 'c'}, - {"Dump", no_argument, 0, 'D'}, - {"debug", no_argument, 0, 'd'}, /* internal, not documented */ - {"enable", required_argument, 0, 'e'}, - {"interval", required_argument, 0, 'i'}, - {"IPC", no_argument, 0, 'I'}, - {"num_iterations", required_argument, 0, 'n'}, - {"help", no_argument, 0, 'h'}, - {"hide", required_argument, 0, 'H'}, // meh, -h taken by --help - {"Joules", no_argument, 0, 'J'}, - {"list", no_argument, 0, 'l'}, - {"out", required_argument, 0, 'o'}, - {"quiet", no_argument, 0, 'q'}, - {"show", required_argument, 0, 's'}, - {"Summary", no_argument, 0, 'S'}, - {"TCC", required_argument, 0, 'T'}, - {"version", no_argument, 0, 'v' }, - {0, 0, 0, 0 } + { "add", required_argument, 0, 'a' }, + { "cpu", 
required_argument, 0, 'c' }, + { "Dump", no_argument, 0, 'D' }, + { "debug", no_argument, 0, 'd' }, /* internal, not documented */ + { "enable", required_argument, 0, 'e' }, + { "interval", required_argument, 0, 'i' }, + { "IPC", no_argument, 0, 'I' }, + { "num_iterations", required_argument, 0, 'n' }, + { "help", no_argument, 0, 'h' }, + { "hide", required_argument, 0, 'H' }, // meh, -h taken by --help + { "Joules", no_argument, 0, 'J' }, + { "list", no_argument, 0, 'l' }, + { "out", required_argument, 0, 'o' }, + { "quiet", no_argument, 0, 'q' }, + { "show", required_argument, 0, 's' }, + { "Summary", no_argument, 0, 'S' }, + { "TCC", required_argument, 0, 'T' }, + { "version", no_argument, 0, 'v' }, + { 0, 0, 0, 0 } }; progname = argv[0]; - while ((opt = getopt_long_only(argc, argv, "+C:c:Dde:hi:Jn:o:qST:v", - long_options, &option_index)) != -1) { + while ((opt = getopt_long_only(argc, argv, "+C:c:Dde:hi:Jn:o:qST:v", long_options, &option_index)) != -1) { switch (opt) { case 'a': parse_add_command(optarg); @@ -6372,8 +6297,7 @@ void cmdline(int argc, char **argv) double interval = strtod(optarg, NULL); if (interval < 0.001) { - fprintf(outf, "interval %f seconds is too small\n", - interval); + fprintf(outf, "interval %f seconds is too small\n", interval); exit(2); } @@ -6400,8 +6324,7 @@ void cmdline(int argc, char **argv) num_iterations = strtod(optarg, NULL); if (num_iterations <= 0) { - fprintf(outf, "iterations %d should be positive number\n", - num_iterations); + fprintf(outf, "iterations %d should be positive number\n", num_iterations); exit(2); } break; -- cgit v1.2.3 From 38c6663a68903cf1187003129cd1873551979865 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Tue, 4 May 2021 19:27:34 -0400 Subject: tools/power turbostat: elevate priority of interval mode This makes interval mode less likely to see delayed results on a heavily loaded system. 
Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 66 ++++++++++++++++++++++++++++++----- 1 file changed, 57 insertions(+), 9 deletions(-) (limited to 'tools') diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 13805e460a4d..a0dc39d682cd 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -2266,31 +2266,48 @@ char *pkg_cstate_limit_strings[] = { "reserved", "unknown", "pc0", "pc1", "pc2", int nhm_pkg_cstate_limits[16] = { PCL__0, PCL__1, PCL__3, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, -PCLRSV, PCLRSV }; + PCLRSV, PCLRSV +}; + int snb_pkg_cstate_limits[16] = { PCL__0, PCL__2, PCL_6N, PCL_6R, PCL__7, PCL_7S, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, -PCLRSV, PCLRSV }; + PCLRSV, PCLRSV +}; + int hsw_pkg_cstate_limits[16] = { PCL__0, PCL__2, PCL__3, PCL__6, PCL__7, PCL_7S, PCL__8, PCL__9, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, -PCLRSV, PCLRSV }; + PCLRSV, PCLRSV +}; + int slv_pkg_cstate_limits[16] = { PCL__0, PCL__1, PCLRSV, PCLRSV, PCL__4, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, -PCL__6, PCL__7 }; + PCL__6, PCL__7 +}; + int amt_pkg_cstate_limits[16] = { PCLUNL, PCL__1, PCL__2, PCLRSV, PCLRSV, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, -PCLRSV, PCLRSV }; + PCLRSV, PCLRSV +}; + int phi_pkg_cstate_limits[16] = { PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, -PCLRSV, PCLRSV }; + PCLRSV, PCLRSV +}; + int glm_pkg_cstate_limits[16] = { PCLUNL, PCL__1, PCL__3, PCL__6, PCL__7, PCL_7S, PCL__8, PCL__9, PCL_10, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, -PCLRSV, PCLRSV }; + PCLRSV, PCLRSV +}; + int skx_pkg_cstate_limits[16] = { PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, -PCLRSV, PCLRSV }; + PCLRSV, PCLRSV 
+}; + int icx_pkg_cstate_limits[16] = { PCL__0, PCL__2, PCL__6, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, -PCLRSV, PCLRSV }; + PCLRSV, PCLRSV +}; static void calculate_tsc_tweak() { @@ -3401,6 +3418,32 @@ release_msr: free(per_cpu_msr_sum); } +/* + * set_my_sched_priority(pri) + * return previous + */ +int set_my_sched_priority(int priority) +{ + int retval; + int original_priority; + + errno = 0; + original_priority = getpriority(PRIO_PROCESS, 0); + if (errno && (original_priority == -1)) + err(errno, "getpriority"); + + retval = setpriority(PRIO_PROCESS, 0, priority); + if (retval) + err(retval, "setpriority(%d)", priority); + + errno = 0; + retval = getpriority(PRIO_PROCESS, 0); + if (retval != priority) + err(-1, "getpriority(%d) != setpriority(%d)", retval, priority); + + return original_priority; +} + void turbostat_loop() { int retval; @@ -3409,6 +3452,11 @@ void turbostat_loop() setup_signal_handler(); + /* + * elevate own priority for interval mode + */ + set_my_sched_priority(-20); + restart: restarted++; -- cgit v1.2.3 From b60c573dc241ab3a8719e990d86a0011b79eebcb Mon Sep 17 00:00:00 2001 From: Len Brown Date: Tue, 4 May 2021 19:56:17 -0400 Subject: tools/power turbostat: Support "turbostat --hide idle" As idle, in particular, can have many columns on some machines... Make it easy to ignore them all at once. Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.8 | 6 ++++-- tools/power/x86/turbostat/turbostat.c | 18 ++++++++++++++++++ 2 files changed, 22 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8 index f6b7e85b121c..9b17097bc3d7 100644 --- a/tools/power/x86/turbostat/turbostat.8 +++ b/tools/power/x86/turbostat/turbostat.8 @@ -54,12 +54,14 @@ name as necessary to disambiguate it from others is necessary. Note that option .PP \fB--cpu cpu-set\fP limit output to system summary plus the specified cpu-set. 
If cpu-set is the string "core", then the system summary plus the first CPU in each core are printed -- eg. subsequent HT siblings are not printed. Or if cpu-set is the string "package", then the system summary plus the first CPU in each package is printed. Otherwise, the system summary plus the specified set of CPUs are printed. The cpu-set is ordered from low to high, comma delimited with ".." and "-" permitted to denote a range. eg. 1,2,8,14..17,21-44 .PP -\fB--hide column\fP do not show the specified built-in columns. May be invoked multiple times, or with a comma-separated list of column names. Use "--hide sysfs" to hide the sysfs statistics columns as a group. +\fB--hide column\fP do not show the specified built-in columns. May be invoked multiple times, or with a comma-separated list of column names. .PP \fB--enable column\fP show the specified built-in columns, which are otherwise disabled, by default. Currently the only built-in counters disabled by default are "usec", "Time_Of_Day_Seconds", "APIC" and "X2APIC". The column name "all" can be used to enable all disabled-by-default built-in counters. .PP -\fB--show column\fP show only the specified built-in columns. May be invoked multiple times, or with a comma-separated list of column names. Use "--show sysfs" to show the sysfs statistics columns as a group. +\fB--show column\fP show only the specified built-in columns. May be invoked multiple times, or with a comma-separated list of column names. +.PP +\fB--show CATEGORY --hide CATEGORY\fP Show and hide also accept a single CATEGORY of columns: "all", "topology", "idle", "frequency", "power", "sysfs", "other". .PP \fB--Dump\fP displays the raw counter values. 
.PP diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index a0dc39d682cd..4bb08de4cb71 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -667,6 +667,12 @@ struct msr_counter bic[] = { #define BIC_GFXACTMHz (1ULL << 51) #define BIC_IPC (1ULL << 52) +#define BIC_TOPOLOGY (BIC_Package | BIC_Node | BIC_CoreCnt | BIC_PkgCnt | BIC_Core | BIC_CPU | BIC_Die ) +#define BIC_THERMAL_PWR ( BIC_CoreTmp | BIC_PkgTmp | BIC_PkgWatt | BIC_CorWatt | BIC_GFXWatt | BIC_RAMWatt | BIC_PKG__ | BIC_RAM__) +#define BIC_FREQUENCY ( BIC_Avg_MHz | BIC_Busy | BIC_Bzy_MHz | BIC_TSC_MHz | BIC_GFXMHz | BIC_GFXACTMHz ) +#define BIC_IDLE ( BIC_sysfs | BIC_CPU_c1 | BIC_CPU_c3 | BIC_CPU_c6 | BIC_CPU_c7 | BIC_GFX_rc6 | BIC_Pkgpc2 | BIC_Pkgpc3 | BIC_Pkgpc6 | BIC_Pkgpc7 | BIC_Pkgpc8 | BIC_Pkgpc9 | BIC_Pkgpc10 | BIC_CPU_LPI | BIC_SYS_LPI | BIC_Mod_c6 | BIC_Totl_c0 | BIC_Any_c0 | BIC_GFX_c0 | BIC_CPUGFX) +#define BIC_OTHER ( BIC_IRQ | BIC_SMI | BIC_ThreadC | BIC_CoreTmp | BIC_IPC) + #define BIC_DISABLED_BY_DEFAULT (BIC_USEC | BIC_TOD | BIC_APIC | BIC_X2APIC) unsigned long long bic_enabled = (0xFFFFFFFFFFFFFFFFULL & ~BIC_DISABLED_BY_DEFAULT); @@ -748,6 +754,18 @@ unsigned long long bic_lookup(char *name_list, enum show_hide_mode mode) if (!strcmp(name_list, "all")) return ~0; + if (!strcmp(name_list, "topology")) + return BIC_TOPOLOGY; + if (!strcmp(name_list, "power")) + return BIC_THERMAL_PWR; + if (!strcmp(name_list, "idle")) + return BIC_IDLE; + if (!strcmp(name_list, "frequency")) + return BIC_FREQUENCY; + if (!strcmp(name_list, "other")) + return BIC_OTHER; + if (!strcmp(name_list, "all")) + return 0; for (i = 0; i < MAX_BIC; ++i) { if (!strcmp(name_list, bic[i].name)) { -- cgit v1.2.3 From 3c070b2abf85b92455c2721d0a9edc68893ab6c1 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Tue, 4 May 2021 19:58:08 -0400 Subject: tools/power turbostat: version 2021.05.04 Signed-off-by: Len Brown --- 
tools/power/x86/turbostat/turbostat.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 4bb08de4cb71..47d3ba895d6d 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -3,7 +3,7 @@ * turbostat -- show CPU frequency and C-state residency * on modern Intel and AMD processors. * - * Copyright (c) 2013 Intel Corporation. + * Copyright (c) 2021 Intel Corporation. * Len Brown */ @@ -5973,7 +5973,7 @@ int get_and_dump_counters(void) void print_version() { - fprintf(outf, "turbostat version 21.03.12" " - Len Brown \n"); + fprintf(outf, "turbostat version 21.05.04" " - Len Brown \n"); } int add_counter(unsigned int msr_num, char *path, char *name, -- cgit v1.2.3 From fa6c02315f745f00b62c634b220c3fb5c3310258 Mon Sep 17 00:00:00 2001 From: Zi Yan Date: Tue, 4 May 2021 18:34:23 -0700 Subject: mm: huge_memory: a new debugfs interface for splitting THP tests We did not have a direct user interface of splitting the compound page backing a THP and there is no need unless we want to expose the THP implementation details to users. Make <debugfs>/split_huge_pages accept a new command to do that. By writing "<pid>,<vaddr_start>,<vaddr_end>" to <debugfs>/split_huge_pages, THPs within the given virtual address range from the process with the given pid are split. It is used to test split_huge_page function. In addition, a selftest program is added to tools/testing/selftests/vm to utilize the interface by splitting PMD THPs and PTE-mapped THPs. This does not change the old behavior, i.e., writing 1 to the interface to split all THPs in the system. Link: https://lkml.kernel.org/r/20210331235309.332292-1-zi.yan@sent.com Signed-off-by: Zi Yan Reviewed-by: Yang Shi Cc: David Hildenbrand Cc: David Rientjes Cc: John Hubbard Cc: "Kirill A . 
Shutemov" Cc: Matthew Wilcox Cc: Mika Penttila Cc: Sandipan Das Cc: Shuah Khan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/testing/selftests/vm/.gitignore | 1 + tools/testing/selftests/vm/Makefile | 1 + tools/testing/selftests/vm/split_huge_page_test.c | 318 ++++++++++++++++++++++ 3 files changed, 320 insertions(+) create mode 100644 tools/testing/selftests/vm/split_huge_page_test.c (limited to 'tools') diff --git a/tools/testing/selftests/vm/.gitignore b/tools/testing/selftests/vm/.gitignore index 9a35c3f6a557..1f651e85ed60 100644 --- a/tools/testing/selftests/vm/.gitignore +++ b/tools/testing/selftests/vm/.gitignore @@ -22,3 +22,4 @@ map_fixed_noreplace write_to_hugetlbfs hmm-tests local_config.* +split_huge_page_test diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile index 8b0cd421ebd3..73e1cc96d7c2 100644 --- a/tools/testing/selftests/vm/Makefile +++ b/tools/testing/selftests/vm/Makefile @@ -42,6 +42,7 @@ TEST_GEN_FILES += on-fault-limit TEST_GEN_FILES += thuge-gen TEST_GEN_FILES += transhuge-stress TEST_GEN_FILES += userfaultfd +TEST_GEN_FILES += split_huge_page_test ifeq ($(MACHINE),x86_64) CAN_BUILD_I386 := $(shell ./../x86/check_cc.sh $(CC) ../x86/trivial_32bit_program.c -m32) diff --git a/tools/testing/selftests/vm/split_huge_page_test.c b/tools/testing/selftests/vm/split_huge_page_test.c new file mode 100644 index 000000000000..2c0c18e60c57 --- /dev/null +++ b/tools/testing/selftests/vm/split_huge_page_test.c @@ -0,0 +1,318 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * A test of splitting PMD THPs and PTE-mapped THPs from a specified virtual + * address range in a process via /split_huge_pages interface. 
+ */ + +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include +#include + +uint64_t pagesize; +unsigned int pageshift; +uint64_t pmd_pagesize; + +#define PMD_SIZE_PATH "/sys/kernel/mm/transparent_hugepage/hpage_pmd_size" +#define SPLIT_DEBUGFS "/sys/kernel/debug/split_huge_pages" +#define SMAP_PATH "/proc/self/smaps" +#define INPUT_MAX 80 + +#define PFN_MASK ((1UL<<55)-1) +#define KPF_THP (1UL<<22) + +int is_backed_by_thp(char *vaddr, int pagemap_file, int kpageflags_file) +{ + uint64_t paddr; + uint64_t page_flags; + + if (pagemap_file) { + pread(pagemap_file, &paddr, sizeof(paddr), + ((long)vaddr >> pageshift) * sizeof(paddr)); + + if (kpageflags_file) { + pread(kpageflags_file, &page_flags, sizeof(page_flags), + (paddr & PFN_MASK) * sizeof(page_flags)); + + return !!(page_flags & KPF_THP); + } + } + return 0; +} + + +static uint64_t read_pmd_pagesize(void) +{ + int fd; + char buf[20]; + ssize_t num_read; + + fd = open(PMD_SIZE_PATH, O_RDONLY); + if (fd == -1) { + perror("Open hpage_pmd_size failed"); + exit(EXIT_FAILURE); + } + num_read = read(fd, buf, 19); + if (num_read < 1) { + close(fd); + perror("Read hpage_pmd_size failed"); + exit(EXIT_FAILURE); + } + buf[num_read] = '\0'; + close(fd); + + return strtoul(buf, NULL, 10); +} + +static int write_file(const char *path, const char *buf, size_t buflen) +{ + int fd; + ssize_t numwritten; + + fd = open(path, O_WRONLY); + if (fd == -1) + return 0; + + numwritten = write(fd, buf, buflen - 1); + close(fd); + if (numwritten < 1) + return 0; + + return (unsigned int) numwritten; +} + +static void write_debugfs(int pid, uint64_t vaddr_start, uint64_t vaddr_end) +{ + char input[INPUT_MAX]; + int ret; + + ret = snprintf(input, INPUT_MAX, "%d,0x%lx,0x%lx", pid, vaddr_start, + vaddr_end); + if (ret >= INPUT_MAX) { + printf("%s: Debugfs input is too long\n", __func__); + exit(EXIT_FAILURE); + } + + if (!write_file(SPLIT_DEBUGFS, input, ret + 1)) { + perror(SPLIT_DEBUGFS); + 
exit(EXIT_FAILURE); + } +} + +#define MAX_LINE_LENGTH 500 + +static bool check_for_pattern(FILE *fp, const char *pattern, char *buf) +{ + while (fgets(buf, MAX_LINE_LENGTH, fp) != NULL) { + if (!strncmp(buf, pattern, strlen(pattern))) + return true; + } + return false; +} + +static uint64_t check_huge(void *addr) +{ + uint64_t thp = 0; + int ret; + FILE *fp; + char buffer[MAX_LINE_LENGTH]; + char addr_pattern[MAX_LINE_LENGTH]; + + ret = snprintf(addr_pattern, MAX_LINE_LENGTH, "%08lx-", + (unsigned long) addr); + if (ret >= MAX_LINE_LENGTH) { + printf("%s: Pattern is too long\n", __func__); + exit(EXIT_FAILURE); + } + + + fp = fopen(SMAP_PATH, "r"); + if (!fp) { + printf("%s: Failed to open file %s\n", __func__, SMAP_PATH); + exit(EXIT_FAILURE); + } + if (!check_for_pattern(fp, addr_pattern, buffer)) + goto err_out; + + /* + * Fetch the AnonHugePages: in the same block and check the number of + * hugepages. + */ + if (!check_for_pattern(fp, "AnonHugePages:", buffer)) + goto err_out; + + if (sscanf(buffer, "AnonHugePages:%10ld kB", &thp) != 1) { + printf("Reading smap error\n"); + exit(EXIT_FAILURE); + } + +err_out: + fclose(fp); + return thp; +} + +void split_pmd_thp(void) +{ + char *one_page; + size_t len = 4 * pmd_pagesize; + uint64_t thp_size; + size_t i; + + one_page = memalign(pmd_pagesize, len); + + if (!one_page) { + printf("Fail to allocate memory\n"); + exit(EXIT_FAILURE); + } + + madvise(one_page, len, MADV_HUGEPAGE); + + for (i = 0; i < len; i++) + one_page[i] = (char)i; + + thp_size = check_huge(one_page); + if (!thp_size) { + printf("No THP is allocated\n"); + exit(EXIT_FAILURE); + } + + /* split all THPs */ + write_debugfs(getpid(), (uint64_t)one_page, (uint64_t)one_page + len); + + for (i = 0; i < len; i++) + if (one_page[i] != (char)i) { + printf("%ld byte corrupted\n", i); + exit(EXIT_FAILURE); + } + + + thp_size = check_huge(one_page); + if (thp_size) { + printf("Still %ld kB AnonHugePages not split\n", thp_size); + exit(EXIT_FAILURE); + } + + 
printf("Split huge pages successful\n"); + free(one_page); +} + +void split_pte_mapped_thp(void) +{ + char *one_page, *pte_mapped, *pte_mapped2; + size_t len = 4 * pmd_pagesize; + uint64_t thp_size; + size_t i; + const char *pagemap_template = "/proc/%d/pagemap"; + const char *kpageflags_proc = "/proc/kpageflags"; + char pagemap_proc[255]; + int pagemap_fd; + int kpageflags_fd; + + if (snprintf(pagemap_proc, 255, pagemap_template, getpid()) < 0) { + perror("get pagemap proc error"); + exit(EXIT_FAILURE); + } + pagemap_fd = open(pagemap_proc, O_RDONLY); + + if (pagemap_fd == -1) { + perror("read pagemap:"); + exit(EXIT_FAILURE); + } + + kpageflags_fd = open(kpageflags_proc, O_RDONLY); + + if (kpageflags_fd == -1) { + perror("read kpageflags:"); + exit(EXIT_FAILURE); + } + + one_page = mmap((void *)(1UL << 30), len, PROT_READ | PROT_WRITE, + MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); + + madvise(one_page, len, MADV_HUGEPAGE); + + for (i = 0; i < len; i++) + one_page[i] = (char)i; + + thp_size = check_huge(one_page); + if (!thp_size) { + printf("No THP is allocated\n"); + exit(EXIT_FAILURE); + } + + /* remap the first pagesize of first THP */ + pte_mapped = mremap(one_page, pagesize, pagesize, MREMAP_MAYMOVE); + + /* remap the Nth pagesize of Nth THP */ + for (i = 1; i < 4; i++) { + pte_mapped2 = mremap(one_page + pmd_pagesize * i + pagesize * i, + pagesize, pagesize, + MREMAP_MAYMOVE|MREMAP_FIXED, + pte_mapped + pagesize * i); + if (pte_mapped2 == (char *)-1) { + perror("mremap failed"); + exit(EXIT_FAILURE); + } + } + + /* smap does not show THPs after mremap, use kpageflags instead */ + thp_size = 0; + for (i = 0; i < pagesize * 4; i++) + if (i % pagesize == 0 && + is_backed_by_thp(&pte_mapped[i], pagemap_fd, kpageflags_fd)) + thp_size++; + + if (thp_size != 4) { + printf("Some THPs are missing during mremap\n"); + exit(EXIT_FAILURE); + } + + /* split all remapped THPs */ + write_debugfs(getpid(), (uint64_t)pte_mapped, + (uint64_t)pte_mapped + pagesize * 4); + + /* smap 
does not show THPs after mremap, use kpageflags instead */ + thp_size = 0; + for (i = 0; i < pagesize * 4; i++) { + if (pte_mapped[i] != (char)i) { + printf("%ld byte corrupted\n", i); + exit(EXIT_FAILURE); + } + if (i % pagesize == 0 && + is_backed_by_thp(&pte_mapped[i], pagemap_fd, kpageflags_fd)) + thp_size++; + } + + if (thp_size) { + printf("Still %ld THPs not split\n", thp_size); + exit(EXIT_FAILURE); + } + + printf("Split PTE-mapped huge pages successful\n"); + munmap(one_page, len); + close(pagemap_fd); + close(kpageflags_fd); +} + +int main(int argc, char **argv) +{ + if (geteuid() != 0) { + printf("Please run the benchmark as root\n"); + exit(EXIT_FAILURE); + } + + pagesize = getpagesize(); + pageshift = ffs(pagesize) - 1; + pmd_pagesize = read_pmd_pagesize(); + + split_pmd_thp(); + split_pte_mapped_thp(); + + return 0; +} -- cgit v1.2.3 From fbe37501b2526a71d82b898671260524279c6765 Mon Sep 17 00:00:00 2001 From: Zi Yan Date: Tue, 4 May 2021 18:34:26 -0700 Subject: mm: huge_memory: debugfs for file-backed THP split Further extend <debugfs>/split_huge_pages to accept "<path>,<pgoff_start>,<pgoff_end>" for file-backed THP split tests since tmpfs may have file backed by THP that mapped nowhere. Update selftest program to test file-backed THP split too. Link: https://lkml.kernel.org/r/20210331235309.332292-2-zi.yan@sent.com Signed-off-by: Zi Yan Suggested-by: Kirill A. Shutemov Reviewed-by: Yang Shi Cc: "Kirill A . 
Shutemov" Cc: Shuah Khan Cc: John Hubbard Cc: Sandipan Das Cc: David Hildenbrand Cc: Mika Penttila Cc: David Rientjes Cc: Matthew Wilcox Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/testing/selftests/vm/split_huge_page_test.c | 82 +++++++++++++++++++++-- 1 file changed, 77 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/vm/split_huge_page_test.c b/tools/testing/selftests/vm/split_huge_page_test.c index 2c0c18e60c57..1af16d2c2a0a 100644 --- a/tools/testing/selftests/vm/split_huge_page_test.c +++ b/tools/testing/selftests/vm/split_huge_page_test.c @@ -7,11 +7,13 @@ #define _GNU_SOURCE #include #include +#include #include #include #include #include #include +#include #include #include @@ -24,6 +26,9 @@ uint64_t pmd_pagesize; #define SMAP_PATH "/proc/self/smaps" #define INPUT_MAX 80 +#define PID_FMT "%d,0x%lx,0x%lx" +#define PATH_FMT "%s,0x%lx,0x%lx" + #define PFN_MASK ((1UL<<55)-1) #define KPF_THP (1UL<<22) @@ -87,13 +92,16 @@ static int write_file(const char *path, const char *buf, size_t buflen) return (unsigned int) numwritten; } -static void write_debugfs(int pid, uint64_t vaddr_start, uint64_t vaddr_end) +static void write_debugfs(const char *fmt, ...) 
{ char input[INPUT_MAX]; int ret; + va_list argp; + + va_start(argp, fmt); + ret = vsnprintf(input, INPUT_MAX, fmt, argp); + va_end(argp); - ret = snprintf(input, INPUT_MAX, "%d,0x%lx,0x%lx", pid, vaddr_start, - vaddr_end); if (ret >= INPUT_MAX) { printf("%s: Debugfs input is too long\n", __func__); exit(EXIT_FAILURE); @@ -183,7 +191,8 @@ void split_pmd_thp(void) } /* split all THPs */ - write_debugfs(getpid(), (uint64_t)one_page, (uint64_t)one_page + len); + write_debugfs(PID_FMT, getpid(), (uint64_t)one_page, + (uint64_t)one_page + len); for (i = 0; i < len; i++) if (one_page[i] != (char)i) { @@ -274,7 +283,7 @@ void split_pte_mapped_thp(void) } /* split all remapped THPs */ - write_debugfs(getpid(), (uint64_t)pte_mapped, + write_debugfs(PID_FMT, getpid(), (uint64_t)pte_mapped, (uint64_t)pte_mapped + pagesize * 4); /* smap does not show THPs after mremap, use kpageflags instead */ @@ -300,6 +309,68 @@ void split_pte_mapped_thp(void) close(kpageflags_fd); } +void split_file_backed_thp(void) +{ + int status; + int fd; + ssize_t num_written; + char tmpfs_template[] = "/tmp/thp_split_XXXXXX"; + const char *tmpfs_loc = mkdtemp(tmpfs_template); + char testfile[INPUT_MAX]; + uint64_t pgoff_start = 0, pgoff_end = 1024; + + printf("Please enable pr_debug in split_huge_pages_in_file() if you need more info.\n"); + + status = mount("tmpfs", tmpfs_loc, "tmpfs", 0, "huge=always,size=4m"); + + if (status) { + printf("Unable to create a tmpfs for testing\n"); + exit(EXIT_FAILURE); + } + + status = snprintf(testfile, INPUT_MAX, "%s/thp_file", tmpfs_loc); + if (status >= INPUT_MAX) { + printf("Fail to create file-backed THP split testing file\n"); + goto cleanup; + } + + fd = open(testfile, O_CREAT|O_WRONLY); + if (fd == -1) { + perror("Cannot open testing file\n"); + goto cleanup; + } + + /* write something to the file, so a file-backed THP can be allocated */ + num_written = write(fd, tmpfs_loc, sizeof(tmpfs_loc)); + close(fd); + + if (num_written < 1) { + printf("Fail to write 
data to testing file\n"); + goto cleanup; + } + + /* split the file-backed THP */ + write_debugfs(PATH_FMT, testfile, pgoff_start, pgoff_end); + + status = unlink(testfile); + if (status) + perror("Cannot remove testing file\n"); + +cleanup: + status = umount(tmpfs_loc); + if (status) { + printf("Unable to umount %s\n", tmpfs_loc); + exit(EXIT_FAILURE); + } + status = rmdir(tmpfs_loc); + if (status) { + perror("cannot remove tmp dir"); + exit(EXIT_FAILURE); + } + + printf("file-backed THP split test done, please check dmesg for more information\n"); +} + int main(int argc, char **argv) { if (geteuid() != 0) { @@ -313,6 +384,7 @@ int main(int argc, char **argv) split_pmd_thp(); split_pte_mapped_thp(); + split_file_backed_thp(); return 0; } -- cgit v1.2.3 From f0fa94330919be8ec5620382b50f1c72844c9224 Mon Sep 17 00:00:00 2001 From: Axel Rasmussen Date: Tue, 4 May 2021 18:35:57 -0700 Subject: userfaultfd/selftests: add test exercising minor fault handling Fix a dormant bug in userfaultfd_events_test(), where we did `return faulting_process(0)` instead of `exit(faulting_process(0))`. This caused the forked process to keep running, trying to execute any further test cases after the events test in parallel with the "real" process. Add a simple test case which exercises minor faults. In short, it does the following: 1. "Sets up" an area (area_dst) and a second shared mapping to the same underlying pages (area_dst_alias). 2. Register one of these areas with userfaultfd, in minor fault mode. 3. Start a second thread to handle any minor faults. 4. Populate the underlying pages with the non-UFFD-registered side of the mapping. Basically, memset() each page with some arbitrary contents. 5. Then, using the UFFD-registered mapping, read all of the page contents, asserting that the contents match expectations (we expect the minor fault handling thread can modify the page contents before resolving the fault). 
The minor fault handling thread, upon receiving an event, flips all the bits (~) in that page, just to prove that it can modify it in some arbitrary way. Then it issues a UFFDIO_CONTINUE ioctl, to set up the mapping and resolve the fault. The reading thread should wake up and see this modification. Currently the minor fault test is only enabled in hugetlb_shared mode, as this is the only configuration the kernel feature supports. Link: https://lkml.kernel.org/r/20210301222728.176417-7-axelrasmussen@google.com Signed-off-by: Axel Rasmussen Reviewed-by: Peter Xu Cc: Adam Ruprecht Cc: Alexander Viro Cc: Alexey Dobriyan Cc: Andrea Arcangeli Cc: Anshuman Khandual Cc: Cannon Matthews Cc: Catalin Marinas Cc: Chinwen Chang Cc: David Rientjes Cc: "Dr . David Alan Gilbert" Cc: Huang Ying Cc: Ingo Molnar Cc: Jann Horn Cc: Jerome Glisse Cc: Kirill A. Shutemov Cc: Lokesh Gidra Cc: "Matthew Wilcox (Oracle)" Cc: Michael Ellerman Cc: "Michal Koutný" Cc: Michel Lespinasse Cc: Mike Kravetz Cc: Mike Rapoport Cc: Mina Almasry Cc: Nicholas Piggin Cc: Oliver Upton Cc: Shaohua Li Cc: Shawn Anastasio Cc: Steven Price Cc: Steven Rostedt Cc: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/testing/selftests/vm/userfaultfd.c | 164 +++++++++++++++++++++++++++++-- 1 file changed, 158 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/vm/userfaultfd.c b/tools/testing/selftests/vm/userfaultfd.c index 92b8ec423201..f5ab5e0312e7 100644 --- a/tools/testing/selftests/vm/userfaultfd.c +++ b/tools/testing/selftests/vm/userfaultfd.c @@ -81,6 +81,8 @@ static volatile bool test_uffdio_copy_eexist = true; static volatile bool test_uffdio_zeropage_eexist = true; /* Whether to test uffd write-protection */ static bool test_uffdio_wp = false; +/* Whether to test uffd minor faults */ +static bool test_uffdio_minor = false; static bool map_shared; static int huge_fd; @@ -96,6 +98,7 @@ struct uffd_stats { int cpu; unsigned long 
missing_faults; unsigned long wp_faults; + unsigned long minor_faults; }; /* pthread_mutex_t starts at page offset 0 */ @@ -153,17 +156,19 @@ static void uffd_stats_reset(struct uffd_stats *uffd_stats, uffd_stats[i].cpu = i; uffd_stats[i].missing_faults = 0; uffd_stats[i].wp_faults = 0; + uffd_stats[i].minor_faults = 0; } } static void uffd_stats_report(struct uffd_stats *stats, int n_cpus) { int i; - unsigned long long miss_total = 0, wp_total = 0; + unsigned long long miss_total = 0, wp_total = 0, minor_total = 0; for (i = 0; i < n_cpus; i++) { miss_total += stats[i].missing_faults; wp_total += stats[i].wp_faults; + minor_total += stats[i].minor_faults; } printf("userfaults: %llu missing (", miss_total); @@ -172,6 +177,9 @@ static void uffd_stats_report(struct uffd_stats *stats, int n_cpus) printf("\b), %llu wp (", wp_total); for (i = 0; i < n_cpus; i++) printf("%lu+", stats[i].wp_faults); + printf("\b), %llu minor (", minor_total); + for (i = 0; i < n_cpus; i++) + printf("%lu+", stats[i].minor_faults); printf("\b)\n"); } @@ -328,7 +336,7 @@ static struct uffd_test_ops shmem_uffd_test_ops = { }; static struct uffd_test_ops hugetlb_uffd_test_ops = { - .expected_ioctls = UFFD_API_RANGE_IOCTLS_BASIC, + .expected_ioctls = UFFD_API_RANGE_IOCTLS_BASIC & ~(1 << _UFFDIO_CONTINUE), .allocate_area = hugetlb_allocate_area, .release_pages = hugetlb_release_pages, .alias_mapping = hugetlb_alias_mapping, @@ -362,6 +370,22 @@ static void wp_range(int ufd, __u64 start, __u64 len, bool wp) } } +static void continue_range(int ufd, __u64 start, __u64 len) +{ + struct uffdio_continue req; + + req.range.start = start; + req.range.len = len; + req.mode = 0; + + if (ioctl(ufd, UFFDIO_CONTINUE, &req)) { + fprintf(stderr, + "UFFDIO_CONTINUE failed for address 0x%" PRIx64 "\n", + (uint64_t)start); + exit(1); + } +} + static void *locking_thread(void *arg) { unsigned long cpu = (unsigned long) arg; @@ -569,8 +593,32 @@ static void uffd_handle_page_fault(struct uffd_msg *msg, } if 
(msg->arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_WP) { + /* Write protect page faults */ wp_range(uffd, msg->arg.pagefault.address, page_size, false); stats->wp_faults++; + } else if (msg->arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_MINOR) { + uint8_t *area; + int b; + + /* + * Minor page faults + * + * To prove we can modify the original range for testing + * purposes, we're going to bit flip this range before + * continuing. + * + * Note that this requires all minor page fault tests operate on + * area_dst (non-UFFD-registered) and area_dst_alias + * (UFFD-registered). + */ + + area = (uint8_t *)(area_dst + + ((char *)msg->arg.pagefault.address - + area_dst_alias)); + for (b = 0; b < page_size; ++b) + area[b] = ~area[b]; + continue_range(uffd, msg->arg.pagefault.address, page_size); + stats->minor_faults++; } else { /* Missing page faults */ if (bounces & BOUNCE_VERIFY && @@ -779,7 +827,7 @@ static int stress(struct uffd_stats *uffd_stats) return 0; } -static int userfaultfd_open(int features) +static int userfaultfd_open_ext(uint64_t *features) { struct uffdio_api uffdio_api; @@ -792,7 +840,7 @@ static int userfaultfd_open(int features) uffd_flags = fcntl(uffd, F_GETFD, NULL); uffdio_api.api = UFFD_API; - uffdio_api.features = features; + uffdio_api.features = *features; if (ioctl(uffd, UFFDIO_API, &uffdio_api)) { fprintf(stderr, "UFFDIO_API failed.\nPlease make sure to " "run with either root or ptrace capability.\n"); @@ -804,9 +852,15 @@ static int userfaultfd_open(int features) return 1; } + *features = uffdio_api.features; return 0; } +static int userfaultfd_open(uint64_t features) +{ + return userfaultfd_open_ext(&features); +} + sigjmp_buf jbuf, *sigbuf; static void sighndl(int sig, siginfo_t *siginfo, void *ptr) @@ -1112,7 +1166,7 @@ static int userfaultfd_events_test(void) } if (!pid) - return faulting_process(0); + exit(faulting_process(0)); waitpid(pid, &err, 0); if (err) { @@ -1215,6 +1269,102 @@ static int userfaultfd_sig_test(void) return userfaults 
!= 0; } +static int userfaultfd_minor_test(void) +{ + struct uffdio_register uffdio_register; + unsigned long expected_ioctls; + unsigned long p; + pthread_t uffd_mon; + uint8_t expected_byte; + void *expected_page; + char c; + struct uffd_stats stats = { 0 }; + uint64_t features = UFFD_FEATURE_MINOR_HUGETLBFS; + + if (!test_uffdio_minor) + return 0; + + printf("testing minor faults: "); + fflush(stdout); + + if (uffd_test_ops->release_pages(area_dst)) + return 1; + + if (userfaultfd_open_ext(&features)) + return 1; + /* If kernel reports the feature isn't supported, skip the test. */ + if (!(features & UFFD_FEATURE_MINOR_HUGETLBFS)) { + printf("skipping test due to lack of feature support\n"); + fflush(stdout); + return 0; + } + + uffdio_register.range.start = (unsigned long)area_dst_alias; + uffdio_register.range.len = nr_pages * page_size; + uffdio_register.mode = UFFDIO_REGISTER_MODE_MINOR; + if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register)) { + fprintf(stderr, "register failure\n"); + exit(1); + } + + expected_ioctls = uffd_test_ops->expected_ioctls; + expected_ioctls |= 1 << _UFFDIO_CONTINUE; + if ((uffdio_register.ioctls & expected_ioctls) != expected_ioctls) { + fprintf(stderr, "unexpected missing ioctl(s)\n"); + exit(1); + } + + /* + * After registering with UFFD, populate the non-UFFD-registered side of + * the shared mapping. This should *not* trigger any UFFD minor faults. + */ + for (p = 0; p < nr_pages; ++p) { + memset(area_dst + (p * page_size), p % ((uint8_t)-1), + page_size); + } + + if (pthread_create(&uffd_mon, &attr, uffd_poll_thread, &stats)) { + perror("uffd_poll_thread create"); + exit(1); + } + + /* + * Read each of the pages back using the UFFD-registered mapping. We + * expect that the first time we touch a page, it will result in a minor + * fault. uffd_poll_thread will resolve the fault by bit-flipping the + * page's contents, and then issuing a CONTINUE ioctl. 
+ */ + + if (posix_memalign(&expected_page, page_size, page_size)) { + fprintf(stderr, "out of memory\n"); + return 1; + } + + for (p = 0; p < nr_pages; ++p) { + expected_byte = ~((uint8_t)(p % ((uint8_t)-1))); + memset(expected_page, expected_byte, page_size); + if (my_bcmp(expected_page, area_dst_alias + (p * page_size), + page_size)) { + fprintf(stderr, + "unexpected page contents after minor fault\n"); + exit(1); + } + } + + if (write(pipefd[1], &c, sizeof(c)) != sizeof(c)) { + perror("pipe write"); + exit(1); + } + if (pthread_join(uffd_mon, NULL)) + return 1; + + close(uffd); + + uffd_stats_report(&stats, 1); + + return stats.missing_faults != 0 || stats.minor_faults != nr_pages; +} + static int userfaultfd_stress(void) { void *area; @@ -1413,7 +1563,7 @@ static int userfaultfd_stress(void) close(uffd); return userfaultfd_zeropage_test() || userfaultfd_sig_test() - || userfaultfd_events_test(); + || userfaultfd_events_test() || userfaultfd_minor_test(); } /* @@ -1454,6 +1604,8 @@ static void set_test_type(const char *type) map_shared = true; test_type = TEST_HUGETLB; uffd_test_ops = &hugetlb_uffd_test_ops; + /* Minor faults require shared hugetlb; only enable here. */ + test_uffdio_minor = true; } else if (!strcmp(type, "shmem")) { map_shared = true; test_type = TEST_SHMEM; -- cgit v1.2.3 From 79dbf135e2481eaa77b172d88c343bf85e021545 Mon Sep 17 00:00:00 2001 From: Pavel Tatashin Date: Tue, 4 May 2021 18:39:23 -0700 Subject: selftests/vm: gup_test: fix test flag In gup_test both gup_flags and test_flags use the same flags field. This is broken. Further, in the actual gup_test.c all the passed gup_flags are erased and unconditionally replaced with FOLL_WRITE. 
Which means that test_flags are ignored, and code like this always performs pin dump test: 155 if (gup->flags & GUP_TEST_FLAG_DUMP_PAGES_USE_PIN) 156 nr = pin_user_pages(addr, nr, gup->flags, 157 pages + i, NULL); 158 else 159 nr = get_user_pages(addr, nr, gup->flags, 160 pages + i, NULL); 161 break; Add a new test_flags field, to allow raw gup_flags to work. Add a new subcommand for DUMP_USER_PAGES_TEST to specify that pin test should be performed. Remove unconditional overwriting of gup_flags via FOLL_WRITE. But, preserve the previous behaviour where FOLL_WRITE was the default flag, and add a new option "-W" to unset FOLL_WRITE. Rename flags with gup_flags. With the fix, dump works like this: root@virtme:/# gup_test -c ---- page #0, starting from user virt addr: 0x7f8acb9e4000 page:00000000d3d2ee27 refcount:2 mapcount:1 mapping:0000000000000000 index:0x0 pfn:0x100bcf anon flags: 0x300000000080016(referenced|uptodate|lru|swapbacked) raw: 0300000000080016 ffffd0e204021608 ffffd0e208df2e88 ffff8ea04243ec61 raw: 0000000000000000 0000000000000000 0000000200000000 0000000000000000 page dumped because: gup_test: dump_pages() test DUMP_USER_PAGES_TEST: done root@virtme:/# gup_test -c -p ---- page #0, starting from user virt addr: 0x7fd19701b000 page:00000000baed3c7d refcount:1025 mapcount:1 mapping:0000000000000000 index:0x0 pfn:0x108008 anon flags: 0x300000000080014(uptodate|lru|swapbacked) raw: 0300000000080014 ffffd0e204200188 ffffd0e205e09088 ffff8ea04243ee71 raw: 0000000000000000 0000000000000000 0000040100000000 0000000000000000 page dumped because: gup_test: dump_pages() test DUMP_USER_PAGES_TEST: done Refcount shows the difference between pin vs no-pin case. Also change type of nr from int to long, as it counts number of pages. 
Link: https://lkml.kernel.org/r/20210215161349.246722-14-pasha.tatashin@soleen.com Signed-off-by: Pavel Tatashin Reviewed-by: John Hubbard Cc: Dan Williams Cc: David Hildenbrand Cc: David Rientjes Cc: Ingo Molnar Cc: Ira Weiny Cc: James Morris Cc: Jason Gunthorpe Cc: Jason Gunthorpe Cc: Joonsoo Kim Cc: Matthew Wilcox Cc: Mel Gorman Cc: Michal Hocko Cc: Michal Hocko Cc: Mike Kravetz Cc: Oscar Salvador Cc: Peter Zijlstra Cc: Sasha Levin Cc: Steven Rostedt (VMware) Cc: Tyler Hicks Cc: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/testing/selftests/vm/gup_test.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/vm/gup_test.c b/tools/testing/selftests/vm/gup_test.c index 6c6336dd3b7f..943cc2608dc2 100644 --- a/tools/testing/selftests/vm/gup_test.c +++ b/tools/testing/selftests/vm/gup_test.c @@ -37,13 +37,13 @@ int main(int argc, char **argv) { struct gup_test gup = { 0 }; unsigned long size = 128 * MB; - int i, fd, filed, opt, nr_pages = 1, thp = -1, repeats = 1, write = 0; + int i, fd, filed, opt, nr_pages = 1, thp = -1, repeats = 1, write = 1; unsigned long cmd = GUP_FAST_BENCHMARK; int flags = MAP_PRIVATE; char *file = "/dev/zero"; char *p; - while ((opt = getopt(argc, argv, "m:r:n:F:f:abctTLUuwSH")) != -1) { + while ((opt = getopt(argc, argv, "m:r:n:F:f:abctTLUuwWSHp")) != -1) { switch (opt) { case 'a': cmd = PIN_FAST_BENCHMARK; @@ -65,9 +65,13 @@ int main(int argc, char **argv) */ gup.which_pages[0] = 1; break; + case 'p': + /* works only with DUMP_USER_PAGES_TEST */ + gup.test_flags |= GUP_TEST_FLAG_DUMP_PAGES_USE_PIN; + break; case 'F': /* strtol, so you can pass flags in hex form */ - gup.flags = strtol(optarg, 0, 0); + gup.gup_flags = strtol(optarg, 0, 0); break; case 'm': size = atoi(optarg) * MB; @@ -93,6 +97,9 @@ int main(int argc, char **argv) case 'w': write = 1; break; + case 'W': + write = 0; + break; case 'f': file = optarg; break; @@ 
-140,7 +147,7 @@ int main(int argc, char **argv) gup.nr_pages_per_call = nr_pages; if (write) - gup.flags |= FOLL_WRITE; + gup.gup_flags |= FOLL_WRITE; fd = open("/sys/kernel/debug/gup_test", O_RDWR); if (fd == -1) { -- cgit v1.2.3 From e44605a8b1aa13d892addc59ec3d416cb186c77b Mon Sep 17 00:00:00 2001 From: Pavel Tatashin Date: Tue, 4 May 2021 18:39:27 -0700 Subject: selftests/vm: gup_test: test faulting in kernel, and verify pinnable pages When pages are pinned they can be faulted in userland and migrated, and they can be faulted right in kernel without migration. In either case, the pinned pages must end-up being pinnable (not movable). Add a new test to gup_test, to help verify that the gup/pup (get_user_pages() / pin_user_pages()) behavior with respect to pinnable and movable pages is reasonable and correct. Specifically, provide a way to: 1) Verify that only "pinnable" pages are pinned. This is checked automatically for you. 2) Verify that gup/pup performance is reasonable. This requires comparing benchmarks between doing gup/pup on pages that have been pre-faulted in from user space, vs. doing gup/pup on pages that are not faulted in until gup/pup time (via FOLL_TOUCH). This decision is controlled with the new -z command line option. 
Link: https://lkml.kernel.org/r/20210215161349.246722-15-pasha.tatashin@soleen.com Signed-off-by: Pavel Tatashin Reviewed-by: John Hubbard Cc: Dan Williams Cc: David Hildenbrand Cc: David Rientjes Cc: Ingo Molnar Cc: Ira Weiny Cc: James Morris Cc: Jason Gunthorpe Cc: Jason Gunthorpe Cc: Joonsoo Kim Cc: Matthew Wilcox Cc: Mel Gorman Cc: Michal Hocko Cc: Michal Hocko Cc: Mike Kravetz Cc: Oscar Salvador Cc: Peter Zijlstra Cc: Sasha Levin Cc: Steven Rostedt (VMware) Cc: Tyler Hicks Cc: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/testing/selftests/vm/gup_test.c | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/vm/gup_test.c b/tools/testing/selftests/vm/gup_test.c index 943cc2608dc2..1e662d59c502 100644 --- a/tools/testing/selftests/vm/gup_test.c +++ b/tools/testing/selftests/vm/gup_test.c @@ -13,6 +13,7 @@ /* Just the flags we need, copied from mm.h: */ #define FOLL_WRITE 0x01 /* check pte is writable */ +#define FOLL_TOUCH 0x02 /* mark page accessed */ static char *cmd_to_str(unsigned long cmd) { @@ -39,11 +40,11 @@ int main(int argc, char **argv) unsigned long size = 128 * MB; int i, fd, filed, opt, nr_pages = 1, thp = -1, repeats = 1, write = 1; unsigned long cmd = GUP_FAST_BENCHMARK; - int flags = MAP_PRIVATE; + int flags = MAP_PRIVATE, touch = 0; char *file = "/dev/zero"; char *p; - while ((opt = getopt(argc, argv, "m:r:n:F:f:abctTLUuwWSHp")) != -1) { + while ((opt = getopt(argc, argv, "m:r:n:F:f:abctTLUuwWSHpz")) != -1) { switch (opt) { case 'a': cmd = PIN_FAST_BENCHMARK; @@ -110,6 +111,10 @@ int main(int argc, char **argv) case 'H': flags |= (MAP_HUGETLB | MAP_ANONYMOUS); break; + case 'z': + /* fault pages in gup, do not fault in userland */ + touch = 1; + break; default: return -1; } @@ -167,8 +172,18 @@ int main(int argc, char **argv) else if (thp == 0) madvise(p, size, MADV_NOHUGEPAGE); - for (; (unsigned long)p < gup.addr + size; p 
+= PAGE_SIZE) - p[0] = 0; + /* + * FOLL_TOUCH, in gup_test, is used as an either/or case: either + * fault pages in from the kernel via FOLL_TOUCH, or fault them + * in here, from user space. This allows comparison of performance + * between those two cases. + */ + if (touch) { + gup.gup_flags |= FOLL_TOUCH; + } else { + for (; (unsigned long)p < gup.addr + size; p += PAGE_SIZE) + p[0] = 0; + } /* Only report timing information on the *_BENCHMARK commands: */ if ((cmd == PIN_FAST_BENCHMARK) || (cmd == GUP_FAST_BENCHMARK) || -- cgit v1.2.3 From d4455faccd6cbe11ddfdbe28723a2122453b4f4e Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Thu, 6 May 2021 18:02:16 -0700 Subject: proc: mandate ->proc_lseek in "struct proc_ops" Now that proc_ops are separate from file_operations and other operations it is easy to check that all instances have a ->proc_lseek hook and remove the check in the main code. Note: nonseekable_open() files naturally don't require ->proc_lseek. Garbage collect pde_lseek() function. [adobriyan@gmail.com: smoke test lseek()] Link: https://lkml.kernel.org/r/YG4OIhChOrVTPgdN@localhost.localdomain Link: https://lkml.kernel.org/r/YFYX0Bzwxlc7aBa/@localhost.localdomain Signed-off-by: Alexey Dobriyan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/testing/selftests/proc/read.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/proc/read.c b/tools/testing/selftests/proc/read.c index b3ef9e14d6cc..35ee78dff144 100644 --- a/tools/testing/selftests/proc/read.c +++ b/tools/testing/selftests/proc/read.c @@ -14,7 +14,7 @@ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
*/ // Test -// 1) read of every file in /proc +// 1) read and lseek on every file in /proc // 2) readlink of every symlink in /proc // 3) recursively (1) + (2) for every directory in /proc // 4) write to /proc/*/clear_refs and /proc/*/task/*/clear_refs @@ -45,6 +45,8 @@ static void f_reg(DIR *d, const char *filename) fd = openat(dirfd(d), filename, O_RDONLY|O_NONBLOCK); if (fd == -1) return; + /* struct proc_ops::proc_lseek is mandatory if file is seekable. */ + (void)lseek(fd, 0, SEEK_SET); rv = read(fd, buf, sizeof(buf)); assert((0 <= rv && rv <= sizeof(buf)) || rv == -1); close(fd); -- cgit v1.2.3 From 268af17ada5855a9b703995125a9920ac117b56b Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Thu, 6 May 2021 18:02:21 -0700 Subject: selftests: proc: test subset=pid Test that /proc instance mounted with mount -t proc -o subset=pid contains only ".", "..", "self", "thread-self" and pid directories. Note: Currently "subset=pid" doesn't return "." and ".." via readdir. This must be a bug. 
Link: https://lkml.kernel.org/r/YFYZZ7WGaZlsnChS@localhost.localdomain Signed-off-by: Alexey Dobriyan Acked-by: Alexey Gladkov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/testing/selftests/proc/Makefile | 1 + tools/testing/selftests/proc/proc-subset-pid.c | 121 +++++++++++++++++++++++++ 2 files changed, 122 insertions(+) create mode 100644 tools/testing/selftests/proc/proc-subset-pid.c (limited to 'tools') diff --git a/tools/testing/selftests/proc/Makefile b/tools/testing/selftests/proc/Makefile index 8be8a03d2973..1054e40a499a 100644 --- a/tools/testing/selftests/proc/Makefile +++ b/tools/testing/selftests/proc/Makefile @@ -12,6 +12,7 @@ TEST_GEN_PROGS += proc-self-map-files-001 TEST_GEN_PROGS += proc-self-map-files-002 TEST_GEN_PROGS += proc-self-syscall TEST_GEN_PROGS += proc-self-wchan +TEST_GEN_PROGS += proc-subset-pid TEST_GEN_PROGS += proc-uptime-001 TEST_GEN_PROGS += proc-uptime-002 TEST_GEN_PROGS += read diff --git a/tools/testing/selftests/proc/proc-subset-pid.c b/tools/testing/selftests/proc/proc-subset-pid.c new file mode 100644 index 000000000000..d1052bcab039 --- /dev/null +++ b/tools/testing/selftests/proc/proc-subset-pid.c @@ -0,0 +1,121 @@ +/* + * Copyright (c) 2021 Alexey Dobriyan + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ +/* + * Test that "mount -t proc -o subset=pid" hides everything but pids, + * /proc/self and /proc/thread-self. + */ +#undef NDEBUG +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static inline bool streq(const char *a, const char *b) +{ + return strcmp(a, b) == 0; +} + +static void make_private_proc(void) +{ + if (unshare(CLONE_NEWNS) == -1) { + if (errno == ENOSYS || errno == EPERM) { + exit(4); + } + exit(1); + } + if (mount(NULL, "/", NULL, MS_PRIVATE|MS_REC, NULL) == -1) { + exit(1); + } + if (mount(NULL, "/proc", "proc", 0, "subset=pid") == -1) { + exit(1); + } +} + +static bool string_is_pid(const char *s) +{ + while (1) { + switch (*s++) { + case '0':case '1':case '2':case '3':case '4': + case '5':case '6':case '7':case '8':case '9': + continue; + + case '\0': + return true; + + default: + return false; + } + } +} + +int main(void) +{ + make_private_proc(); + + DIR *d = opendir("/proc"); + assert(d); + + struct dirent *de; + + bool dot = false; + bool dot_dot = false; + bool self = false; + bool thread_self = false; + + while ((de = readdir(d))) { + if (streq(de->d_name, ".")) { + assert(!dot); + dot = true; + assert(de->d_type == DT_DIR); + } else if (streq(de->d_name, "..")) { + assert(!dot_dot); + dot_dot = true; + assert(de->d_type == DT_DIR); + } else if (streq(de->d_name, "self")) { + assert(!self); + self = true; + assert(de->d_type == DT_LNK); + } else if (streq(de->d_name, "thread-self")) { + assert(!thread_self); + thread_self = true; + assert(de->d_type == DT_LNK); + } else { + if (!string_is_pid(de->d_name)) { + fprintf(stderr, "d_name '%s'\n", de->d_name); + assert(0); + } + assert(de->d_type == DT_DIR); + } + } + + char c; + int rv = readlink("/proc/cpuinfo", &c, 1); + assert(rv == -1 && errno == ENOENT); + + int fd = open("/proc/cpuinfo", O_RDONLY); + assert(fd == -1 && errno == ENOENT); + + return 0; +} -- cgit v1.2.3 From 
d1d1a2cd4627724c37539892db8efa611d2cbd70 Mon Sep 17 00:00:00 2001 From: Yury Norov Date: Thu, 6 May 2021 18:02:42 -0700 Subject: tools: disable -Wno-type-limits Patch series "lib/find_bit: fast path for small bitmaps", v6. Bitmap operations are much simpler and faster in case of small bitmaps which fit into a single word. In linux/bitmap.c we have a machinery that allows compiler to replace actual function call with a few instructions if bitmaps passed into the function are small and their size is known at compile time. find_*_bit() API lacks this functionality; but users will benefit from it a lot. One important example is cpumask subsystem when NR_CPUS <= BITS_PER_LONG. This patch (of 12): GENMASK(h, l) may be passed with unsigned types. In such case, type-limits warning is generated for example in case of GENMASK(h, 0). Link: https://lkml.kernel.org/r/20210401003153.97325-1-yury.norov@gmail.com Link: https://lkml.kernel.org/r/20210401003153.97325-2-yury.norov@gmail.com Signed-off-by: Yury Norov Acked-by: Rasmus Villemoes Cc: Alexey Klimov Cc: Andy Shevchenko Cc: Arnd Bergmann Cc: David Sterba Cc: Dennis Zhou Cc: Geert Uytterhoeven Cc: Jianpeng Ma Cc: Joe Perches Cc: John Paul Adrian Glaubitz Cc: Josh Poimboeuf Cc: Rich Felker Cc: Stefano Brivio Cc: Wei Yang Cc: Wolfram Sang Cc: Yoshinori Sato Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/scripts/Makefile.include | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/scripts/Makefile.include b/tools/scripts/Makefile.include index 25adfec2cb39..f9271f3ea912 100644 --- a/tools/scripts/Makefile.include +++ b/tools/scripts/Makefile.include @@ -38,6 +38,7 @@ EXTRA_WARNINGS += -Wswitch-enum EXTRA_WARNINGS += -Wundef EXTRA_WARNINGS += -Wwrite-strings EXTRA_WARNINGS += -Wformat +EXTRA_WARNINGS += -Wno-type-limits # Makefiles suck: This macro sets a default value of $(2) for the # variable named by $(1), unless the variable has been set by -- cgit v1.2.3 From 
e5b9252d9000fc82324af5864701c1daffeebd7e Mon Sep 17 00:00:00 2001 From: Yury Norov Date: Thu, 6 May 2021 18:02:46 -0700 Subject: tools: bitmap: sync function declarations with the kernel Some functions in tools/include/linux/bitmap.h declare nbits as int. In the kernel nbits is declared as unsigned int. Link: https://lkml.kernel.org/r/20210401003153.97325-3-yury.norov@gmail.com Signed-off-by: Yury Norov Acked-by: Rasmus Villemoes Cc: Alexey Klimov Cc: Andy Shevchenko Cc: Arnd Bergmann Cc: David Sterba Cc: Dennis Zhou Cc: Geert Uytterhoeven Cc: Jianpeng Ma Cc: Joe Perches Cc: John Paul Adrian Glaubitz Cc: Josh Poimboeuf Cc: Rich Felker Cc: Stefano Brivio Cc: Wei Yang Cc: Wolfram Sang Cc: Yoshinori Sato Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/include/linux/bitmap.h | 8 ++++---- tools/lib/bitmap.c | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/include/linux/bitmap.h b/tools/include/linux/bitmap.h index 477a1cae513f..7cbd23e56d48 100644 --- a/tools/include/linux/bitmap.h +++ b/tools/include/linux/bitmap.h @@ -30,7 +30,7 @@ void bitmap_clear(unsigned long *map, unsigned int start, int len); #define small_const_nbits(nbits) \ (__builtin_constant_p(nbits) && (nbits) <= BITS_PER_LONG) -static inline void bitmap_zero(unsigned long *dst, int nbits) +static inline void bitmap_zero(unsigned long *dst, unsigned int nbits) { if (small_const_nbits(nbits)) *dst = 0UL; @@ -66,7 +66,7 @@ static inline int bitmap_full(const unsigned long *src, unsigned int nbits) return find_first_zero_bit(src, nbits) == nbits; } -static inline int bitmap_weight(const unsigned long *src, int nbits) +static inline int bitmap_weight(const unsigned long *src, unsigned int nbits) { if (small_const_nbits(nbits)) return hweight_long(*src & BITMAP_LAST_WORD_MASK(nbits)); @@ -74,7 +74,7 @@ static inline int bitmap_weight(const unsigned long *src, int nbits) } static inline void bitmap_or(unsigned long *dst, const unsigned long 
*src1, - const unsigned long *src2, int nbits) + const unsigned long *src2, unsigned int nbits) { if (small_const_nbits(nbits)) *dst = *src1 | *src2; @@ -141,7 +141,7 @@ static inline void bitmap_free(unsigned long *bitmap) * @buf: buffer to store output * @size: size of @buf */ -size_t bitmap_scnprintf(unsigned long *bitmap, int nbits, +size_t bitmap_scnprintf(unsigned long *bitmap, unsigned int nbits, char *buf, size_t size); /** diff --git a/tools/lib/bitmap.c b/tools/lib/bitmap.c index 5043747ef6c5..f4e914712b6f 100644 --- a/tools/lib/bitmap.c +++ b/tools/lib/bitmap.c @@ -28,11 +28,11 @@ void __bitmap_or(unsigned long *dst, const unsigned long *bitmap1, dst[k] = bitmap1[k] | bitmap2[k]; } -size_t bitmap_scnprintf(unsigned long *bitmap, int nbits, +size_t bitmap_scnprintf(unsigned long *bitmap, unsigned int nbits, char *buf, size_t size) { /* current bit is 'cur', most recently seen range is [rbot, rtop] */ - int cur, rbot, rtop; + unsigned int cur, rbot, rtop; bool first = true; size_t ret = 0; -- cgit v1.2.3 From a719101f19d2b4f107c8a79ed8b2866832a1816f Mon Sep 17 00:00:00 2001 From: Yury Norov Date: Thu, 6 May 2021 18:02:49 -0700 Subject: tools: sync BITMAP_LAST_WORD_MASK() macro with the kernel Kernel version generates better code. 
Link: https://lkml.kernel.org/r/20210401003153.97325-4-yury.norov@gmail.com Signed-off-by: Yury Norov Acked-by: Rasmus Villemoes Cc: Alexey Klimov Cc: Andy Shevchenko Cc: Arnd Bergmann Cc: David Sterba Cc: Dennis Zhou Cc: Geert Uytterhoeven Cc: Jianpeng Ma Cc: Joe Perches Cc: John Paul Adrian Glaubitz Cc: Josh Poimboeuf Cc: Rich Felker Cc: Stefano Brivio Cc: Wei Yang Cc: Wolfram Sang Cc: Yoshinori Sato Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/include/linux/bitmap.h | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/include/linux/bitmap.h b/tools/include/linux/bitmap.h index 7cbd23e56d48..4aabc23ec747 100644 --- a/tools/include/linux/bitmap.h +++ b/tools/include/linux/bitmap.h @@ -20,12 +20,7 @@ int __bitmap_equal(const unsigned long *bitmap1, void bitmap_clear(unsigned long *map, unsigned int start, int len); #define BITMAP_FIRST_WORD_MASK(start) (~0UL << ((start) & (BITS_PER_LONG - 1))) - -#define BITMAP_LAST_WORD_MASK(nbits) \ -( \ - ((nbits) % BITS_PER_LONG) ? 
\ - (1UL<<((nbits) % BITS_PER_LONG))-1 : ~0UL \ -) +#define BITMAP_LAST_WORD_MASK(nbits) (~0UL >> (-(nbits) & (BITS_PER_LONG - 1))) #define small_const_nbits(nbits) \ (__builtin_constant_p(nbits) && (nbits) <= BITS_PER_LONG) -- cgit v1.2.3 From 78e48f0667ff11ee444e057c757896062b6ad06b Mon Sep 17 00:00:00 2001 From: Yury Norov Date: Thu, 6 May 2021 18:03:00 -0700 Subject: tools: sync small_const_nbits() macro with the kernel Sync implementation with the kernel and move the macro from tools/include/linux/bitmap.h to tools/include/asm-generic/bitsperlong.h Link: https://lkml.kernel.org/r/20210401003153.97325-7-yury.norov@gmail.com Signed-off-by: Yury Norov Acked-by: Rasmus Villemoes Cc: Alexey Klimov Cc: Andy Shevchenko Cc: Arnd Bergmann Cc: David Sterba Cc: Dennis Zhou Cc: Geert Uytterhoeven Cc: Jianpeng Ma Cc: Joe Perches Cc: John Paul Adrian Glaubitz Cc: Josh Poimboeuf Cc: Rich Felker Cc: Stefano Brivio Cc: Wei Yang Cc: Wolfram Sang Cc: Yoshinori Sato Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/include/asm-generic/bitsperlong.h | 3 +++ tools/include/linux/bitmap.h | 3 --- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/include/asm-generic/bitsperlong.h b/tools/include/asm-generic/bitsperlong.h index 8f2283052333..2093d56ddd11 100644 --- a/tools/include/asm-generic/bitsperlong.h +++ b/tools/include/asm-generic/bitsperlong.h @@ -18,4 +18,7 @@ #define BITS_PER_LONG_LONG 64 #endif +#define small_const_nbits(nbits) \ + (__builtin_constant_p(nbits) && (nbits) <= BITS_PER_LONG && (nbits) > 0) + #endif /* __ASM_GENERIC_BITS_PER_LONG */ diff --git a/tools/include/linux/bitmap.h b/tools/include/linux/bitmap.h index 4aabc23ec747..330dbf7509cc 100644 --- a/tools/include/linux/bitmap.h +++ b/tools/include/linux/bitmap.h @@ -22,9 +22,6 @@ void bitmap_clear(unsigned long *map, unsigned int start, int len); #define BITMAP_FIRST_WORD_MASK(start) (~0UL << ((start) & (BITS_PER_LONG - 1))) #define 
BITMAP_LAST_WORD_MASK(nbits) (~0UL >> (-(nbits) & (BITS_PER_LONG - 1))) -#define small_const_nbits(nbits) \ - (__builtin_constant_p(nbits) && (nbits) <= BITS_PER_LONG) - static inline void bitmap_zero(unsigned long *dst, unsigned int nbits) { if (small_const_nbits(nbits)) -- cgit v1.2.3 From ea81c1ef441733ee779d776292d6269a97c5d2e1 Mon Sep 17 00:00:00 2001 From: Yury Norov Date: Thu, 6 May 2021 18:03:07 -0700 Subject: tools: sync find_next_bit implementation Sync the implementation with recent kernel changes. Link: https://lkml.kernel.org/r/20210401003153.97325-9-yury.norov@gmail.com Signed-off-by: Yury Norov Acked-by: Rasmus Villemoes Cc: Alexey Klimov Cc: Andy Shevchenko Cc: Arnd Bergmann Cc: David Sterba Cc: Dennis Zhou Cc: Geert Uytterhoeven Cc: Jianpeng Ma Cc: Joe Perches Cc: John Paul Adrian Glaubitz Cc: Josh Poimboeuf Cc: Rich Felker Cc: Stefano Brivio Cc: Wei Yang Cc: Wolfram Sang Cc: Yoshinori Sato Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/include/asm-generic/bitops/find.h | 27 +++++++++++++---- tools/lib/find_bit.c | 52 +++++++++++++-------------------- 2 files changed, 42 insertions(+), 37 deletions(-) (limited to 'tools') diff --git a/tools/include/asm-generic/bitops/find.h b/tools/include/asm-generic/bitops/find.h index 16ed1982cb34..9fe62d10b084 100644 --- a/tools/include/asm-generic/bitops/find.h +++ b/tools/include/asm-generic/bitops/find.h @@ -2,6 +2,10 @@ #ifndef _TOOLS_LINUX_ASM_GENERIC_BITOPS_FIND_H_ #define _TOOLS_LINUX_ASM_GENERIC_BITOPS_FIND_H_ +extern unsigned long _find_next_bit(const unsigned long *addr1, + const unsigned long *addr2, unsigned long nbits, + unsigned long start, unsigned long invert, unsigned long le); + #ifndef find_next_bit /** * find_next_bit - find the next set bit in a memory region @@ -12,8 +16,12 @@ * Returns the bit number for the next set bit * If no bits are set, returns @size. 
*/ -extern unsigned long find_next_bit(const unsigned long *addr, unsigned long - size, unsigned long offset); +static inline +unsigned long find_next_bit(const unsigned long *addr, unsigned long size, + unsigned long offset) +{ + return _find_next_bit(addr, NULL, size, offset, 0UL, 0); +} #endif #ifndef find_next_and_bit @@ -27,13 +35,16 @@ extern unsigned long find_next_bit(const unsigned long *addr, unsigned long * Returns the bit number for the next set bit * If no bits are set, returns @size. */ -extern unsigned long find_next_and_bit(const unsigned long *addr1, +static inline +unsigned long find_next_and_bit(const unsigned long *addr1, const unsigned long *addr2, unsigned long size, - unsigned long offset); + unsigned long offset) +{ + return _find_next_bit(addr1, addr2, size, offset, 0UL, 0); +} #endif #ifndef find_next_zero_bit - /** * find_next_zero_bit - find the next cleared bit in a memory region * @addr: The address to base the search on @@ -43,8 +54,12 @@ extern unsigned long find_next_and_bit(const unsigned long *addr1, * Returns the bit number of the next zero bit * If no bits are zero, returns @size. */ +static inline unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size, - unsigned long offset); + unsigned long offset) +{ + return _find_next_bit(addr, NULL, size, offset, ~0UL, 0); +} #endif #ifndef find_first_bit diff --git a/tools/lib/find_bit.c b/tools/lib/find_bit.c index ac37022e9486..589fd2f26f94 100644 --- a/tools/lib/find_bit.c +++ b/tools/lib/find_bit.c @@ -28,11 +28,12 @@ * searching it for one bits. * - The optional "addr2", which is anded with "addr1" if present. 
*/ -static inline unsigned long _find_next_bit(const unsigned long *addr1, +unsigned long _find_next_bit(const unsigned long *addr1, const unsigned long *addr2, unsigned long nbits, - unsigned long start, unsigned long invert) + unsigned long start, unsigned long invert, unsigned long le) { - unsigned long tmp; + unsigned long tmp, mask; + (void) le; if (unlikely(start >= nbits)) return nbits; @@ -43,7 +44,19 @@ static inline unsigned long _find_next_bit(const unsigned long *addr1, tmp ^= invert; /* Handle 1st word. */ - tmp &= BITMAP_FIRST_WORD_MASK(start); + mask = BITMAP_FIRST_WORD_MASK(start); + + /* + * Due to the lack of swab() in tools, and the fact that it doesn't + * need little-endian support, just comment it out + */ +#if (0) + if (le) + mask = swab(mask); +#endif + + tmp &= mask; + start = round_down(start, BITS_PER_LONG); while (!tmp) { @@ -57,18 +70,12 @@ static inline unsigned long _find_next_bit(const unsigned long *addr1, tmp ^= invert; } - return min(start + __ffs(tmp), nbits); -} +#if (0) + if (le) + tmp = swab(tmp); #endif -#ifndef find_next_bit -/* - * Find the next set bit in a memory region. 
- */ -unsigned long find_next_bit(const unsigned long *addr, unsigned long size, - unsigned long offset) -{ - return _find_next_bit(addr, NULL, size, offset, 0UL); + return min(start + __ffs(tmp), nbits); } #endif @@ -105,20 +112,3 @@ unsigned long find_first_zero_bit(const unsigned long *addr, unsigned long size) return size; } #endif - -#ifndef find_next_zero_bit -unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size, - unsigned long offset) -{ - return _find_next_bit(addr, NULL, size, offset, ~0UL); -} -#endif - -#ifndef find_next_and_bit -unsigned long find_next_and_bit(const unsigned long *addr1, - const unsigned long *addr2, unsigned long size, - unsigned long offset) -{ - return _find_next_bit(addr1, addr2, size, offset, 0UL); -} -#endif -- cgit v1.2.3 From eaae7841ba83bb42dcac3177dc65f8dd974e6c0b Mon Sep 17 00:00:00 2001 From: Yury Norov Date: Thu, 6 May 2021 18:03:18 -0700 Subject: tools: sync lib/find_bit implementation Add fast paths to find_*_bit() functions as per kernel implementation. 
Link: https://lkml.kernel.org/r/20210401003153.97325-12-yury.norov@gmail.com Signed-off-by: Yury Norov Acked-by: Rasmus Villemoes Cc: Alexey Klimov Cc: Andy Shevchenko Cc: Arnd Bergmann Cc: David Sterba Cc: Dennis Zhou Cc: Geert Uytterhoeven Cc: Jianpeng Ma Cc: Joe Perches Cc: John Paul Adrian Glaubitz Cc: Josh Poimboeuf Cc: Rich Felker Cc: Stefano Brivio Cc: Wei Yang Cc: Wolfram Sang Cc: Yoshinori Sato Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/include/asm-generic/bitops/find.h | 58 +++++++++++++++++++++++++++++++-- tools/lib/find_bit.c | 4 +-- 2 files changed, 57 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/include/asm-generic/bitops/find.h b/tools/include/asm-generic/bitops/find.h index 9fe62d10b084..6481fd11012a 100644 --- a/tools/include/asm-generic/bitops/find.h +++ b/tools/include/asm-generic/bitops/find.h @@ -5,6 +5,9 @@ extern unsigned long _find_next_bit(const unsigned long *addr1, const unsigned long *addr2, unsigned long nbits, unsigned long start, unsigned long invert, unsigned long le); +extern unsigned long _find_first_bit(const unsigned long *addr, unsigned long size); +extern unsigned long _find_first_zero_bit(const unsigned long *addr, unsigned long size); +extern unsigned long _find_last_bit(const unsigned long *addr, unsigned long size); #ifndef find_next_bit /** @@ -20,6 +23,16 @@ static inline unsigned long find_next_bit(const unsigned long *addr, unsigned long size, unsigned long offset) { + if (small_const_nbits(size)) { + unsigned long val; + + if (unlikely(offset >= size)) + return size; + + val = *addr & GENMASK(size - 1, offset); + return val ? 
__ffs(val) : size; + } + return _find_next_bit(addr, NULL, size, offset, 0UL, 0); } #endif @@ -40,6 +53,16 @@ unsigned long find_next_and_bit(const unsigned long *addr1, const unsigned long *addr2, unsigned long size, unsigned long offset) { + if (small_const_nbits(size)) { + unsigned long val; + + if (unlikely(offset >= size)) + return size; + + val = *addr1 & *addr2 & GENMASK(size - 1, offset); + return val ? __ffs(val) : size; + } + return _find_next_bit(addr1, addr2, size, offset, 0UL, 0); } #endif @@ -58,6 +81,16 @@ static inline unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size, unsigned long offset) { + if (small_const_nbits(size)) { + unsigned long val; + + if (unlikely(offset >= size)) + return size; + + val = *addr | ~GENMASK(size - 1, offset); + return val == ~0UL ? size : ffz(val); + } + return _find_next_bit(addr, NULL, size, offset, ~0UL, 0); } #endif @@ -72,8 +105,17 @@ unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size, * Returns the bit number of the first set bit. * If no bits are set, returns @size. */ -extern unsigned long find_first_bit(const unsigned long *addr, - unsigned long size); +static inline +unsigned long find_first_bit(const unsigned long *addr, unsigned long size) +{ + if (small_const_nbits(size)) { + unsigned long val = *addr & GENMASK(size - 1, 0); + + return val ? __ffs(val) : size; + } + + return _find_first_bit(addr, size); +} #endif /* find_first_bit */ @@ -87,7 +129,17 @@ extern unsigned long find_first_bit(const unsigned long *addr, * Returns the bit number of the first cleared bit. * If no bits are zero, returns @size. */ -unsigned long find_first_zero_bit(const unsigned long *addr, unsigned long size); +static inline +unsigned long find_first_zero_bit(const unsigned long *addr, unsigned long size) +{ + if (small_const_nbits(size)) { + unsigned long val = *addr | ~GENMASK(size - 1, 0); + + return val == ~0UL ? 
size : ffz(val); + } + + return _find_first_zero_bit(addr, size); +} #endif #endif /*_TOOLS_LINUX_ASM_GENERIC_BITOPS_FIND_H_ */ diff --git a/tools/lib/find_bit.c b/tools/lib/find_bit.c index 589fd2f26f94..109aa7ffcf97 100644 --- a/tools/lib/find_bit.c +++ b/tools/lib/find_bit.c @@ -83,7 +83,7 @@ unsigned long _find_next_bit(const unsigned long *addr1, /* * Find the first set bit in a memory region. */ -unsigned long find_first_bit(const unsigned long *addr, unsigned long size) +unsigned long _find_first_bit(const unsigned long *addr, unsigned long size) { unsigned long idx; @@ -100,7 +100,7 @@ unsigned long find_first_bit(const unsigned long *addr, unsigned long size) /* * Find the first cleared bit in a memory region. */ -unsigned long find_first_zero_bit(const unsigned long *addr, unsigned long size) +unsigned long _find_first_zero_bit(const unsigned long *addr, unsigned long size) { unsigned long idx; -- cgit v1.2.3 From 1e3b918d1dd18bcea3df9339c2d8910ffa95686a Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Thu, 6 May 2021 18:04:04 -0700 Subject: kselftest: introduce new epoll test case Patch series "fs/epoll: restore user-visible behavior upon event ready". This series tries to address a change in user visible behavior, reported in https://bugzilla.kernel.org/show_bug.cgi?id=208943. Epoll does not report an event to all the threads running epoll_wait() on the same epoll descriptor. Unsurprisingly, this was bisected back to 339ddb53d373 (fs/epoll: remove unnecessary wakeups of nested epoll), which has had various problems in the past, beyond only nested epoll usage. This patch (of 2): This incorporates the testcase originally reported in: https://bugzilla.kernel.org/show_bug.cgi?id=208943 Which ensures an event is reported to all threads blocked on the same epoll descriptor, otherwise only a single thread will receive the wakeup once the event becomes ready.
Link: https://lkml.kernel.org/r/20210405231025.33829-1-dave@stgolabs.net Link: https://lkml.kernel.org/r/20210405231025.33829-2-dave@stgolabs.net Signed-off-by: Davidlohr Bueso Cc: Jason Baron Cc: Roman Penyaev Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- .../filesystems/epoll/epoll_wakeup_test.c | 44 ++++++++++++++++++++++ 1 file changed, 44 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/filesystems/epoll/epoll_wakeup_test.c b/tools/testing/selftests/filesystems/epoll/epoll_wakeup_test.c index ad7fabd575f9..65ede506305c 100644 --- a/tools/testing/selftests/filesystems/epoll/epoll_wakeup_test.c +++ b/tools/testing/selftests/filesystems/epoll/epoll_wakeup_test.c @@ -3449,4 +3449,48 @@ TEST(epoll63) close(sfd[1]); } +/* + * t0 t1 + * (ew) \ / (ew) + * e0 + * | (lt) + * s0 + */ +TEST(epoll64) +{ + pthread_t waiter[2]; + struct epoll_event e; + struct epoll_mtcontext ctx = { 0 }; + + signal(SIGUSR1, signal_handler); + + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, ctx.sfd), 0); + + ctx.efd[0] = epoll_create(1); + ASSERT_GE(ctx.efd[0], 0); + + e.events = EPOLLIN; + ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0); + + /* + * main will act as the emitter once both waiter threads are + * blocked and expects to both be awoken upon the ready event. 
+ */ + ctx.main = pthread_self(); + ASSERT_EQ(pthread_create(&waiter[0], NULL, waiter_entry1a, &ctx), 0); + ASSERT_EQ(pthread_create(&waiter[1], NULL, waiter_entry1a, &ctx), 0); + + usleep(100000); + ASSERT_EQ(write(ctx.sfd[1], "w", 1), 1); + + ASSERT_EQ(pthread_join(waiter[0], NULL), 0); + ASSERT_EQ(pthread_join(waiter[1], NULL), 0); + + EXPECT_EQ(ctx.count, 2); + + close(ctx.efd[0]); + close(ctx.sfd[0]); + close(ctx.sfd[1]); +} + TEST_HARNESS_MAIN -- cgit v1.2.3 From 9c39c6ffe0c2945c7cf814814c096bc23b63f53d Mon Sep 17 00:00:00 2001 From: Zhang Yunkai Date: Thu, 6 May 2021 18:05:33 -0700 Subject: selftests: remove duplicate include 'assert.h' included in 'sparsebit.c' is duplicated. It is also included in the 161st line. 'string.h' included in 'mincore_selftest.c' is duplicated. It is also included in the 15th line. 'sched.h' included in 'tlbie_test.c' is duplicated. It is also included in the 33rd line. Link: https://lkml.kernel.org/r/20210316073336.426255-1-zhang.yunkai@zte.com.cn Signed-off-by: Zhang Yunkai Cc: Paolo Bonzini Cc: Shuah Khan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/testing/selftests/kvm/lib/sparsebit.c | 1 - tools/testing/selftests/mincore/mincore_selftest.c | 1 - tools/testing/selftests/powerpc/mm/tlbie_test.c | 1 - 3 files changed, 3 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/lib/sparsebit.c b/tools/testing/selftests/kvm/lib/sparsebit.c index 031ba3c932ed..a0d0c83d83de 100644 --- a/tools/testing/selftests/kvm/lib/sparsebit.c +++ b/tools/testing/selftests/kvm/lib/sparsebit.c @@ -1890,7 +1890,6 @@ void sparsebit_validate_internal(struct sparsebit *s) */ #include -#include struct range { sparsebit_idx_t first, last; diff --git a/tools/testing/selftests/mincore/mincore_selftest.c b/tools/testing/selftests/mincore/mincore_selftest.c index 5a1e85ff5d32..e54106643337 100644 --- a/tools/testing/selftests/mincore/mincore_selftest.c +++ b/tools/testing/selftests/mincore/mincore_selftest.c @@
-14,7 +14,6 @@ #include #include #include -#include #include "../kselftest.h" #include "../kselftest_harness.h" diff --git a/tools/testing/selftests/powerpc/mm/tlbie_test.c b/tools/testing/selftests/powerpc/mm/tlbie_test.c index f85a0938ab25..48344a74b212 100644 --- a/tools/testing/selftests/powerpc/mm/tlbie_test.c +++ b/tools/testing/selftests/powerpc/mm/tlbie_test.c @@ -33,7 +33,6 @@ #include #include #include -#include #include #include #include -- cgit v1.2.3 From fa60ce2cb4506701c43bd4cf3ca23d970daf1b9c Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 6 May 2021 18:06:44 -0700 Subject: treewide: remove editor modelines and cruft The section "19) Editor modelines and other cruft" in Documentation/process/coding-style.rst clearly says, "Do not include any of these in source files." I recently received a patch to explicitly add a new one. Let's do treewide cleanups, otherwise some people follow the existing code and attempt to upstream their favorite editor setups. It is even nicer if scripts/checkpatch.pl can check it. If we like to impose coding style in an editor-independent manner, I think editorconfig (patch [1]) is a saner solution. [1] https://lore.kernel.org/lkml/20200703073143.423557-1-danny@kdrag0n.dev/ Link: https://lkml.kernel.org/r/20210324054457.1477489-1-masahiroy@kernel.org Signed-off-by: Masahiro Yamada Acked-by: Geert Uytterhoeven Reviewed-by: Miguel Ojeda [auxdisplay] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/usb/hcd-tests.sh | 2 -- 1 file changed, 2 deletions(-) (limited to 'tools') diff --git a/tools/usb/hcd-tests.sh b/tools/usb/hcd-tests.sh index e8cad6a4f9c9..73f914d13f5c 100644 --- a/tools/usb/hcd-tests.sh +++ b/tools/usb/hcd-tests.sh @@ -272,5 +272,3 @@ do echo '' done done - -# vim: sw=4 -- cgit v1.2.3